{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 73136, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.3673250336703789e-05, "grad_norm": 9.694295883178711, "learning_rate": 3.4183359540575654e-08, "loss": 1.3352, "step": 1 }, { "epoch": 0.0001367325033670379, "grad_norm": 11.559527397155762, "learning_rate": 3.4183359540575647e-07, "loss": 1.0616, "step": 10 }, { "epoch": 0.0002734650067340758, "grad_norm": 5.545447826385498, "learning_rate": 6.836671908115129e-07, "loss": 1.1093, "step": 20 }, { "epoch": 0.0004101975101011137, "grad_norm": 8.48753833770752, "learning_rate": 1.0255007862172695e-06, "loss": 0.914, "step": 30 }, { "epoch": 0.0005469300134681516, "grad_norm": 23.72676658630371, "learning_rate": 1.3673343816230259e-06, "loss": 0.9962, "step": 40 }, { "epoch": 0.0006836625168351895, "grad_norm": 4.419444561004639, "learning_rate": 1.7091679770287825e-06, "loss": 1.0591, "step": 50 }, { "epoch": 0.0008203950202022274, "grad_norm": 13.655407905578613, "learning_rate": 2.051001572434539e-06, "loss": 0.9717, "step": 60 }, { "epoch": 0.0009571275235692653, "grad_norm": 7.209079265594482, "learning_rate": 2.392835167840295e-06, "loss": 1.2682, "step": 70 }, { "epoch": 0.0010938600269363031, "grad_norm": 5.900626182556152, "learning_rate": 2.7346687632460518e-06, "loss": 0.8418, "step": 80 }, { "epoch": 0.0012305925303033411, "grad_norm": 4.204265117645264, "learning_rate": 3.0765023586518083e-06, "loss": 1.0691, "step": 90 }, { "epoch": 0.001367325033670379, "grad_norm": 9.878252029418945, "learning_rate": 3.418335954057565e-06, "loss": 1.0445, "step": 100 }, { "epoch": 0.0015040575370374168, "grad_norm": 4.808658599853516, "learning_rate": 3.7601695494633215e-06, "loss": 0.919, "step": 110 }, { "epoch": 0.0016407900404044548, "grad_norm": 6.094480991363525, "learning_rate": 4.102003144869078e-06, "loss": 1.2332, "step": 120 }, { "epoch": 0.0017775225437714926, "grad_norm": 4.689969062805176, "learning_rate": 4.443836740274834e-06, "loss": 1.0547, "step": 130 }, { "epoch": 0.0019142550471385306, "grad_norm": 52.52847671508789, "learning_rate": 4.78567033568059e-06, "loss": 0.9076, "step": 140 }, { "epoch": 0.0020509875505055684, "grad_norm": 5.226283550262451, "learning_rate": 5.127503931086347e-06, "loss": 1.0007, "step": 150 }, { "epoch": 0.0021877200538726062, "grad_norm": 5.030527591705322, "learning_rate": 5.4693375264921035e-06, "loss": 0.9791, "step": 160 }, { "epoch": 0.002324452557239644, "grad_norm": 5.361509323120117, "learning_rate": 5.81117112189786e-06, "loss": 0.9412, "step": 170 }, { "epoch": 0.0024611850606066823, "grad_norm": 10.454483032226562, "learning_rate": 6.153004717303617e-06, "loss": 0.7534, "step": 180 }, { "epoch": 0.00259791756397372, "grad_norm": 9.59555721282959, "learning_rate": 6.494838312709373e-06, "loss": 0.921, "step": 190 }, { "epoch": 0.002734650067340758, "grad_norm": 9.032693862915039, "learning_rate": 6.83667190811513e-06, "loss": 1.0309, "step": 200 }, { "epoch": 0.0028713825707077957, "grad_norm": 4.685105323791504, "learning_rate": 7.1785055035208864e-06, "loss": 0.8453, "step": 210 }, { "epoch": 0.0030081150740748335, "grad_norm": 7.7351579666137695, "learning_rate": 7.520339098926643e-06, "loss": 0.9724, "step": 220 }, { "epoch": 0.0031448475774418718, "grad_norm": 4.964475154876709, "learning_rate": 7.8621726943324e-06, "loss": 0.8404, "step": 230 }, { "epoch": 0.0032815800808089096, "grad_norm": 5.649578094482422, "learning_rate": 8.204006289738156e-06, "loss": 1.0173, "step": 240 }, { "epoch": 0.0034183125841759474, "grad_norm": 5.642393589019775, "learning_rate": 8.545839885143913e-06, "loss": 0.8621, "step": 250 }, { "epoch": 0.003555045087542985, "grad_norm": 6.613762378692627, "learning_rate": 8.887673480549668e-06, "loss": 0.9501, "step": 260 }, { "epoch": 0.003691777590910023, "grad_norm": 5.375977039337158, "learning_rate": 9.229507075955426e-06, "loss": 1.0438, "step": 270 }, { "epoch": 0.0038285100942770612, "grad_norm": 3.7569994926452637, "learning_rate": 9.57134067136118e-06, "loss": 1.023, "step": 280 }, { "epoch": 0.003965242597644099, "grad_norm": 5.494469165802002, "learning_rate": 9.913174266766939e-06, "loss": 1.027, "step": 290 }, { "epoch": 0.004101975101011137, "grad_norm": 9.255108833312988, "learning_rate": 1.0255007862172694e-05, "loss": 0.8007, "step": 300 }, { "epoch": 0.004238707604378175, "grad_norm": 6.401493549346924, "learning_rate": 1.059684145757845e-05, "loss": 0.8474, "step": 310 }, { "epoch": 0.0043754401077452125, "grad_norm": 5.734095096588135, "learning_rate": 1.0938675052984207e-05, "loss": 0.8885, "step": 320 }, { "epoch": 0.00451217261111225, "grad_norm": 5.60014533996582, "learning_rate": 1.1280508648389964e-05, "loss": 0.9895, "step": 330 }, { "epoch": 0.004648905114479288, "grad_norm": 4.845831394195557, "learning_rate": 1.162234224379572e-05, "loss": 0.7525, "step": 340 }, { "epoch": 0.004785637617846327, "grad_norm": 4.947256088256836, "learning_rate": 1.1964175839201477e-05, "loss": 0.9129, "step": 350 }, { "epoch": 0.0049223701212133646, "grad_norm": 12.020007133483887, "learning_rate": 1.2306009434607233e-05, "loss": 0.8474, "step": 360 }, { "epoch": 0.005059102624580402, "grad_norm": 4.869012832641602, "learning_rate": 1.264784303001299e-05, "loss": 0.706, "step": 370 }, { "epoch": 0.00519583512794744, "grad_norm": 3.8115200996398926, "learning_rate": 1.2989676625418747e-05, "loss": 0.9143, "step": 380 }, { "epoch": 0.005332567631314478, "grad_norm": 3.2572340965270996, "learning_rate": 1.3331510220824503e-05, "loss": 0.7388, "step": 390 }, { "epoch": 0.005469300134681516, "grad_norm": 4.922916412353516, "learning_rate": 1.367334381623026e-05, "loss": 0.8262, "step": 400 }, { "epoch": 0.005606032638048554, "grad_norm": 9.531079292297363, "learning_rate": 1.4015177411636016e-05, "loss": 1.0421, "step": 410 }, { "epoch": 0.005742765141415591, "grad_norm": 7.504466533660889, "learning_rate": 1.4357011007041773e-05, "loss": 0.7495, "step": 420 }, { "epoch": 0.005879497644782629, "grad_norm": 4.159280300140381, "learning_rate": 1.469884460244753e-05, "loss": 1.0079, "step": 430 }, { "epoch": 0.006016230148149667, "grad_norm": 4.515174388885498, "learning_rate": 1.5040678197853286e-05, "loss": 0.8838, "step": 440 }, { "epoch": 0.006152962651516706, "grad_norm": 6.748945713043213, "learning_rate": 1.538251179325904e-05, "loss": 1.0346, "step": 450 }, { "epoch": 0.0062896951548837435, "grad_norm": 4.581608295440674, "learning_rate": 1.57243453886648e-05, "loss": 0.9089, "step": 460 }, { "epoch": 0.006426427658250781, "grad_norm": 2.8599305152893066, "learning_rate": 1.6066178984070554e-05, "loss": 0.9516, "step": 470 }, { "epoch": 0.006563160161617819, "grad_norm": 4.3215813636779785, "learning_rate": 1.6408012579476312e-05, "loss": 1.0686, "step": 480 }, { "epoch": 0.006699892664984857, "grad_norm": 4.998146057128906, "learning_rate": 1.6749846174882067e-05, "loss": 1.0, "step": 490 }, { "epoch": 0.006836625168351895, "grad_norm": 5.760498523712158, "learning_rate": 1.7091679770287825e-05, "loss": 1.0113, "step": 500 }, { "epoch": 0.0069733576717189326, "grad_norm": 4.671004772186279, "learning_rate": 1.743351336569358e-05, "loss": 1.0517, "step": 510 }, { "epoch": 0.00711009017508597, "grad_norm": 3.9378950595855713, "learning_rate": 1.7775346961099335e-05, "loss": 0.8858, "step": 520 }, { "epoch": 0.007246822678453008, "grad_norm": 6.21240234375, "learning_rate": 1.8117180556505094e-05, "loss": 0.7746, "step": 530 }, { "epoch": 0.007383555181820046, "grad_norm": 5.673166275024414, "learning_rate": 1.8459014151910852e-05, "loss": 1.255, "step": 540 }, { "epoch": 0.007520287685187085, "grad_norm": 4.38117790222168, "learning_rate": 1.8800847747316607e-05, "loss": 0.8573, "step": 550 }, { "epoch": 0.0076570201885541225, "grad_norm": 3.9247782230377197, "learning_rate": 1.914268134272236e-05, "loss": 0.9522, "step": 560 }, { "epoch": 0.00779375269192116, "grad_norm": 4.149727821350098, "learning_rate": 1.948451493812812e-05, "loss": 1.0712, "step": 570 }, { "epoch": 0.007930485195288198, "grad_norm": 5.256784915924072, "learning_rate": 1.9826348533533878e-05, "loss": 1.0898, "step": 580 }, { "epoch": 0.008067217698655236, "grad_norm": 4.570008754730225, "learning_rate": 2.0168182128939633e-05, "loss": 0.7491, "step": 590 }, { "epoch": 0.008203950202022274, "grad_norm": 3.4502243995666504, "learning_rate": 2.0510015724345388e-05, "loss": 1.01, "step": 600 }, { "epoch": 0.008340682705389312, "grad_norm": 3.2228071689605713, "learning_rate": 2.0851849319751146e-05, "loss": 1.0231, "step": 610 }, { "epoch": 0.00847741520875635, "grad_norm": 3.131662607192993, "learning_rate": 2.11936829151569e-05, "loss": 0.8085, "step": 620 }, { "epoch": 0.008614147712123387, "grad_norm": 3.7240188121795654, "learning_rate": 2.153551651056266e-05, "loss": 0.9317, "step": 630 }, { "epoch": 0.008750880215490425, "grad_norm": 4.707539081573486, "learning_rate": 2.1877350105968414e-05, "loss": 0.8776, "step": 640 }, { "epoch": 0.008887612718857463, "grad_norm": 4.738656044006348, "learning_rate": 2.2219183701374172e-05, "loss": 0.9642, "step": 650 }, { "epoch": 0.0090243452222245, "grad_norm": 3.23749041557312, "learning_rate": 2.2561017296779927e-05, "loss": 0.8038, "step": 660 }, { "epoch": 0.009161077725591538, "grad_norm": 4.134133815765381, "learning_rate": 2.2902850892185686e-05, "loss": 0.8662, "step": 670 }, { "epoch": 0.009297810228958576, "grad_norm": 3.7903079986572266, "learning_rate": 2.324468448759144e-05, "loss": 0.9063, "step": 680 }, { "epoch": 0.009434542732325614, "grad_norm": 4.590651035308838, "learning_rate": 2.35865180829972e-05, "loss": 0.9969, "step": 690 }, { "epoch": 0.009571275235692654, "grad_norm": 2.9253532886505127, "learning_rate": 2.3928351678402954e-05, "loss": 1.0298, "step": 700 }, { "epoch": 0.009708007739059691, "grad_norm": 3.4453604221343994, "learning_rate": 2.4270185273808712e-05, "loss": 1.032, "step": 710 }, { "epoch": 0.009844740242426729, "grad_norm": 3.5162811279296875, "learning_rate": 2.4612018869214467e-05, "loss": 0.7858, "step": 720 }, { "epoch": 0.009981472745793767, "grad_norm": 4.7231926918029785, "learning_rate": 2.495385246462022e-05, "loss": 0.912, "step": 730 }, { "epoch": 0.010118205249160805, "grad_norm": 3.227121591567993, "learning_rate": 2.529568606002598e-05, "loss": 0.7313, "step": 740 }, { "epoch": 0.010254937752527843, "grad_norm": 6.230903625488281, "learning_rate": 2.5637519655431735e-05, "loss": 0.8251, "step": 750 }, { "epoch": 0.01039167025589488, "grad_norm": 3.0534415245056152, "learning_rate": 2.5979353250837493e-05, "loss": 0.9141, "step": 760 }, { "epoch": 0.010528402759261918, "grad_norm": 8.756896018981934, "learning_rate": 2.6321186846243248e-05, "loss": 1.1149, "step": 770 }, { "epoch": 0.010665135262628956, "grad_norm": 2.8064751625061035, "learning_rate": 2.6663020441649006e-05, "loss": 0.9987, "step": 780 }, { "epoch": 0.010801867765995994, "grad_norm": 4.010786056518555, "learning_rate": 2.700485403705476e-05, "loss": 0.8695, "step": 790 }, { "epoch": 0.010938600269363032, "grad_norm": 3.1748769283294678, "learning_rate": 2.734668763246052e-05, "loss": 0.8231, "step": 800 }, { "epoch": 0.01107533277273007, "grad_norm": 4.38715124130249, "learning_rate": 2.7688521227866274e-05, "loss": 0.7886, "step": 810 }, { "epoch": 0.011212065276097107, "grad_norm": 6.667402267456055, "learning_rate": 2.8030354823272033e-05, "loss": 0.7548, "step": 820 }, { "epoch": 0.011348797779464145, "grad_norm": 3.032961845397949, "learning_rate": 2.8372188418677787e-05, "loss": 0.9601, "step": 830 }, { "epoch": 0.011485530282831183, "grad_norm": 4.855922222137451, "learning_rate": 2.8714022014083546e-05, "loss": 0.9811, "step": 840 }, { "epoch": 0.01162226278619822, "grad_norm": 3.321343421936035, "learning_rate": 2.90558556094893e-05, "loss": 0.8611, "step": 850 }, { "epoch": 0.011758995289565258, "grad_norm": 3.423513650894165, "learning_rate": 2.939768920489506e-05, "loss": 1.1758, "step": 860 }, { "epoch": 0.011895727792932296, "grad_norm": 4.3781418800354, "learning_rate": 2.9739522800300814e-05, "loss": 0.9551, "step": 870 }, { "epoch": 0.012032460296299334, "grad_norm": 3.379232406616211, "learning_rate": 3.0081356395706572e-05, "loss": 0.9075, "step": 880 }, { "epoch": 0.012169192799666372, "grad_norm": 3.8956775665283203, "learning_rate": 3.0423189991112327e-05, "loss": 0.8069, "step": 890 }, { "epoch": 0.012305925303033411, "grad_norm": 2.8801605701446533, "learning_rate": 3.076502358651808e-05, "loss": 0.8271, "step": 900 }, { "epoch": 0.01244265780640045, "grad_norm": 2.2827401161193848, "learning_rate": 3.110685718192384e-05, "loss": 0.7415, "step": 910 }, { "epoch": 0.012579390309767487, "grad_norm": 0.5120655298233032, "learning_rate": 3.14486907773296e-05, "loss": 0.8411, "step": 920 }, { "epoch": 0.012716122813134525, "grad_norm": 5.11733341217041, "learning_rate": 3.179052437273535e-05, "loss": 1.0062, "step": 930 }, { "epoch": 0.012852855316501563, "grad_norm": 2.637390613555908, "learning_rate": 3.213235796814111e-05, "loss": 1.0288, "step": 940 }, { "epoch": 0.0129895878198686, "grad_norm": 3.361844062805176, "learning_rate": 3.2474191563546866e-05, "loss": 0.8552, "step": 950 }, { "epoch": 0.013126320323235638, "grad_norm": 5.237191200256348, "learning_rate": 3.2816025158952625e-05, "loss": 0.8473, "step": 960 }, { "epoch": 0.013263052826602676, "grad_norm": 3.4231793880462646, "learning_rate": 3.315785875435838e-05, "loss": 0.8412, "step": 970 }, { "epoch": 0.013399785329969714, "grad_norm": 5.183591365814209, "learning_rate": 3.3499692349764134e-05, "loss": 0.9325, "step": 980 }, { "epoch": 0.013536517833336752, "grad_norm": 3.745784282684326, "learning_rate": 3.384152594516989e-05, "loss": 1.1441, "step": 990 }, { "epoch": 0.01367325033670379, "grad_norm": 2.155466318130493, "learning_rate": 3.418335954057565e-05, "loss": 0.6249, "step": 1000 }, { "epoch": 0.013809982840070827, "grad_norm": 4.662752628326416, "learning_rate": 3.45251931359814e-05, "loss": 1.2036, "step": 1010 }, { "epoch": 0.013946715343437865, "grad_norm": 3.0902466773986816, "learning_rate": 3.486702673138716e-05, "loss": 0.7142, "step": 1020 }, { "epoch": 0.014083447846804903, "grad_norm": 3.570683717727661, "learning_rate": 3.520886032679292e-05, "loss": 0.7851, "step": 1030 }, { "epoch": 0.01422018035017194, "grad_norm": 4.021545886993408, "learning_rate": 3.555069392219867e-05, "loss": 0.6826, "step": 1040 }, { "epoch": 0.014356912853538979, "grad_norm": 2.8438291549682617, "learning_rate": 3.5892527517604436e-05, "loss": 0.8725, "step": 1050 }, { "epoch": 0.014493645356906016, "grad_norm": 1.9080809354782104, "learning_rate": 3.623436111301019e-05, "loss": 0.711, "step": 1060 }, { "epoch": 0.014630377860273054, "grad_norm": 2.709104537963867, "learning_rate": 3.6576194708415945e-05, "loss": 0.8277, "step": 1070 }, { "epoch": 0.014767110363640092, "grad_norm": 3.4759674072265625, "learning_rate": 3.6918028303821704e-05, "loss": 0.9775, "step": 1080 }, { "epoch": 0.01490384286700713, "grad_norm": 4.664019584655762, "learning_rate": 3.7259861899227455e-05, "loss": 0.9759, "step": 1090 }, { "epoch": 0.01504057537037417, "grad_norm": 5.330552577972412, "learning_rate": 3.760169549463321e-05, "loss": 0.7657, "step": 1100 }, { "epoch": 0.015177307873741207, "grad_norm": 2.670328378677368, "learning_rate": 3.7943529090038965e-05, "loss": 1.1334, "step": 1110 }, { "epoch": 0.015314040377108245, "grad_norm": 2.2308359146118164, "learning_rate": 3.828536268544472e-05, "loss": 0.8049, "step": 1120 }, { "epoch": 0.015450772880475283, "grad_norm": 3.642517566680908, "learning_rate": 3.862719628085049e-05, "loss": 0.8012, "step": 1130 }, { "epoch": 0.01558750538384232, "grad_norm": 3.622128486633301, "learning_rate": 3.896902987625624e-05, "loss": 0.6279, "step": 1140 }, { "epoch": 0.015724237887209357, "grad_norm": 3.461625576019287, "learning_rate": 3.9310863471662e-05, "loss": 0.8022, "step": 1150 }, { "epoch": 0.015860970390576396, "grad_norm": 2.9875833988189697, "learning_rate": 3.9652697067067756e-05, "loss": 1.017, "step": 1160 }, { "epoch": 0.015997702893943432, "grad_norm": 3.020468235015869, "learning_rate": 3.999453066247351e-05, "loss": 1.0606, "step": 1170 }, { "epoch": 0.016134435397310472, "grad_norm": 4.272095680236816, "learning_rate": 4.0336364257879266e-05, "loss": 0.8552, "step": 1180 }, { "epoch": 0.016271167900677508, "grad_norm": 3.8980417251586914, "learning_rate": 4.067819785328502e-05, "loss": 1.0557, "step": 1190 }, { "epoch": 0.016407900404044547, "grad_norm": 2.8262500762939453, "learning_rate": 4.1020031448690776e-05, "loss": 0.8951, "step": 1200 }, { "epoch": 0.016544632907411587, "grad_norm": 2.3320205211639404, "learning_rate": 4.136186504409654e-05, "loss": 0.851, "step": 1210 }, { "epoch": 0.016681365410778623, "grad_norm": 4.557109355926514, "learning_rate": 4.170369863950229e-05, "loss": 1.0647, "step": 1220 }, { "epoch": 0.016818097914145663, "grad_norm": 5.391395092010498, "learning_rate": 4.204553223490805e-05, "loss": 0.963, "step": 1230 }, { "epoch": 0.0169548304175127, "grad_norm": 3.120253324508667, "learning_rate": 4.23873658303138e-05, "loss": 1.1931, "step": 1240 }, { "epoch": 0.017091562920879738, "grad_norm": 2.5931496620178223, "learning_rate": 4.272919942571956e-05, "loss": 0.8026, "step": 1250 }, { "epoch": 0.017228295424246774, "grad_norm": 3.2731289863586426, "learning_rate": 4.307103302112532e-05, "loss": 0.9741, "step": 1260 }, { "epoch": 0.017365027927613814, "grad_norm": 2.7755088806152344, "learning_rate": 4.341286661653107e-05, "loss": 0.8032, "step": 1270 }, { "epoch": 0.01750176043098085, "grad_norm": 3.536123037338257, "learning_rate": 4.375470021193683e-05, "loss": 0.8989, "step": 1280 }, { "epoch": 0.01763849293434789, "grad_norm": 4.073611259460449, "learning_rate": 4.4096533807342587e-05, "loss": 0.9594, "step": 1290 }, { "epoch": 0.017775225437714925, "grad_norm": 3.8742289543151855, "learning_rate": 4.4438367402748345e-05, "loss": 0.7068, "step": 1300 }, { "epoch": 0.017911957941081965, "grad_norm": 3.5143680572509766, "learning_rate": 4.47802009981541e-05, "loss": 0.8975, "step": 1310 }, { "epoch": 0.018048690444449, "grad_norm": 2.5022194385528564, "learning_rate": 4.5122034593559855e-05, "loss": 0.8994, "step": 1320 }, { "epoch": 0.01818542294781604, "grad_norm": 3.326460123062134, "learning_rate": 4.546386818896561e-05, "loss": 0.7402, "step": 1330 }, { "epoch": 0.018322155451183077, "grad_norm": 3.0820486545562744, "learning_rate": 4.580570178437137e-05, "loss": 0.9027, "step": 1340 }, { "epoch": 0.018458887954550116, "grad_norm": 3.3535289764404297, "learning_rate": 4.614753537977712e-05, "loss": 0.9894, "step": 1350 }, { "epoch": 0.018595620457917152, "grad_norm": 4.091365814208984, "learning_rate": 4.648936897518288e-05, "loss": 0.9073, "step": 1360 }, { "epoch": 0.018732352961284192, "grad_norm": 3.0008535385131836, "learning_rate": 4.683120257058863e-05, "loss": 1.0915, "step": 1370 }, { "epoch": 0.018869085464651228, "grad_norm": 3.124408006668091, "learning_rate": 4.71730361659944e-05, "loss": 0.9082, "step": 1380 }, { "epoch": 0.019005817968018267, "grad_norm": 1.4484549760818481, "learning_rate": 4.7514869761400156e-05, "loss": 0.739, "step": 1390 }, { "epoch": 0.019142550471385307, "grad_norm": 4.668883323669434, "learning_rate": 4.785670335680591e-05, "loss": 1.011, "step": 1400 }, { "epoch": 0.019279282974752343, "grad_norm": 3.568807363510132, "learning_rate": 4.8198536952211666e-05, "loss": 0.7398, "step": 1410 }, { "epoch": 0.019416015478119383, "grad_norm": 4.393196105957031, "learning_rate": 4.8540370547617424e-05, "loss": 0.8312, "step": 1420 }, { "epoch": 0.01955274798148642, "grad_norm": 3.3541808128356934, "learning_rate": 4.8882204143023175e-05, "loss": 0.795, "step": 1430 }, { "epoch": 0.019689480484853458, "grad_norm": 4.325143337249756, "learning_rate": 4.9224037738428934e-05, "loss": 0.7324, "step": 1440 }, { "epoch": 0.019826212988220494, "grad_norm": 3.564704656600952, "learning_rate": 4.9565871333834685e-05, "loss": 0.8253, "step": 1450 }, { "epoch": 0.019962945491587534, "grad_norm": 3.6482417583465576, "learning_rate": 4.990770492924044e-05, "loss": 0.8875, "step": 1460 }, { "epoch": 0.02009967799495457, "grad_norm": 3.0319011211395264, "learning_rate": 5.024953852464621e-05, "loss": 0.9214, "step": 1470 }, { "epoch": 0.02023641049832161, "grad_norm": 3.8827993869781494, "learning_rate": 5.059137212005196e-05, "loss": 0.884, "step": 1480 }, { "epoch": 0.020373143001688646, "grad_norm": 9.602070808410645, "learning_rate": 5.093320571545772e-05, "loss": 0.7677, "step": 1490 }, { "epoch": 0.020509875505055685, "grad_norm": 3.3309290409088135, "learning_rate": 5.127503931086347e-05, "loss": 0.8019, "step": 1500 }, { "epoch": 0.02064660800842272, "grad_norm": 2.3893401622772217, "learning_rate": 5.161687290626923e-05, "loss": 0.8469, "step": 1510 }, { "epoch": 0.02078334051178976, "grad_norm": 3.4387354850769043, "learning_rate": 5.1958706501674986e-05, "loss": 0.7899, "step": 1520 }, { "epoch": 0.020920073015156797, "grad_norm": 3.0368776321411133, "learning_rate": 5.230054009708074e-05, "loss": 0.9184, "step": 1530 }, { "epoch": 0.021056805518523836, "grad_norm": 3.9609155654907227, "learning_rate": 5.2642373692486496e-05, "loss": 0.9432, "step": 1540 }, { "epoch": 0.021193538021890872, "grad_norm": 2.7783684730529785, "learning_rate": 5.298420728789226e-05, "loss": 0.7664, "step": 1550 }, { "epoch": 0.021330270525257912, "grad_norm": 2.3380637168884277, "learning_rate": 5.332604088329801e-05, "loss": 1.0013, "step": 1560 }, { "epoch": 0.021467003028624948, "grad_norm": 2.994774341583252, "learning_rate": 5.366787447870377e-05, "loss": 0.9641, "step": 1570 }, { "epoch": 0.021603735531991988, "grad_norm": 2.269829273223877, "learning_rate": 5.400970807410952e-05, "loss": 0.8506, "step": 1580 }, { "epoch": 0.021740468035359024, "grad_norm": 3.9540953636169434, "learning_rate": 5.435154166951528e-05, "loss": 0.9421, "step": 1590 }, { "epoch": 0.021877200538726063, "grad_norm": 3.238497495651245, "learning_rate": 5.469337526492104e-05, "loss": 0.744, "step": 1600 }, { "epoch": 0.022013933042093103, "grad_norm": 4.58988094329834, "learning_rate": 5.503520886032679e-05, "loss": 0.7832, "step": 1610 }, { "epoch": 0.02215066554546014, "grad_norm": 2.803022861480713, "learning_rate": 5.537704245573255e-05, "loss": 0.7535, "step": 1620 }, { "epoch": 0.02228739804882718, "grad_norm": 2.925105094909668, "learning_rate": 5.571887605113831e-05, "loss": 0.9669, "step": 1630 }, { "epoch": 0.022424130552194214, "grad_norm": 7.0351643562316895, "learning_rate": 5.6060709646544065e-05, "loss": 0.933, "step": 1640 }, { "epoch": 0.022560863055561254, "grad_norm": 2.9068000316619873, "learning_rate": 5.640254324194982e-05, "loss": 0.9268, "step": 1650 }, { "epoch": 0.02269759555892829, "grad_norm": 3.7103493213653564, "learning_rate": 5.6744376837355575e-05, "loss": 0.9116, "step": 1660 }, { "epoch": 0.02283432806229533, "grad_norm": 2.939399242401123, "learning_rate": 5.708621043276133e-05, "loss": 0.7883, "step": 1670 }, { "epoch": 0.022971060565662366, "grad_norm": 3.1652276515960693, "learning_rate": 5.742804402816709e-05, "loss": 0.7928, "step": 1680 }, { "epoch": 0.023107793069029405, "grad_norm": 3.3316454887390137, "learning_rate": 5.776987762357284e-05, "loss": 0.9024, "step": 1690 }, { "epoch": 0.02324452557239644, "grad_norm": 3.0838494300842285, "learning_rate": 5.81117112189786e-05, "loss": 0.9066, "step": 1700 }, { "epoch": 0.02338125807576348, "grad_norm": 4.100976467132568, "learning_rate": 5.845354481438435e-05, "loss": 1.0308, "step": 1710 }, { "epoch": 0.023517990579130517, "grad_norm": 6.007636547088623, "learning_rate": 5.879537840979012e-05, "loss": 0.9718, "step": 1720 }, { "epoch": 0.023654723082497556, "grad_norm": 3.6542701721191406, "learning_rate": 5.9137212005195876e-05, "loss": 1.0846, "step": 1730 }, { "epoch": 0.023791455585864592, "grad_norm": 4.79132080078125, "learning_rate": 5.947904560060163e-05, "loss": 0.8708, "step": 1740 }, { "epoch": 0.023928188089231632, "grad_norm": 2.8893685340881348, "learning_rate": 5.9820879196007386e-05, "loss": 0.8877, "step": 1750 }, { "epoch": 0.024064920592598668, "grad_norm": 4.339055061340332, "learning_rate": 6.0162712791413144e-05, "loss": 1.0987, "step": 1760 }, { "epoch": 0.024201653095965708, "grad_norm": 3.254171371459961, "learning_rate": 6.0504546386818896e-05, "loss": 0.9617, "step": 1770 }, { "epoch": 0.024338385599332744, "grad_norm": 3.6374218463897705, "learning_rate": 6.0846379982224654e-05, "loss": 0.9043, "step": 1780 }, { "epoch": 0.024475118102699783, "grad_norm": 3.360774040222168, "learning_rate": 6.11882135776304e-05, "loss": 0.9064, "step": 1790 }, { "epoch": 0.024611850606066823, "grad_norm": 3.9458749294281006, "learning_rate": 6.153004717303616e-05, "loss": 0.766, "step": 1800 }, { "epoch": 0.02474858310943386, "grad_norm": 3.6589207649230957, "learning_rate": 6.187188076844192e-05, "loss": 0.9645, "step": 1810 }, { "epoch": 0.0248853156128009, "grad_norm": 2.555492877960205, "learning_rate": 6.221371436384768e-05, "loss": 0.845, "step": 1820 }, { "epoch": 0.025022048116167935, "grad_norm": 5.603726387023926, "learning_rate": 6.255554795925342e-05, "loss": 0.861, "step": 1830 }, { "epoch": 0.025158780619534974, "grad_norm": 4.148266315460205, "learning_rate": 6.28973815546592e-05, "loss": 0.7941, "step": 1840 }, { "epoch": 0.02529551312290201, "grad_norm": 3.7512876987457275, "learning_rate": 6.323921515006495e-05, "loss": 1.0372, "step": 1850 }, { "epoch": 0.02543224562626905, "grad_norm": 3.080998420715332, "learning_rate": 6.35810487454707e-05, "loss": 1.0122, "step": 1860 }, { "epoch": 0.025568978129636086, "grad_norm": 4.577815532684326, "learning_rate": 6.392288234087647e-05, "loss": 0.9078, "step": 1870 }, { "epoch": 0.025705710633003125, "grad_norm": 3.0601418018341064, "learning_rate": 6.426471593628222e-05, "loss": 0.6931, "step": 1880 }, { "epoch": 0.02584244313637016, "grad_norm": 2.478607177734375, "learning_rate": 6.460654953168797e-05, "loss": 0.8448, "step": 1890 }, { "epoch": 0.0259791756397372, "grad_norm": 2.637928009033203, "learning_rate": 6.494838312709373e-05, "loss": 0.9581, "step": 1900 }, { "epoch": 0.026115908143104237, "grad_norm": 2.681654930114746, "learning_rate": 6.529021672249949e-05, "loss": 0.8571, "step": 1910 }, { "epoch": 0.026252640646471277, "grad_norm": 2.741722822189331, "learning_rate": 6.563205031790525e-05, "loss": 1.0346, "step": 1920 }, { "epoch": 0.026389373149838313, "grad_norm": 2.476750612258911, "learning_rate": 6.5973883913311e-05, "loss": 0.9052, "step": 1930 }, { "epoch": 0.026526105653205352, "grad_norm": 2.8570265769958496, "learning_rate": 6.631571750871677e-05, "loss": 0.9119, "step": 1940 }, { "epoch": 0.026662838156572388, "grad_norm": 2.511575222015381, "learning_rate": 6.665755110412251e-05, "loss": 0.9542, "step": 1950 }, { "epoch": 0.026799570659939428, "grad_norm": 4.232297420501709, "learning_rate": 6.699938469952827e-05, "loss": 0.6195, "step": 1960 }, { "epoch": 0.026936303163306464, "grad_norm": 3.6617319583892822, "learning_rate": 6.734121829493403e-05, "loss": 0.9763, "step": 1970 }, { "epoch": 0.027073035666673503, "grad_norm": 2.1814215183258057, "learning_rate": 6.768305189033979e-05, "loss": 0.7925, "step": 1980 }, { "epoch": 0.027209768170040543, "grad_norm": 17.113502502441406, "learning_rate": 6.802488548574553e-05, "loss": 0.8298, "step": 1990 }, { "epoch": 0.02734650067340758, "grad_norm": 3.4960243701934814, "learning_rate": 6.83667190811513e-05, "loss": 0.8721, "step": 2000 }, { "epoch": 0.02748323317677462, "grad_norm": 4.182999134063721, "learning_rate": 6.870855267655706e-05, "loss": 0.911, "step": 2010 }, { "epoch": 0.027619965680141655, "grad_norm": 4.693713665008545, "learning_rate": 6.90503862719628e-05, "loss": 0.7091, "step": 2020 }, { "epoch": 0.027756698183508694, "grad_norm": 2.829740524291992, "learning_rate": 6.939221986736858e-05, "loss": 0.8763, "step": 2030 }, { "epoch": 0.02789343068687573, "grad_norm": 1.0617178678512573, "learning_rate": 6.973405346277432e-05, "loss": 0.8438, "step": 2040 }, { "epoch": 0.02803016319024277, "grad_norm": 4.178346157073975, "learning_rate": 7.007588705818008e-05, "loss": 0.7991, "step": 2050 }, { "epoch": 0.028166895693609806, "grad_norm": 2.2853472232818604, "learning_rate": 7.041772065358584e-05, "loss": 0.8282, "step": 2060 }, { "epoch": 0.028303628196976845, "grad_norm": 3.688190221786499, "learning_rate": 7.07595542489916e-05, "loss": 0.8754, "step": 2070 }, { "epoch": 0.02844036070034388, "grad_norm": 2.6273787021636963, "learning_rate": 7.110138784439734e-05, "loss": 1.0127, "step": 2080 }, { "epoch": 0.02857709320371092, "grad_norm": 2.569284439086914, "learning_rate": 7.14432214398031e-05, "loss": 0.8463, "step": 2090 }, { "epoch": 0.028713825707077957, "grad_norm": 6.572686672210693, "learning_rate": 7.178505503520887e-05, "loss": 1.0443, "step": 2100 }, { "epoch": 0.028850558210444997, "grad_norm": 4.140763759613037, "learning_rate": 7.212688863061462e-05, "loss": 1.0043, "step": 2110 }, { "epoch": 0.028987290713812033, "grad_norm": 3.1951944828033447, "learning_rate": 7.246872222602037e-05, "loss": 1.1079, "step": 2120 }, { "epoch": 0.029124023217179072, "grad_norm": 6.717692852020264, "learning_rate": 7.281055582142613e-05, "loss": 0.634, "step": 2130 }, { "epoch": 0.02926075572054611, "grad_norm": 2.3729910850524902, "learning_rate": 7.315238941683189e-05, "loss": 0.9244, "step": 2140 }, { "epoch": 0.029397488223913148, "grad_norm": 2.570344924926758, "learning_rate": 7.349422301223764e-05, "loss": 1.0899, "step": 2150 }, { "epoch": 0.029534220727280184, "grad_norm": 2.64841628074646, "learning_rate": 7.383605660764341e-05, "loss": 0.7208, "step": 2160 }, { "epoch": 0.029670953230647223, "grad_norm": 2.6356513500213623, "learning_rate": 7.417789020304917e-05, "loss": 0.7456, "step": 2170 }, { "epoch": 0.02980768573401426, "grad_norm": 3.4986305236816406, "learning_rate": 7.451972379845491e-05, "loss": 1.0054, "step": 2180 }, { "epoch": 0.0299444182373813, "grad_norm": 2.8755462169647217, "learning_rate": 7.486155739386068e-05, "loss": 1.0023, "step": 2190 }, { "epoch": 0.03008115074074834, "grad_norm": 6.682464599609375, "learning_rate": 7.520339098926643e-05, "loss": 0.9834, "step": 2200 }, { "epoch": 0.030217883244115375, "grad_norm": 3.676694393157959, "learning_rate": 7.554522458467218e-05, "loss": 1.015, "step": 2210 }, { "epoch": 0.030354615747482414, "grad_norm": 2.269314765930176, "learning_rate": 7.588705818007793e-05, "loss": 0.8955, "step": 2220 }, { "epoch": 0.03049134825084945, "grad_norm": 2.956770896911621, "learning_rate": 7.62288917754837e-05, "loss": 0.8956, "step": 2230 }, { "epoch": 0.03062808075421649, "grad_norm": 2.4388020038604736, "learning_rate": 7.657072537088945e-05, "loss": 0.9067, "step": 2240 }, { "epoch": 0.030764813257583526, "grad_norm": 2.436910390853882, "learning_rate": 7.69125589662952e-05, "loss": 0.8968, "step": 2250 }, { "epoch": 0.030901545760950565, "grad_norm": 8.31393051147461, "learning_rate": 7.725439256170098e-05, "loss": 0.8868, "step": 2260 }, { "epoch": 0.0310382782643176, "grad_norm": 4.031277179718018, "learning_rate": 7.759622615710672e-05, "loss": 0.8737, "step": 2270 }, { "epoch": 0.03117501076768464, "grad_norm": 3.9178354740142822, "learning_rate": 7.793805975251248e-05, "loss": 0.9129, "step": 2280 }, { "epoch": 0.03131174327105168, "grad_norm": 4.726451396942139, "learning_rate": 7.827989334791824e-05, "loss": 0.8898, "step": 2290 }, { "epoch": 0.03144847577441871, "grad_norm": 3.656195878982544, "learning_rate": 7.8621726943324e-05, "loss": 0.8819, "step": 2300 }, { "epoch": 0.031585208277785756, "grad_norm": 5.74253511428833, "learning_rate": 7.896356053872974e-05, "loss": 0.8332, "step": 2310 }, { "epoch": 0.03172194078115279, "grad_norm": 3.098468065261841, "learning_rate": 7.930539413413551e-05, "loss": 1.1132, "step": 2320 }, { "epoch": 0.03185867328451983, "grad_norm": 3.3627471923828125, "learning_rate": 7.964722772954126e-05, "loss": 0.8502, "step": 2330 }, { "epoch": 0.031995405787886864, "grad_norm": 3.2187423706054688, "learning_rate": 7.998906132494702e-05, "loss": 0.9649, "step": 2340 }, { "epoch": 0.03213213829125391, "grad_norm": 4.002790927886963, "learning_rate": 8.033089492035277e-05, "loss": 0.8267, "step": 2350 }, { "epoch": 0.032268870794620944, "grad_norm": 5.6300048828125, "learning_rate": 8.067272851575853e-05, "loss": 0.8453, "step": 2360 }, { "epoch": 0.03240560329798798, "grad_norm": 3.4506354331970215, "learning_rate": 8.101456211116429e-05, "loss": 1.0082, "step": 2370 }, { "epoch": 0.032542335801355016, "grad_norm": 5.977406024932861, "learning_rate": 8.135639570657003e-05, "loss": 0.8404, "step": 2380 }, { "epoch": 0.03267906830472206, "grad_norm": 4.943693161010742, "learning_rate": 8.16982293019758e-05, "loss": 1.0212, "step": 2390 }, { "epoch": 0.032815800808089095, "grad_norm": 3.6617629528045654, "learning_rate": 8.204006289738155e-05, "loss": 1.0405, "step": 2400 }, { "epoch": 0.03295253331145613, "grad_norm": 4.7897186279296875, "learning_rate": 8.238189649278731e-05, "loss": 0.896, "step": 2410 }, { "epoch": 0.033089265814823174, "grad_norm": 2.953903913497925, "learning_rate": 8.272373008819308e-05, "loss": 0.8947, "step": 2420 }, { "epoch": 0.03322599831819021, "grad_norm": 4.907362461090088, "learning_rate": 8.306556368359883e-05, "loss": 1.1276, "step": 2430 }, { "epoch": 0.033362730821557246, "grad_norm": 3.7504265308380127, "learning_rate": 8.340739727900458e-05, "loss": 1.0276, "step": 2440 }, { "epoch": 0.03349946332492428, "grad_norm": 2.9375815391540527, "learning_rate": 8.374923087441034e-05, "loss": 1.0157, "step": 2450 }, { "epoch": 0.033636195828291325, "grad_norm": 3.745138168334961, "learning_rate": 8.40910644698161e-05, "loss": 1.1478, "step": 2460 }, { "epoch": 0.03377292833165836, "grad_norm": 4.083057403564453, "learning_rate": 8.443289806522185e-05, "loss": 0.9175, "step": 2470 }, { "epoch": 0.0339096608350254, "grad_norm": 4.2754011154174805, "learning_rate": 8.47747316606276e-05, "loss": 1.0039, "step": 2480 }, { "epoch": 0.03404639333839243, "grad_norm": 3.424111843109131, "learning_rate": 8.511656525603336e-05, "loss": 1.0879, "step": 2490 }, { "epoch": 0.034183125841759476, "grad_norm": 4.6498847007751465, "learning_rate": 8.545839885143912e-05, "loss": 0.9479, "step": 2500 }, { "epoch": 0.03431985834512651, "grad_norm": 3.502425193786621, "learning_rate": 8.580023244684488e-05, "loss": 1.0315, "step": 2510 }, { "epoch": 0.03445659084849355, "grad_norm": 2.840390920639038, "learning_rate": 8.614206604225064e-05, "loss": 0.9051, "step": 2520 }, { "epoch": 0.034593323351860585, "grad_norm": 7.591707229614258, "learning_rate": 8.64838996376564e-05, "loss": 1.094, "step": 2530 }, { "epoch": 0.03473005585522763, "grad_norm": 3.7349178791046143, "learning_rate": 8.682573323306214e-05, "loss": 0.9104, "step": 2540 }, { "epoch": 0.034866788358594664, "grad_norm": 5.130105018615723, "learning_rate": 8.716756682846791e-05, "loss": 1.0765, "step": 2550 }, { "epoch": 0.0350035208619617, "grad_norm": 4.654808044433594, "learning_rate": 8.750940042387366e-05, "loss": 0.9687, "step": 2560 }, { "epoch": 0.035140253365328736, "grad_norm": 3.61781907081604, "learning_rate": 8.785123401927941e-05, "loss": 0.822, "step": 2570 }, { "epoch": 0.03527698586869578, "grad_norm": 4.492804050445557, "learning_rate": 8.819306761468517e-05, "loss": 0.9356, "step": 2580 }, { "epoch": 0.035413718372062815, "grad_norm": 3.6035079956054688, "learning_rate": 8.853490121009093e-05, "loss": 0.8172, "step": 2590 }, { "epoch": 0.03555045087542985, "grad_norm": 4.015285491943359, "learning_rate": 8.887673480549669e-05, "loss": 1.1392, "step": 2600 }, { "epoch": 0.035687183378796894, "grad_norm": 3.9042186737060547, "learning_rate": 8.921856840090243e-05, "loss": 0.9681, "step": 2610 }, { "epoch": 0.03582391588216393, "grad_norm": 3.382235050201416, "learning_rate": 8.95604019963082e-05, "loss": 0.9674, "step": 2620 }, { "epoch": 0.035960648385530966, "grad_norm": 4.6510467529296875, "learning_rate": 8.990223559171395e-05, "loss": 0.8062, "step": 2630 }, { "epoch": 0.036097380888898, "grad_norm": 4.242761611938477, "learning_rate": 9.024406918711971e-05, "loss": 0.8857, "step": 2640 }, { "epoch": 0.036234113392265045, "grad_norm": 3.0898170471191406, "learning_rate": 9.058590278252547e-05, "loss": 0.7148, "step": 2650 }, { "epoch": 0.03637084589563208, "grad_norm": 2.882920265197754, "learning_rate": 9.092773637793123e-05, "loss": 0.9532, "step": 2660 }, { "epoch": 0.03650757839899912, "grad_norm": 4.780941486358643, "learning_rate": 9.126956997333697e-05, "loss": 0.8658, "step": 2670 }, { "epoch": 0.03664431090236615, "grad_norm": 3.0474753379821777, "learning_rate": 9.161140356874274e-05, "loss": 0.8529, "step": 2680 }, { "epoch": 0.036781043405733196, "grad_norm": 3.6352930068969727, "learning_rate": 9.19532371641485e-05, "loss": 1.0583, "step": 2690 }, { "epoch": 0.03691777590910023, "grad_norm": 3.3680579662323, "learning_rate": 9.229507075955425e-05, "loss": 0.875, "step": 2700 }, { "epoch": 0.03705450841246727, "grad_norm": 2.5361523628234863, "learning_rate": 9.263690435496002e-05, "loss": 1.047, "step": 2710 }, { "epoch": 0.037191240915834305, "grad_norm": 3.5060789585113525, "learning_rate": 9.297873795036576e-05, "loss": 0.685, "step": 2720 }, { "epoch": 0.03732797341920135, "grad_norm": 4.525522232055664, "learning_rate": 9.332057154577152e-05, "loss": 1.0531, "step": 2730 }, { "epoch": 0.037464705922568384, "grad_norm": 7.866311550140381, "learning_rate": 9.366240514117726e-05, "loss": 0.9445, "step": 2740 }, { "epoch": 0.03760143842593542, "grad_norm": 7.903514862060547, "learning_rate": 9.400423873658304e-05, "loss": 1.0686, "step": 2750 }, { "epoch": 0.037738170929302456, "grad_norm": 3.971968650817871, "learning_rate": 9.43460723319888e-05, "loss": 1.1034, "step": 2760 }, { "epoch": 0.0378749034326695, "grad_norm": 3.9011454582214355, "learning_rate": 9.468790592739454e-05, "loss": 1.0553, "step": 2770 }, { "epoch": 0.038011635936036535, "grad_norm": 5.980458736419678, "learning_rate": 9.502973952280031e-05, "loss": 1.0268, "step": 2780 }, { "epoch": 0.03814836843940357, "grad_norm": 3.5851621627807617, "learning_rate": 9.537157311820606e-05, "loss": 0.782, "step": 2790 }, { "epoch": 0.038285100942770614, "grad_norm": 3.6818156242370605, "learning_rate": 9.571340671361181e-05, "loss": 0.9212, "step": 2800 }, { "epoch": 0.03842183344613765, "grad_norm": 4.270318508148193, "learning_rate": 9.605524030901757e-05, "loss": 0.9186, "step": 2810 }, { "epoch": 0.038558565949504686, "grad_norm": 5.639037609100342, "learning_rate": 9.639707390442333e-05, "loss": 0.8825, "step": 2820 }, { "epoch": 0.03869529845287172, "grad_norm": 3.4236645698547363, "learning_rate": 9.673890749982908e-05, "loss": 0.9678, "step": 2830 }, { "epoch": 0.038832030956238765, "grad_norm": 4.343820571899414, "learning_rate": 9.708074109523485e-05, "loss": 0.9782, "step": 2840 }, { "epoch": 0.0389687634596058, "grad_norm": 4.108419418334961, "learning_rate": 9.74225746906406e-05, "loss": 1.0797, "step": 2850 }, { "epoch": 0.03910549596297284, "grad_norm": 4.918668270111084, "learning_rate": 9.776440828604635e-05, "loss": 1.0352, "step": 2860 }, { "epoch": 0.039242228466339873, "grad_norm": 5.305405616760254, "learning_rate": 9.810624188145211e-05, "loss": 0.8408, "step": 2870 }, { "epoch": 0.039378960969706917, "grad_norm": 1.6730798482894897, "learning_rate": 9.844807547685787e-05, "loss": 0.826, "step": 2880 }, { "epoch": 0.03951569347307395, "grad_norm": 3.361149311065674, "learning_rate": 9.878990907226363e-05, "loss": 0.9064, "step": 2890 }, { "epoch": 0.03965242597644099, "grad_norm": 4.295650005340576, "learning_rate": 9.913174266766937e-05, "loss": 0.9878, "step": 2900 }, { "epoch": 0.039789158479808025, "grad_norm": 3.087808847427368, "learning_rate": 9.947357626307514e-05, "loss": 0.9527, "step": 2910 }, { "epoch": 0.03992589098317507, "grad_norm": 5.134415149688721, "learning_rate": 9.981540985848089e-05, "loss": 0.9837, "step": 2920 }, { "epoch": 0.040062623486542104, "grad_norm": 3.430088520050049, "learning_rate": 0.00010015724345388664, "loss": 1.1325, "step": 2930 }, { "epoch": 0.04019935598990914, "grad_norm": 3.9032719135284424, "learning_rate": 0.00010049907704929242, "loss": 0.9129, "step": 2940 }, { "epoch": 0.040336088493276176, "grad_norm": 5.094065189361572, "learning_rate": 0.00010084091064469816, "loss": 1.125, "step": 2950 }, { "epoch": 0.04047282099664322, "grad_norm": 3.235760450363159, "learning_rate": 0.00010118274424010392, "loss": 0.8536, "step": 2960 }, { "epoch": 0.040609553500010255, "grad_norm": 3.2804415225982666, "learning_rate": 0.00010152457783550968, "loss": 1.035, "step": 2970 }, { "epoch": 0.04074628600337729, "grad_norm": 3.320518732070923, "learning_rate": 0.00010186641143091544, "loss": 0.8321, "step": 2980 }, { "epoch": 0.040883018506744334, "grad_norm": 3.916083335876465, "learning_rate": 0.00010220824502632118, "loss": 0.9208, "step": 2990 }, { "epoch": 0.04101975101011137, "grad_norm": 2.229526996612549, "learning_rate": 0.00010255007862172694, "loss": 1.0188, "step": 3000 }, { "epoch": 0.041156483513478406, "grad_norm": 4.492844581604004, "learning_rate": 0.00010289191221713271, "loss": 0.9914, "step": 3010 }, { "epoch": 0.04129321601684544, "grad_norm": 3.2864277362823486, "learning_rate": 0.00010323374581253846, "loss": 0.909, "step": 3020 }, { "epoch": 0.041429948520212485, "grad_norm": 2.835158109664917, "learning_rate": 0.00010357557940794421, "loss": 0.805, "step": 3030 }, { "epoch": 0.04156668102357952, "grad_norm": 3.861030101776123, "learning_rate": 0.00010391741300334997, "loss": 0.845, "step": 3040 }, { "epoch": 0.04170341352694656, "grad_norm": 4.240000247955322, "learning_rate": 0.00010425924659875573, "loss": 1.1071, "step": 3050 }, { "epoch": 0.041840146030313594, "grad_norm": 3.048426389694214, "learning_rate": 0.00010460108019416148, "loss": 0.8698, "step": 3060 }, { "epoch": 0.04197687853368064, "grad_norm": 3.5908970832824707, "learning_rate": 0.00010494291378956725, "loss": 1.1716, "step": 3070 }, { "epoch": 0.04211361103704767, "grad_norm": 3.589881181716919, "learning_rate": 0.00010528474738497299, "loss": 0.7751, "step": 3080 }, { "epoch": 0.04225034354041471, "grad_norm": 3.6888246536254883, "learning_rate": 0.00010562658098037875, "loss": 0.9145, "step": 3090 }, { "epoch": 0.042387076043781745, "grad_norm": 3.1264424324035645, "learning_rate": 0.00010596841457578452, "loss": 0.9209, "step": 3100 }, { "epoch": 0.04252380854714879, "grad_norm": 2.842658758163452, "learning_rate": 0.00010631024817119027, "loss": 0.8518, "step": 3110 }, { "epoch": 0.042660541050515824, "grad_norm": 4.1961846351623535, "learning_rate": 0.00010665208176659602, "loss": 1.0851, "step": 3120 }, { "epoch": 0.04279727355388286, "grad_norm": 3.6262331008911133, "learning_rate": 0.00010699391536200177, "loss": 0.9877, "step": 3130 }, { "epoch": 0.042934006057249896, "grad_norm": 2.409003257751465, "learning_rate": 0.00010733574895740754, "loss": 0.8332, "step": 3140 }, { "epoch": 0.04307073856061694, "grad_norm": 3.858372688293457, "learning_rate": 0.00010767758255281329, "loss": 0.8523, "step": 3150 }, { "epoch": 0.043207471063983975, "grad_norm": 5.644826889038086, "learning_rate": 0.00010801941614821904, "loss": 0.8775, "step": 3160 }, { "epoch": 0.04334420356735101, "grad_norm": 3.0290684700012207, "learning_rate": 0.00010836124974362482, "loss": 0.893, "step": 3170 }, { "epoch": 0.04348093607071805, "grad_norm": 3.4390006065368652, "learning_rate": 0.00010870308333903056, "loss": 0.8695, "step": 3180 }, { "epoch": 0.04361766857408509, "grad_norm": 3.6704788208007812, "learning_rate": 0.00010904491693443632, "loss": 0.8999, "step": 3190 }, { "epoch": 0.043754401077452126, "grad_norm": 3.5894947052001953, "learning_rate": 0.00010938675052984208, "loss": 0.8781, "step": 3200 }, { "epoch": 0.04389113358081916, "grad_norm": 3.613201856613159, "learning_rate": 0.00010972858412524784, "loss": 1.0621, "step": 3210 }, { "epoch": 0.044027866084186205, "grad_norm": 3.2661781311035156, "learning_rate": 0.00011007041772065358, "loss": 0.8235, "step": 3220 }, { "epoch": 0.04416459858755324, "grad_norm": 6.1339545249938965, "learning_rate": 0.00011041225131605935, "loss": 0.9939, "step": 3230 }, { "epoch": 0.04430133109092028, "grad_norm": 4.147660732269287, "learning_rate": 0.0001107540849114651, "loss": 0.9305, "step": 3240 }, { "epoch": 0.044438063594287314, "grad_norm": 3.9536499977111816, "learning_rate": 0.00011109591850687086, "loss": 1.0048, "step": 3250 }, { "epoch": 0.04457479609765436, "grad_norm": 2.4408986568450928, "learning_rate": 0.00011143775210227661, "loss": 0.9371, "step": 3260 }, { "epoch": 0.04471152860102139, "grad_norm": 3.7730438709259033, "learning_rate": 0.00011177958569768237, "loss": 0.8991, "step": 3270 }, { "epoch": 0.04484826110438843, "grad_norm": 3.58587908744812, "learning_rate": 0.00011212141929308813, "loss": 0.8628, "step": 3280 }, { "epoch": 0.044984993607755465, "grad_norm": 4.238520622253418, "learning_rate": 0.00011246325288849387, "loss": 0.9118, "step": 3290 }, { "epoch": 0.04512172611112251, "grad_norm": 4.993622303009033, "learning_rate": 0.00011280508648389965, "loss": 1.0305, "step": 3300 }, { "epoch": 0.045258458614489544, "grad_norm": 3.7452759742736816, "learning_rate": 0.00011314692007930539, "loss": 0.6968, "step": 3310 }, { "epoch": 0.04539519111785658, "grad_norm": 5.787862300872803, "learning_rate": 0.00011348875367471115, "loss": 1.0417, "step": 3320 }, { "epoch": 0.045531923621223616, "grad_norm": 8.301568031311035, "learning_rate": 0.00011383058727011691, "loss": 0.8387, "step": 3330 }, { "epoch": 0.04566865612459066, "grad_norm": 4.045403003692627, "learning_rate": 0.00011417242086552267, "loss": 0.7826, "step": 3340 }, { "epoch": 0.045805388627957695, "grad_norm": 4.566899299621582, "learning_rate": 0.00011451425446092842, "loss": 1.1162, "step": 3350 }, { "epoch": 0.04594212113132473, "grad_norm": 3.9042224884033203, "learning_rate": 0.00011485608805633418, "loss": 1.0069, "step": 3360 }, { "epoch": 0.04607885363469177, "grad_norm": 3.810936689376831, "learning_rate": 0.00011519792165173994, "loss": 0.8774, "step": 3370 }, { "epoch": 0.04621558613805881, "grad_norm": 2.480936050415039, "learning_rate": 0.00011553975524714569, "loss": 1.0466, "step": 3380 }, { "epoch": 0.046352318641425846, "grad_norm": 3.7951600551605225, "learning_rate": 0.00011588158884255146, "loss": 0.817, "step": 3390 }, { "epoch": 0.04648905114479288, "grad_norm": 3.265629291534424, "learning_rate": 0.0001162234224379572, "loss": 0.9415, "step": 3400 }, { "epoch": 0.046625783648159926, "grad_norm": 3.5552120208740234, "learning_rate": 0.00011656525603336296, "loss": 0.8521, "step": 3410 }, { "epoch": 0.04676251615152696, "grad_norm": 4.5192975997924805, "learning_rate": 0.0001169070896287687, "loss": 0.9724, "step": 3420 }, { "epoch": 0.046899248654894, "grad_norm": 4.08444356918335, "learning_rate": 0.00011724892322417448, "loss": 0.9253, "step": 3430 }, { "epoch": 0.047035981158261034, "grad_norm": 3.0139176845550537, "learning_rate": 0.00011759075681958024, "loss": 0.8876, "step": 3440 }, { "epoch": 0.04717271366162808, "grad_norm": 4.672104835510254, "learning_rate": 0.00011793259041498598, "loss": 0.9361, "step": 3450 }, { "epoch": 0.04730944616499511, "grad_norm": 3.578533172607422, "learning_rate": 0.00011827442401039175, "loss": 0.7912, "step": 3460 }, { "epoch": 0.04744617866836215, "grad_norm": 3.6859447956085205, "learning_rate": 0.0001186162576057975, "loss": 1.0688, "step": 3470 }, { "epoch": 0.047582911171729185, "grad_norm": 3.7876367568969727, "learning_rate": 0.00011895809120120325, "loss": 0.7864, "step": 3480 }, { "epoch": 0.04771964367509623, "grad_norm": 3.743321657180786, "learning_rate": 0.00011929992479660901, "loss": 0.8569, "step": 3490 }, { "epoch": 0.047856376178463264, "grad_norm": 4.717001438140869, "learning_rate": 0.00011964175839201477, "loss": 1.1908, "step": 3500 }, { "epoch": 0.0479931086818303, "grad_norm": 3.6356592178344727, "learning_rate": 0.00011998359198742053, "loss": 0.8597, "step": 3510 }, { "epoch": 0.048129841185197336, "grad_norm": 4.139952182769775, "learning_rate": 0.00012032542558282629, "loss": 0.9713, "step": 3520 }, { "epoch": 0.04826657368856438, "grad_norm": 4.472915172576904, "learning_rate": 0.00012066725917823205, "loss": 0.8929, "step": 3530 }, { "epoch": 0.048403306191931415, "grad_norm": 4.642165184020996, "learning_rate": 0.00012100909277363779, "loss": 0.9488, "step": 3540 }, { "epoch": 0.04854003869529845, "grad_norm": 4.880780220031738, "learning_rate": 0.00012135092636904355, "loss": 1.1826, "step": 3550 }, { "epoch": 0.04867677119866549, "grad_norm": 3.0126781463623047, "learning_rate": 0.00012169275996444931, "loss": 0.8112, "step": 3560 }, { "epoch": 0.04881350370203253, "grad_norm": 4.249320030212402, "learning_rate": 0.00012203459355985507, "loss": 1.0507, "step": 3570 }, { "epoch": 0.04895023620539957, "grad_norm": 2.5304675102233887, "learning_rate": 0.0001223764271552608, "loss": 0.8312, "step": 3580 }, { "epoch": 0.0490869687087666, "grad_norm": 3.2925639152526855, "learning_rate": 0.0001227182607506666, "loss": 0.9465, "step": 3590 }, { "epoch": 0.049223701212133646, "grad_norm": 3.4014463424682617, "learning_rate": 0.00012306009434607233, "loss": 1.2469, "step": 3600 }, { "epoch": 0.04936043371550068, "grad_norm": 6.275844573974609, "learning_rate": 0.00012340192794147809, "loss": 0.947, "step": 3610 }, { "epoch": 0.04949716621886772, "grad_norm": 3.9634642601013184, "learning_rate": 0.00012374376153688384, "loss": 0.9228, "step": 3620 }, { "epoch": 0.049633898722234754, "grad_norm": 3.0333993434906006, "learning_rate": 0.0001240855951322896, "loss": 0.7531, "step": 3630 }, { "epoch": 0.0497706312256018, "grad_norm": 4.367456436157227, "learning_rate": 0.00012442742872769536, "loss": 0.8012, "step": 3640 }, { "epoch": 0.04990736372896883, "grad_norm": 5.1423234939575195, "learning_rate": 0.00012476926232310112, "loss": 0.8888, "step": 3650 }, { "epoch": 0.05004409623233587, "grad_norm": 3.4878766536712646, "learning_rate": 0.00012511109591850685, "loss": 1.1054, "step": 3660 }, { "epoch": 0.050180828735702905, "grad_norm": 6.753676891326904, "learning_rate": 0.00012545292951391264, "loss": 1.2519, "step": 3670 }, { "epoch": 0.05031756123906995, "grad_norm": 3.4324069023132324, "learning_rate": 0.0001257947631093184, "loss": 0.8139, "step": 3680 }, { "epoch": 0.050454293742436984, "grad_norm": 5.1391377449035645, "learning_rate": 0.00012613659670472412, "loss": 1.0326, "step": 3690 }, { "epoch": 0.05059102624580402, "grad_norm": 5.3087992668151855, "learning_rate": 0.0001264784303001299, "loss": 1.062, "step": 3700 }, { "epoch": 0.050727758749171056, "grad_norm": 5.963212013244629, "learning_rate": 0.00012682026389553567, "loss": 0.8515, "step": 3710 }, { "epoch": 0.0508644912525381, "grad_norm": 3.578108072280884, "learning_rate": 0.0001271620974909414, "loss": 0.9557, "step": 3720 }, { "epoch": 0.051001223755905135, "grad_norm": 5.250110149383545, "learning_rate": 0.00012750393108634716, "loss": 0.9083, "step": 3730 }, { "epoch": 0.05113795625927217, "grad_norm": 3.604308843612671, "learning_rate": 0.00012784576468175294, "loss": 1.077, "step": 3740 }, { "epoch": 0.05127468876263921, "grad_norm": 3.432690143585205, "learning_rate": 0.00012818759827715867, "loss": 0.7403, "step": 3750 }, { "epoch": 0.05141142126600625, "grad_norm": 6.084092140197754, "learning_rate": 0.00012852943187256443, "loss": 0.9385, "step": 3760 }, { "epoch": 0.05154815376937329, "grad_norm": 3.417497396469116, "learning_rate": 0.00012887126546797022, "loss": 0.8766, "step": 3770 }, { "epoch": 0.05168488627274032, "grad_norm": 4.422177314758301, "learning_rate": 0.00012921309906337595, "loss": 0.7825, "step": 3780 }, { "epoch": 0.051821618776107366, "grad_norm": 2.7452290058135986, "learning_rate": 0.0001295549326587817, "loss": 0.8314, "step": 3790 }, { "epoch": 0.0519583512794744, "grad_norm": 6.4709086418151855, "learning_rate": 0.00012989676625418747, "loss": 1.0349, "step": 3800 }, { "epoch": 0.05209508378284144, "grad_norm": 3.764509677886963, "learning_rate": 0.00013023859984959322, "loss": 0.9571, "step": 3810 }, { "epoch": 0.052231816286208474, "grad_norm": 3.7608580589294434, "learning_rate": 0.00013058043344499898, "loss": 1.0824, "step": 3820 }, { "epoch": 0.05236854878957552, "grad_norm": 4.058236598968506, "learning_rate": 0.00013092226704040474, "loss": 0.9241, "step": 3830 }, { "epoch": 0.05250528129294255, "grad_norm": 3.5375351905822754, "learning_rate": 0.0001312641006358105, "loss": 0.7688, "step": 3840 }, { "epoch": 0.05264201379630959, "grad_norm": 3.4873907566070557, "learning_rate": 0.00013160593423121626, "loss": 0.8585, "step": 3850 }, { "epoch": 0.052778746299676625, "grad_norm": 5.366227149963379, "learning_rate": 0.000131947767826622, "loss": 1.1695, "step": 3860 }, { "epoch": 0.05291547880304367, "grad_norm": 5.144421577453613, "learning_rate": 0.00013228960142202775, "loss": 1.0378, "step": 3870 }, { "epoch": 0.053052211306410704, "grad_norm": 3.6354825496673584, "learning_rate": 0.00013263143501743353, "loss": 0.924, "step": 3880 }, { "epoch": 0.05318894380977774, "grad_norm": 3.6774935722351074, "learning_rate": 0.00013297326861283926, "loss": 1.0544, "step": 3890 }, { "epoch": 0.053325676313144776, "grad_norm": 3.573453187942505, "learning_rate": 0.00013331510220824502, "loss": 0.7796, "step": 3900 }, { "epoch": 0.05346240881651182, "grad_norm": 4.503958702087402, "learning_rate": 0.00013365693580365078, "loss": 0.8478, "step": 3910 }, { "epoch": 0.053599141319878855, "grad_norm": 4.178395748138428, "learning_rate": 0.00013399876939905654, "loss": 0.9386, "step": 3920 }, { "epoch": 0.05373587382324589, "grad_norm": 4.857720375061035, "learning_rate": 0.0001343406029944623, "loss": 0.7718, "step": 3930 }, { "epoch": 0.05387260632661293, "grad_norm": 6.109419822692871, "learning_rate": 0.00013468243658986805, "loss": 1.0238, "step": 3940 }, { "epoch": 0.05400933882997997, "grad_norm": 5.987557411193848, "learning_rate": 0.0001350242701852738, "loss": 1.0316, "step": 3950 }, { "epoch": 0.05414607133334701, "grad_norm": 1.3941224813461304, "learning_rate": 0.00013536610378067957, "loss": 0.6489, "step": 3960 }, { "epoch": 0.05428280383671404, "grad_norm": 3.504652976989746, "learning_rate": 0.00013570793737608533, "loss": 0.9269, "step": 3970 }, { "epoch": 0.054419536340081086, "grad_norm": 3.870225429534912, "learning_rate": 0.00013604977097149106, "loss": 0.9771, "step": 3980 }, { "epoch": 0.05455626884344812, "grad_norm": 2.6900553703308105, "learning_rate": 0.00013639160456689685, "loss": 0.9799, "step": 3990 }, { "epoch": 0.05469300134681516, "grad_norm": 8.098913192749023, "learning_rate": 0.0001367334381623026, "loss": 1.1482, "step": 4000 }, { "epoch": 0.054829733850182194, "grad_norm": 6.59432315826416, "learning_rate": 0.00013707527175770833, "loss": 1.0496, "step": 4010 }, { "epoch": 0.05496646635354924, "grad_norm": 4.693748474121094, "learning_rate": 0.00013741710535311412, "loss": 1.1503, "step": 4020 }, { "epoch": 0.05510319885691627, "grad_norm": 3.3811473846435547, "learning_rate": 0.00013775893894851988, "loss": 0.8037, "step": 4030 }, { "epoch": 0.05523993136028331, "grad_norm": 3.4824564456939697, "learning_rate": 0.0001381007725439256, "loss": 0.8778, "step": 4040 }, { "epoch": 0.055376663863650345, "grad_norm": 5.610710144042969, "learning_rate": 0.00013844260613933137, "loss": 1.0431, "step": 4050 }, { "epoch": 0.05551339636701739, "grad_norm": 4.462490081787109, "learning_rate": 0.00013878443973473715, "loss": 0.9885, "step": 4060 }, { "epoch": 0.055650128870384424, "grad_norm": 4.875885486602783, "learning_rate": 0.00013912627333014288, "loss": 1.1014, "step": 4070 }, { "epoch": 0.05578686137375146, "grad_norm": 5.966736316680908, "learning_rate": 0.00013946810692554864, "loss": 1.1073, "step": 4080 }, { "epoch": 0.055923593877118496, "grad_norm": 5.008415222167969, "learning_rate": 0.00013980994052095443, "loss": 1.0901, "step": 4090 }, { "epoch": 0.05606032638048554, "grad_norm": 3.972790479660034, "learning_rate": 0.00014015177411636016, "loss": 0.7721, "step": 4100 }, { "epoch": 0.056197058883852576, "grad_norm": 7.003477096557617, "learning_rate": 0.00014049360771176592, "loss": 0.958, "step": 4110 }, { "epoch": 0.05633379138721961, "grad_norm": 4.368973731994629, "learning_rate": 0.00014083544130717168, "loss": 0.7867, "step": 4120 }, { "epoch": 0.05647052389058665, "grad_norm": 3.8553426265716553, "learning_rate": 0.00014117727490257743, "loss": 1.0016, "step": 4130 }, { "epoch": 0.05660725639395369, "grad_norm": 3.6231229305267334, "learning_rate": 0.0001415191084979832, "loss": 0.9477, "step": 4140 }, { "epoch": 0.05674398889732073, "grad_norm": 6.716744899749756, "learning_rate": 0.00014186094209338892, "loss": 0.9726, "step": 4150 }, { "epoch": 0.05688072140068776, "grad_norm": 4.203152656555176, "learning_rate": 0.00014220277568879468, "loss": 0.9281, "step": 4160 }, { "epoch": 0.0570174539040548, "grad_norm": 4.154209613800049, "learning_rate": 0.00014254460928420047, "loss": 1.1089, "step": 4170 }, { "epoch": 0.05715418640742184, "grad_norm": 5.625837326049805, "learning_rate": 0.0001428864428796062, "loss": 0.7873, "step": 4180 }, { "epoch": 0.05729091891078888, "grad_norm": 5.117359161376953, "learning_rate": 0.00014322827647501196, "loss": 0.8245, "step": 4190 }, { "epoch": 0.057427651414155914, "grad_norm": 3.7787704467773438, "learning_rate": 0.00014357011007041774, "loss": 0.728, "step": 4200 }, { "epoch": 0.05756438391752296, "grad_norm": 4.626887798309326, "learning_rate": 0.00014391194366582347, "loss": 1.0725, "step": 4210 }, { "epoch": 0.05770111642088999, "grad_norm": 5.2256646156311035, "learning_rate": 0.00014425377726122923, "loss": 1.2256, "step": 4220 }, { "epoch": 0.05783784892425703, "grad_norm": 3.002168655395508, "learning_rate": 0.000144595610856635, "loss": 0.9352, "step": 4230 }, { "epoch": 0.057974581427624065, "grad_norm": 2.800670862197876, "learning_rate": 0.00014493744445204075, "loss": 0.7256, "step": 4240 }, { "epoch": 0.05811131393099111, "grad_norm": 5.81245231628418, "learning_rate": 0.0001452792780474465, "loss": 0.9745, "step": 4250 }, { "epoch": 0.058248046434358144, "grad_norm": 4.481237888336182, "learning_rate": 0.00014562111164285226, "loss": 1.2143, "step": 4260 }, { "epoch": 0.05838477893772518, "grad_norm": 3.1642978191375732, "learning_rate": 0.00014596294523825802, "loss": 1.0694, "step": 4270 }, { "epoch": 0.05852151144109222, "grad_norm": 3.1283721923828125, "learning_rate": 0.00014630477883366378, "loss": 0.7948, "step": 4280 }, { "epoch": 0.05865824394445926, "grad_norm": 4.196324348449707, "learning_rate": 0.00014664661242906954, "loss": 1.1598, "step": 4290 }, { "epoch": 0.058794976447826296, "grad_norm": 5.555298805236816, "learning_rate": 0.00014698844602447527, "loss": 0.9304, "step": 4300 }, { "epoch": 0.05893170895119333, "grad_norm": 5.1728291511535645, "learning_rate": 0.00014733027961988106, "loss": 0.8943, "step": 4310 }, { "epoch": 0.05906844145456037, "grad_norm": 4.199537754058838, "learning_rate": 0.00014767211321528681, "loss": 0.859, "step": 4320 }, { "epoch": 0.05920517395792741, "grad_norm": 4.074863910675049, "learning_rate": 0.00014801394681069255, "loss": 0.9326, "step": 4330 }, { "epoch": 0.05934190646129445, "grad_norm": 4.476974964141846, "learning_rate": 0.00014835578040609833, "loss": 1.0585, "step": 4340 }, { "epoch": 0.05947863896466148, "grad_norm": 3.932835102081299, "learning_rate": 0.0001486976140015041, "loss": 0.942, "step": 4350 }, { "epoch": 0.05961537146802852, "grad_norm": 2.9863619804382324, "learning_rate": 0.00014903944759690982, "loss": 1.0164, "step": 4360 }, { "epoch": 0.05975210397139556, "grad_norm": 3.1972897052764893, "learning_rate": 0.00014938128119231558, "loss": 0.7536, "step": 4370 }, { "epoch": 0.0598888364747626, "grad_norm": 3.4866034984588623, "learning_rate": 0.00014972311478772136, "loss": 0.9471, "step": 4380 }, { "epoch": 0.060025568978129634, "grad_norm": 3.3814477920532227, "learning_rate": 0.0001500649483831271, "loss": 0.9045, "step": 4390 }, { "epoch": 0.06016230148149668, "grad_norm": 4.248346328735352, "learning_rate": 0.00015040678197853285, "loss": 1.1961, "step": 4400 }, { "epoch": 0.06029903398486371, "grad_norm": 3.8553075790405273, "learning_rate": 0.00015074861557393858, "loss": 1.1533, "step": 4410 }, { "epoch": 0.06043576648823075, "grad_norm": 4.739254951477051, "learning_rate": 0.00015109044916934437, "loss": 0.9663, "step": 4420 }, { "epoch": 0.060572498991597785, "grad_norm": 5.022219657897949, "learning_rate": 0.00015143228276475013, "loss": 0.9858, "step": 4430 }, { "epoch": 0.06070923149496483, "grad_norm": 3.393871784210205, "learning_rate": 0.00015177411636015586, "loss": 1.1169, "step": 4440 }, { "epoch": 0.060845963998331865, "grad_norm": 9.675875663757324, "learning_rate": 0.00015211594995556164, "loss": 1.0689, "step": 4450 }, { "epoch": 0.0609826965016989, "grad_norm": 3.5830209255218506, "learning_rate": 0.0001524577835509674, "loss": 1.1008, "step": 4460 }, { "epoch": 0.06111942900506594, "grad_norm": 4.159592151641846, "learning_rate": 0.00015279961714637313, "loss": 1.1772, "step": 4470 }, { "epoch": 0.06125616150843298, "grad_norm": 3.557790994644165, "learning_rate": 0.0001531414507417789, "loss": 1.1308, "step": 4480 }, { "epoch": 0.061392894011800016, "grad_norm": 9.347163200378418, "learning_rate": 0.00015348328433718468, "loss": 0.8832, "step": 4490 }, { "epoch": 0.06152962651516705, "grad_norm": 5.0755534172058105, "learning_rate": 0.0001538251179325904, "loss": 1.0214, "step": 4500 }, { "epoch": 0.06166635901853409, "grad_norm": 3.887949228286743, "learning_rate": 0.00015416695152799617, "loss": 1.2785, "step": 4510 }, { "epoch": 0.06180309152190113, "grad_norm": 5.529502868652344, "learning_rate": 0.00015450878512340195, "loss": 0.9443, "step": 4520 }, { "epoch": 0.06193982402526817, "grad_norm": 5.050822734832764, "learning_rate": 0.00015485061871880768, "loss": 0.9422, "step": 4530 }, { "epoch": 0.0620765565286352, "grad_norm": 4.8009138107299805, "learning_rate": 0.00015519245231421344, "loss": 0.8783, "step": 4540 }, { "epoch": 0.06221328903200224, "grad_norm": 3.8026251792907715, "learning_rate": 0.0001555342859096192, "loss": 1.0595, "step": 4550 }, { "epoch": 0.06235002153536928, "grad_norm": 5.311044692993164, "learning_rate": 0.00015587611950502496, "loss": 1.2445, "step": 4560 }, { "epoch": 0.06248675403873632, "grad_norm": 5.842742443084717, "learning_rate": 0.00015621795310043072, "loss": 1.1262, "step": 4570 }, { "epoch": 0.06262348654210335, "grad_norm": 3.8607418537139893, "learning_rate": 0.00015655978669583648, "loss": 0.9252, "step": 4580 }, { "epoch": 0.0627602190454704, "grad_norm": 4.388648986816406, "learning_rate": 0.0001569016202912422, "loss": 1.0533, "step": 4590 }, { "epoch": 0.06289695154883743, "grad_norm": 4.362020969390869, "learning_rate": 0.000157243453886648, "loss": 1.1809, "step": 4600 }, { "epoch": 0.06303368405220447, "grad_norm": 5.411655426025391, "learning_rate": 0.00015758528748205375, "loss": 0.8572, "step": 4610 }, { "epoch": 0.06317041655557151, "grad_norm": 5.177088260650635, "learning_rate": 0.00015792712107745948, "loss": 1.088, "step": 4620 }, { "epoch": 0.06330714905893854, "grad_norm": 3.823418378829956, "learning_rate": 0.00015826895467286527, "loss": 0.8609, "step": 4630 }, { "epoch": 0.06344388156230558, "grad_norm": 4.607205390930176, "learning_rate": 0.00015861078826827102, "loss": 0.8653, "step": 4640 }, { "epoch": 0.06358061406567263, "grad_norm": 9.808913230895996, "learning_rate": 0.00015895262186367676, "loss": 1.0211, "step": 4650 }, { "epoch": 0.06371734656903966, "grad_norm": 4.119844913482666, "learning_rate": 0.00015929445545908251, "loss": 0.9982, "step": 4660 }, { "epoch": 0.0638540790724067, "grad_norm": 3.124828815460205, "learning_rate": 0.00015963628905448827, "loss": 1.0358, "step": 4670 }, { "epoch": 0.06399081157577373, "grad_norm": 4.773937702178955, "learning_rate": 0.00015997812264989403, "loss": 1.0997, "step": 4680 }, { "epoch": 0.06412754407914077, "grad_norm": 5.870059967041016, "learning_rate": 0.0001603199562452998, "loss": 1.03, "step": 4690 }, { "epoch": 0.06426427658250781, "grad_norm": 5.412544250488281, "learning_rate": 0.00016066178984070555, "loss": 0.9268, "step": 4700 }, { "epoch": 0.06440100908587484, "grad_norm": 4.933790683746338, "learning_rate": 0.0001610036234361113, "loss": 1.0543, "step": 4710 }, { "epoch": 0.06453774158924189, "grad_norm": 5.362005233764648, "learning_rate": 0.00016134545703151706, "loss": 1.1608, "step": 4720 }, { "epoch": 0.06467447409260893, "grad_norm": 4.958373069763184, "learning_rate": 0.0001616872906269228, "loss": 0.9585, "step": 4730 }, { "epoch": 0.06481120659597596, "grad_norm": 7.497626304626465, "learning_rate": 0.00016202912422232858, "loss": 1.0617, "step": 4740 }, { "epoch": 0.064947939099343, "grad_norm": 7.451600551605225, "learning_rate": 0.00016237095781773434, "loss": 0.9901, "step": 4750 }, { "epoch": 0.06508467160271003, "grad_norm": 4.937698841094971, "learning_rate": 0.00016271279141314007, "loss": 1.045, "step": 4760 }, { "epoch": 0.06522140410607707, "grad_norm": 8.644155502319336, "learning_rate": 0.00016305462500854586, "loss": 0.9704, "step": 4770 }, { "epoch": 0.06535813660944412, "grad_norm": 6.893190860748291, "learning_rate": 0.0001633964586039516, "loss": 1.0661, "step": 4780 }, { "epoch": 0.06549486911281115, "grad_norm": 4.90767240524292, "learning_rate": 0.00016373829219935734, "loss": 1.189, "step": 4790 }, { "epoch": 0.06563160161617819, "grad_norm": 4.362288951873779, "learning_rate": 0.0001640801257947631, "loss": 1.1122, "step": 4800 }, { "epoch": 0.06576833411954523, "grad_norm": 5.633723258972168, "learning_rate": 0.0001644219593901689, "loss": 0.9739, "step": 4810 }, { "epoch": 0.06590506662291226, "grad_norm": 3.7552602291107178, "learning_rate": 0.00016476379298557462, "loss": 1.1368, "step": 4820 }, { "epoch": 0.0660417991262793, "grad_norm": 5.267897605895996, "learning_rate": 0.00016510562658098038, "loss": 1.0593, "step": 4830 }, { "epoch": 0.06617853162964635, "grad_norm": 7.950118064880371, "learning_rate": 0.00016544746017638616, "loss": 1.2268, "step": 4840 }, { "epoch": 0.06631526413301338, "grad_norm": 5.144598007202148, "learning_rate": 0.0001657892937717919, "loss": 0.9812, "step": 4850 }, { "epoch": 0.06645199663638042, "grad_norm": 3.1745264530181885, "learning_rate": 0.00016613112736719765, "loss": 1.1867, "step": 4860 }, { "epoch": 0.06658872913974745, "grad_norm": 6.7406325340271, "learning_rate": 0.0001664729609626034, "loss": 0.8968, "step": 4870 }, { "epoch": 0.06672546164311449, "grad_norm": 4.783789157867432, "learning_rate": 0.00016681479455800917, "loss": 1.0064, "step": 4880 }, { "epoch": 0.06686219414648154, "grad_norm": 4.424810409545898, "learning_rate": 0.00016715662815341493, "loss": 1.0976, "step": 4890 }, { "epoch": 0.06699892664984856, "grad_norm": 6.029757976531982, "learning_rate": 0.00016749846174882069, "loss": 0.8588, "step": 4900 }, { "epoch": 0.06713565915321561, "grad_norm": 4.910025119781494, "learning_rate": 0.00016784029534422642, "loss": 0.9351, "step": 4910 }, { "epoch": 0.06727239165658265, "grad_norm": 4.355762958526611, "learning_rate": 0.0001681821289396322, "loss": 0.9468, "step": 4920 }, { "epoch": 0.06740912415994968, "grad_norm": 5.278584957122803, "learning_rate": 0.00016852396253503793, "loss": 1.0525, "step": 4930 }, { "epoch": 0.06754585666331672, "grad_norm": 6.32243013381958, "learning_rate": 0.0001688657961304437, "loss": 1.0991, "step": 4940 }, { "epoch": 0.06768258916668375, "grad_norm": 4.871994495391846, "learning_rate": 0.00016920762972584948, "loss": 0.9708, "step": 4950 }, { "epoch": 0.0678193216700508, "grad_norm": 3.8857767581939697, "learning_rate": 0.0001695494633212552, "loss": 1.0312, "step": 4960 }, { "epoch": 0.06795605417341784, "grad_norm": 3.9820542335510254, "learning_rate": 0.00016989129691666097, "loss": 1.0116, "step": 4970 }, { "epoch": 0.06809278667678487, "grad_norm": 4.048628330230713, "learning_rate": 0.00017023313051206672, "loss": 1.0914, "step": 4980 }, { "epoch": 0.06822951918015191, "grad_norm": 4.524248123168945, "learning_rate": 0.00017057496410747248, "loss": 1.07, "step": 4990 }, { "epoch": 0.06836625168351895, "grad_norm": 6.042446613311768, "learning_rate": 0.00017091679770287824, "loss": 1.0567, "step": 5000 }, { "epoch": 0.06850298418688598, "grad_norm": 6.150750637054443, "learning_rate": 0.000171258631298284, "loss": 0.9685, "step": 5010 }, { "epoch": 0.06863971669025302, "grad_norm": 5.857245922088623, "learning_rate": 0.00017160046489368976, "loss": 0.8917, "step": 5020 }, { "epoch": 0.06877644919362007, "grad_norm": 5.726778984069824, "learning_rate": 0.00017194229848909552, "loss": 1.1118, "step": 5030 }, { "epoch": 0.0689131816969871, "grad_norm": 3.9815194606781006, "learning_rate": 0.00017228413208450127, "loss": 1.073, "step": 5040 }, { "epoch": 0.06904991420035414, "grad_norm": 8.080240249633789, "learning_rate": 0.000172625965679907, "loss": 0.9502, "step": 5050 }, { "epoch": 0.06918664670372117, "grad_norm": 5.5691118240356445, "learning_rate": 0.0001729677992753128, "loss": 1.0183, "step": 5060 }, { "epoch": 0.06932337920708821, "grad_norm": 5.511457443237305, "learning_rate": 0.00017330963287071855, "loss": 0.8584, "step": 5070 }, { "epoch": 0.06946011171045526, "grad_norm": 4.736003398895264, "learning_rate": 0.00017365146646612428, "loss": 1.161, "step": 5080 }, { "epoch": 0.06959684421382228, "grad_norm": 4.273977756500244, "learning_rate": 0.00017399330006153007, "loss": 0.9507, "step": 5090 }, { "epoch": 0.06973357671718933, "grad_norm": 6.457561016082764, "learning_rate": 0.00017433513365693582, "loss": 0.7805, "step": 5100 }, { "epoch": 0.06987030922055637, "grad_norm": 9.155440330505371, "learning_rate": 0.00017467696725234156, "loss": 1.1338, "step": 5110 }, { "epoch": 0.0700070417239234, "grad_norm": 5.350612163543701, "learning_rate": 0.0001750188008477473, "loss": 1.0816, "step": 5120 }, { "epoch": 0.07014377422729044, "grad_norm": 4.966467380523682, "learning_rate": 0.0001753606344431531, "loss": 1.1793, "step": 5130 }, { "epoch": 0.07028050673065747, "grad_norm": 4.0669941902160645, "learning_rate": 0.00017570246803855883, "loss": 0.9315, "step": 5140 }, { "epoch": 0.07041723923402451, "grad_norm": 5.885744571685791, "learning_rate": 0.0001760443016339646, "loss": 1.2303, "step": 5150 }, { "epoch": 0.07055397173739156, "grad_norm": 4.109452724456787, "learning_rate": 0.00017638613522937035, "loss": 1.1458, "step": 5160 }, { "epoch": 0.07069070424075859, "grad_norm": 3.980508327484131, "learning_rate": 0.0001767279688247761, "loss": 1.1616, "step": 5170 }, { "epoch": 0.07082743674412563, "grad_norm": 4.660134315490723, "learning_rate": 0.00017706980242018186, "loss": 0.9214, "step": 5180 }, { "epoch": 0.07096416924749267, "grad_norm": 4.316200256347656, "learning_rate": 0.0001774116360155876, "loss": 1.1315, "step": 5190 }, { "epoch": 0.0711009017508597, "grad_norm": 4.61984920501709, "learning_rate": 0.00017775346961099338, "loss": 1.1113, "step": 5200 }, { "epoch": 0.07123763425422674, "grad_norm": 3.6560347080230713, "learning_rate": 0.00017809530320639914, "loss": 1.003, "step": 5210 }, { "epoch": 0.07137436675759379, "grad_norm": 5.070419788360596, "learning_rate": 0.00017843713680180487, "loss": 0.9466, "step": 5220 }, { "epoch": 0.07151109926096082, "grad_norm": 5.674863815307617, "learning_rate": 0.00017877897039721063, "loss": 0.8894, "step": 5230 }, { "epoch": 0.07164783176432786, "grad_norm": 5.111645698547363, "learning_rate": 0.0001791208039926164, "loss": 1.1373, "step": 5240 }, { "epoch": 0.07178456426769489, "grad_norm": 4.939906597137451, "learning_rate": 0.00017946263758802214, "loss": 1.3193, "step": 5250 }, { "epoch": 0.07192129677106193, "grad_norm": 5.693269729614258, "learning_rate": 0.0001798044711834279, "loss": 0.935, "step": 5260 }, { "epoch": 0.07205802927442898, "grad_norm": 4.974367618560791, "learning_rate": 0.0001801463047788337, "loss": 1.1091, "step": 5270 }, { "epoch": 0.072194761777796, "grad_norm": 7.4310197830200195, "learning_rate": 0.00018048813837423942, "loss": 1.1045, "step": 5280 }, { "epoch": 0.07233149428116305, "grad_norm": 4.284292221069336, "learning_rate": 0.00018082997196964518, "loss": 1.0759, "step": 5290 }, { "epoch": 0.07246822678453009, "grad_norm": 3.987494707107544, "learning_rate": 0.00018117180556505094, "loss": 1.303, "step": 5300 }, { "epoch": 0.07260495928789712, "grad_norm": 4.202698707580566, "learning_rate": 0.0001815136391604567, "loss": 0.9991, "step": 5310 }, { "epoch": 0.07274169179126416, "grad_norm": 2.8291099071502686, "learning_rate": 0.00018185547275586245, "loss": 1.0501, "step": 5320 }, { "epoch": 0.07287842429463119, "grad_norm": 5.761359214782715, "learning_rate": 0.0001821973063512682, "loss": 1.0322, "step": 5330 }, { "epoch": 0.07301515679799823, "grad_norm": 5.651093482971191, "learning_rate": 0.00018253913994667394, "loss": 0.9844, "step": 5340 }, { "epoch": 0.07315188930136528, "grad_norm": 3.970623254776001, "learning_rate": 0.00018288097354207973, "loss": 1.0349, "step": 5350 }, { "epoch": 0.0732886218047323, "grad_norm": 6.001987934112549, "learning_rate": 0.00018322280713748548, "loss": 1.0586, "step": 5360 }, { "epoch": 0.07342535430809935, "grad_norm": 4.898134231567383, "learning_rate": 0.00018356464073289122, "loss": 0.9785, "step": 5370 }, { "epoch": 0.07356208681146639, "grad_norm": 5.776147365570068, "learning_rate": 0.000183906474328297, "loss": 1.2935, "step": 5380 }, { "epoch": 0.07369881931483342, "grad_norm": 6.242198467254639, "learning_rate": 0.00018424830792370276, "loss": 0.8663, "step": 5390 }, { "epoch": 0.07383555181820046, "grad_norm": 4.71022367477417, "learning_rate": 0.0001845901415191085, "loss": 1.1619, "step": 5400 }, { "epoch": 0.07397228432156751, "grad_norm": 3.2922770977020264, "learning_rate": 0.00018493197511451425, "loss": 1.077, "step": 5410 }, { "epoch": 0.07410901682493454, "grad_norm": 6.338130950927734, "learning_rate": 0.00018527380870992003, "loss": 0.9353, "step": 5420 }, { "epoch": 0.07424574932830158, "grad_norm": 5.863500595092773, "learning_rate": 0.00018561564230532577, "loss": 1.2281, "step": 5430 }, { "epoch": 0.07438248183166861, "grad_norm": 6.251711368560791, "learning_rate": 0.00018595747590073152, "loss": 0.9964, "step": 5440 }, { "epoch": 0.07451921433503565, "grad_norm": 4.107809543609619, "learning_rate": 0.0001862993094961373, "loss": 1.1502, "step": 5450 }, { "epoch": 0.0746559468384027, "grad_norm": 4.284097194671631, "learning_rate": 0.00018664114309154304, "loss": 1.1964, "step": 5460 }, { "epoch": 0.07479267934176972, "grad_norm": 3.9874682426452637, "learning_rate": 0.0001869829766869488, "loss": 1.255, "step": 5470 }, { "epoch": 0.07492941184513677, "grad_norm": 8.484665870666504, "learning_rate": 0.00018732481028235453, "loss": 0.8834, "step": 5480 }, { "epoch": 0.07506614434850381, "grad_norm": 5.753522872924805, "learning_rate": 0.00018766664387776032, "loss": 1.07, "step": 5490 }, { "epoch": 0.07520287685187084, "grad_norm": 4.987293720245361, "learning_rate": 0.00018800847747316607, "loss": 0.9986, "step": 5500 }, { "epoch": 0.07533960935523788, "grad_norm": 6.935995578765869, "learning_rate": 0.0001883503110685718, "loss": 1.0986, "step": 5510 }, { "epoch": 0.07547634185860491, "grad_norm": 8.237496376037598, "learning_rate": 0.0001886921446639776, "loss": 1.1852, "step": 5520 }, { "epoch": 0.07561307436197195, "grad_norm": 5.637815475463867, "learning_rate": 0.00018903397825938335, "loss": 1.1298, "step": 5530 }, { "epoch": 0.075749806865339, "grad_norm": 5.31977653503418, "learning_rate": 0.00018937581185478908, "loss": 0.8848, "step": 5540 }, { "epoch": 0.07588653936870603, "grad_norm": 3.146467924118042, "learning_rate": 0.00018971764545019484, "loss": 1.0255, "step": 5550 }, { "epoch": 0.07602327187207307, "grad_norm": 4.9011454582214355, "learning_rate": 0.00019005947904560062, "loss": 1.0387, "step": 5560 }, { "epoch": 0.07616000437544011, "grad_norm": 5.1503005027771, "learning_rate": 0.00019040131264100635, "loss": 1.105, "step": 5570 }, { "epoch": 0.07629673687880714, "grad_norm": 4.061588764190674, "learning_rate": 0.0001907431462364121, "loss": 1.0956, "step": 5580 }, { "epoch": 0.07643346938217419, "grad_norm": 4.172738552093506, "learning_rate": 0.0001910849798318179, "loss": 1.1208, "step": 5590 }, { "epoch": 0.07657020188554123, "grad_norm": 20.793148040771484, "learning_rate": 0.00019142681342722363, "loss": 1.2312, "step": 5600 }, { "epoch": 0.07670693438890826, "grad_norm": 4.729097366333008, "learning_rate": 0.0001917686470226294, "loss": 1.0315, "step": 5610 }, { "epoch": 0.0768436668922753, "grad_norm": 10.117670059204102, "learning_rate": 0.00019211048061803515, "loss": 1.0204, "step": 5620 }, { "epoch": 0.07698039939564233, "grad_norm": 4.869976043701172, "learning_rate": 0.0001924523142134409, "loss": 1.2508, "step": 5630 }, { "epoch": 0.07711713189900937, "grad_norm": 4.904376029968262, "learning_rate": 0.00019279414780884666, "loss": 0.9708, "step": 5640 }, { "epoch": 0.07725386440237642, "grad_norm": 5.766175270080566, "learning_rate": 0.00019313598140425242, "loss": 1.2158, "step": 5650 }, { "epoch": 0.07739059690574344, "grad_norm": 4.544596195220947, "learning_rate": 0.00019347781499965815, "loss": 1.1944, "step": 5660 }, { "epoch": 0.07752732940911049, "grad_norm": 6.994892597198486, "learning_rate": 0.00019381964859506394, "loss": 1.0168, "step": 5670 }, { "epoch": 0.07766406191247753, "grad_norm": 5.89672327041626, "learning_rate": 0.0001941614821904697, "loss": 1.1817, "step": 5680 }, { "epoch": 0.07780079441584456, "grad_norm": 4.911345481872559, "learning_rate": 0.00019450331578587543, "loss": 1.2423, "step": 5690 }, { "epoch": 0.0779375269192116, "grad_norm": 6.177627086639404, "learning_rate": 0.0001948451493812812, "loss": 1.1773, "step": 5700 }, { "epoch": 0.07807425942257863, "grad_norm": 4.48140287399292, "learning_rate": 0.00019518698297668697, "loss": 1.0602, "step": 5710 }, { "epoch": 0.07821099192594567, "grad_norm": 3.531926393508911, "learning_rate": 0.0001955288165720927, "loss": 1.0074, "step": 5720 }, { "epoch": 0.07834772442931272, "grad_norm": 6.138145923614502, "learning_rate": 0.00019587065016749846, "loss": 0.934, "step": 5730 }, { "epoch": 0.07848445693267975, "grad_norm": 8.352402687072754, "learning_rate": 0.00019621248376290422, "loss": 1.0777, "step": 5740 }, { "epoch": 0.07862118943604679, "grad_norm": 5.94435977935791, "learning_rate": 0.00019655431735830998, "loss": 0.9341, "step": 5750 }, { "epoch": 0.07875792193941383, "grad_norm": 5.590919494628906, "learning_rate": 0.00019689615095371573, "loss": 1.1334, "step": 5760 }, { "epoch": 0.07889465444278086, "grad_norm": 4.187180995941162, "learning_rate": 0.0001972379845491215, "loss": 1.0686, "step": 5770 }, { "epoch": 0.0790313869461479, "grad_norm": 4.212770938873291, "learning_rate": 0.00019757981814452725, "loss": 0.9114, "step": 5780 }, { "epoch": 0.07916811944951495, "grad_norm": 5.1608757972717285, "learning_rate": 0.000197921651739933, "loss": 0.8756, "step": 5790 }, { "epoch": 0.07930485195288198, "grad_norm": 6.125673294067383, "learning_rate": 0.00019826348533533874, "loss": 1.0927, "step": 5800 }, { "epoch": 0.07944158445624902, "grad_norm": 3.9610538482666016, "learning_rate": 0.00019860531893074453, "loss": 1.0823, "step": 5810 }, { "epoch": 0.07957831695961605, "grad_norm": 7.8915910720825195, "learning_rate": 0.00019894715252615028, "loss": 1.1207, "step": 5820 }, { "epoch": 0.07971504946298309, "grad_norm": 6.686652183532715, "learning_rate": 0.00019928898612155602, "loss": 1.2051, "step": 5830 }, { "epoch": 0.07985178196635014, "grad_norm": 7.154703617095947, "learning_rate": 0.00019963081971696177, "loss": 1.0102, "step": 5840 }, { "epoch": 0.07998851446971716, "grad_norm": 4.81575345993042, "learning_rate": 0.00019997265331236756, "loss": 1.2304, "step": 5850 }, { "epoch": 0.08012524697308421, "grad_norm": 5.799408912658691, "learning_rate": 0.0002003144869077733, "loss": 1.1637, "step": 5860 }, { "epoch": 0.08026197947645125, "grad_norm": 6.785093784332275, "learning_rate": 0.00020065632050317905, "loss": 1.1955, "step": 5870 }, { "epoch": 0.08039871197981828, "grad_norm": 5.648496150970459, "learning_rate": 0.00020099815409858483, "loss": 0.917, "step": 5880 }, { "epoch": 0.08053544448318532, "grad_norm": 4.822740077972412, "learning_rate": 0.00020133998769399056, "loss": 1.1216, "step": 5890 }, { "epoch": 0.08067217698655235, "grad_norm": 6.293355941772461, "learning_rate": 0.00020168182128939632, "loss": 1.166, "step": 5900 }, { "epoch": 0.0808089094899194, "grad_norm": 5.6594767570495605, "learning_rate": 0.00020202365488480208, "loss": 1.1201, "step": 5910 }, { "epoch": 0.08094564199328644, "grad_norm": 7.598113059997559, "learning_rate": 0.00020236548848020784, "loss": 1.0059, "step": 5920 }, { "epoch": 0.08108237449665347, "grad_norm": 6.7809295654296875, "learning_rate": 0.0002027073220756136, "loss": 1.3248, "step": 5930 }, { "epoch": 0.08121910700002051, "grad_norm": 6.362959384918213, "learning_rate": 0.00020304915567101936, "loss": 1.2682, "step": 5940 }, { "epoch": 0.08135583950338755, "grad_norm": 4.15017032623291, "learning_rate": 0.00020339098926642511, "loss": 1.2128, "step": 5950 }, { "epoch": 0.08149257200675458, "grad_norm": 8.418197631835938, "learning_rate": 0.00020373282286183087, "loss": 1.1163, "step": 5960 }, { "epoch": 0.08162930451012163, "grad_norm": 8.440979957580566, "learning_rate": 0.00020407465645723663, "loss": 1.1871, "step": 5970 }, { "epoch": 0.08176603701348867, "grad_norm": 5.445818901062012, "learning_rate": 0.00020441649005264236, "loss": 1.1282, "step": 5980 }, { "epoch": 0.0819027695168557, "grad_norm": 4.818733215332031, "learning_rate": 0.00020475832364804815, "loss": 0.995, "step": 5990 }, { "epoch": 0.08203950202022274, "grad_norm": 8.445326805114746, "learning_rate": 0.00020510015724345388, "loss": 0.9368, "step": 6000 }, { "epoch": 0.08217623452358977, "grad_norm": 7.617399215698242, "learning_rate": 0.00020544199083885964, "loss": 1.0627, "step": 6010 }, { "epoch": 0.08231296702695681, "grad_norm": 5.030130863189697, "learning_rate": 0.00020578382443426542, "loss": 1.3711, "step": 6020 }, { "epoch": 0.08244969953032386, "grad_norm": 4.175108432769775, "learning_rate": 0.00020612565802967115, "loss": 1.1488, "step": 6030 }, { "epoch": 0.08258643203369088, "grad_norm": 6.395186901092529, "learning_rate": 0.0002064674916250769, "loss": 1.0006, "step": 6040 }, { "epoch": 0.08272316453705793, "grad_norm": 4.461913108825684, "learning_rate": 0.00020680932522048267, "loss": 1.1272, "step": 6050 }, { "epoch": 0.08285989704042497, "grad_norm": 4.175055027008057, "learning_rate": 0.00020715115881588843, "loss": 1.1828, "step": 6060 }, { "epoch": 0.082996629543792, "grad_norm": 7.792077541351318, "learning_rate": 0.00020749299241129419, "loss": 1.1548, "step": 6070 }, { "epoch": 0.08313336204715904, "grad_norm": 7.256392002105713, "learning_rate": 0.00020783482600669994, "loss": 1.2419, "step": 6080 }, { "epoch": 0.08327009455052607, "grad_norm": 7.101800918579102, "learning_rate": 0.00020817665960210568, "loss": 1.0212, "step": 6090 }, { "epoch": 0.08340682705389312, "grad_norm": 5.421491622924805, "learning_rate": 0.00020851849319751146, "loss": 0.8954, "step": 6100 }, { "epoch": 0.08354355955726016, "grad_norm": 8.441722869873047, "learning_rate": 0.00020886032679291722, "loss": 1.0808, "step": 6110 }, { "epoch": 0.08368029206062719, "grad_norm": 4.238038063049316, "learning_rate": 0.00020920216038832295, "loss": 1.3661, "step": 6120 }, { "epoch": 0.08381702456399423, "grad_norm": 6.391550064086914, "learning_rate": 0.00020954399398372874, "loss": 1.0822, "step": 6130 }, { "epoch": 0.08395375706736127, "grad_norm": 5.582723617553711, "learning_rate": 0.0002098858275791345, "loss": 1.1305, "step": 6140 }, { "epoch": 0.0840904895707283, "grad_norm": 5.017193794250488, "learning_rate": 0.00021022766117454023, "loss": 1.2498, "step": 6150 }, { "epoch": 0.08422722207409535, "grad_norm": 6.476848602294922, "learning_rate": 0.00021056949476994598, "loss": 1.061, "step": 6160 }, { "epoch": 0.08436395457746239, "grad_norm": 6.1557841300964355, "learning_rate": 0.00021091132836535177, "loss": 1.0364, "step": 6170 }, { "epoch": 0.08450068708082942, "grad_norm": 7.148631572723389, "learning_rate": 0.0002112531619607575, "loss": 1.2834, "step": 6180 }, { "epoch": 0.08463741958419646, "grad_norm": 4.715476036071777, "learning_rate": 0.00021159499555616326, "loss": 1.2789, "step": 6190 }, { "epoch": 0.08477415208756349, "grad_norm": 6.317337989807129, "learning_rate": 0.00021193682915156904, "loss": 1.0325, "step": 6200 }, { "epoch": 0.08491088459093053, "grad_norm": 5.148715972900391, "learning_rate": 0.00021227866274697478, "loss": 1.0254, "step": 6210 }, { "epoch": 0.08504761709429758, "grad_norm": 4.714263439178467, "learning_rate": 0.00021262049634238053, "loss": 1.1394, "step": 6220 }, { "epoch": 0.0851843495976646, "grad_norm": 6.206027984619141, "learning_rate": 0.0002129623299377863, "loss": 0.9752, "step": 6230 }, { "epoch": 0.08532108210103165, "grad_norm": 5.5668721199035645, "learning_rate": 0.00021330416353319205, "loss": 1.298, "step": 6240 }, { "epoch": 0.08545781460439869, "grad_norm": 5.345678806304932, "learning_rate": 0.0002136459971285978, "loss": 1.1341, "step": 6250 }, { "epoch": 0.08559454710776572, "grad_norm": 6.414546489715576, "learning_rate": 0.00021398783072400354, "loss": 0.8508, "step": 6260 }, { "epoch": 0.08573127961113276, "grad_norm": 7.316380977630615, "learning_rate": 0.00021432966431940932, "loss": 1.2016, "step": 6270 }, { "epoch": 0.08586801211449979, "grad_norm": 6.392462730407715, "learning_rate": 0.00021467149791481508, "loss": 1.0842, "step": 6280 }, { "epoch": 0.08600474461786684, "grad_norm": 5.249095916748047, "learning_rate": 0.00021501333151022081, "loss": 1.106, "step": 6290 }, { "epoch": 0.08614147712123388, "grad_norm": 10.548511505126953, "learning_rate": 0.00021535516510562657, "loss": 1.2782, "step": 6300 }, { "epoch": 0.08627820962460091, "grad_norm": 6.671372413635254, "learning_rate": 0.00021569699870103236, "loss": 1.3429, "step": 6310 }, { "epoch": 0.08641494212796795, "grad_norm": 6.844815254211426, "learning_rate": 0.0002160388322964381, "loss": 1.333, "step": 6320 }, { "epoch": 0.086551674631335, "grad_norm": 4.499391078948975, "learning_rate": 0.00021638066589184385, "loss": 1.1918, "step": 6330 }, { "epoch": 0.08668840713470202, "grad_norm": 4.820998191833496, "learning_rate": 0.00021672249948724963, "loss": 1.0394, "step": 6340 }, { "epoch": 0.08682513963806907, "grad_norm": 6.922667026519775, "learning_rate": 0.00021706433308265536, "loss": 0.959, "step": 6350 }, { "epoch": 0.0869618721414361, "grad_norm": 8.403138160705566, "learning_rate": 0.00021740616667806112, "loss": 1.0589, "step": 6360 }, { "epoch": 0.08709860464480314, "grad_norm": 4.253501892089844, "learning_rate": 0.00021774800027346688, "loss": 1.2424, "step": 6370 }, { "epoch": 0.08723533714817018, "grad_norm": 6.748459815979004, "learning_rate": 0.00021808983386887264, "loss": 1.1841, "step": 6380 }, { "epoch": 0.08737206965153721, "grad_norm": 8.381468772888184, "learning_rate": 0.0002184316674642784, "loss": 1.0409, "step": 6390 }, { "epoch": 0.08750880215490425, "grad_norm": 5.027193069458008, "learning_rate": 0.00021877350105968416, "loss": 1.1305, "step": 6400 }, { "epoch": 0.0876455346582713, "grad_norm": 6.441908359527588, "learning_rate": 0.00021911533465508989, "loss": 1.2001, "step": 6410 }, { "epoch": 0.08778226716163832, "grad_norm": 5.090795993804932, "learning_rate": 0.00021945716825049567, "loss": 1.2069, "step": 6420 }, { "epoch": 0.08791899966500537, "grad_norm": 6.17643404006958, "learning_rate": 0.00021979900184590143, "loss": 1.1053, "step": 6430 }, { "epoch": 0.08805573216837241, "grad_norm": 7.0662102699279785, "learning_rate": 0.00022014083544130716, "loss": 1.0861, "step": 6440 }, { "epoch": 0.08819246467173944, "grad_norm": 6.958805084228516, "learning_rate": 0.00022048266903671295, "loss": 1.2787, "step": 6450 }, { "epoch": 0.08832919717510648, "grad_norm": 6.157546520233154, "learning_rate": 0.0002208245026321187, "loss": 1.1236, "step": 6460 }, { "epoch": 0.08846592967847351, "grad_norm": 7.470794677734375, "learning_rate": 0.00022116633622752444, "loss": 1.0781, "step": 6470 }, { "epoch": 0.08860266218184056, "grad_norm": 6.164478302001953, "learning_rate": 0.0002215081698229302, "loss": 1.1775, "step": 6480 }, { "epoch": 0.0887393946852076, "grad_norm": 6.451532363891602, "learning_rate": 0.00022185000341833598, "loss": 1.0705, "step": 6490 }, { "epoch": 0.08887612718857463, "grad_norm": 7.553691387176514, "learning_rate": 0.0002221918370137417, "loss": 1.0456, "step": 6500 }, { "epoch": 0.08901285969194167, "grad_norm": 6.098942756652832, "learning_rate": 0.00022253367060914747, "loss": 1.2271, "step": 6510 }, { "epoch": 0.08914959219530871, "grad_norm": 4.516818046569824, "learning_rate": 0.00022287550420455323, "loss": 0.8787, "step": 6520 }, { "epoch": 0.08928632469867574, "grad_norm": 7.099648475646973, "learning_rate": 0.00022321733779995899, "loss": 1.0155, "step": 6530 }, { "epoch": 0.08942305720204279, "grad_norm": 7.110953330993652, "learning_rate": 0.00022355917139536474, "loss": 1.452, "step": 6540 }, { "epoch": 0.08955978970540981, "grad_norm": 5.687777519226074, "learning_rate": 0.00022390100499077048, "loss": 1.1829, "step": 6550 }, { "epoch": 0.08969652220877686, "grad_norm": 5.783627033233643, "learning_rate": 0.00022424283858617626, "loss": 1.201, "step": 6560 }, { "epoch": 0.0898332547121439, "grad_norm": 5.160383224487305, "learning_rate": 0.00022458467218158202, "loss": 1.1612, "step": 6570 }, { "epoch": 0.08996998721551093, "grad_norm": 7.029409885406494, "learning_rate": 0.00022492650577698775, "loss": 1.0996, "step": 6580 }, { "epoch": 0.09010671971887797, "grad_norm": 12.10134220123291, "learning_rate": 0.0002252683393723935, "loss": 1.1178, "step": 6590 }, { "epoch": 0.09024345222224502, "grad_norm": 7.97524356842041, "learning_rate": 0.0002256101729677993, "loss": 1.1906, "step": 6600 }, { "epoch": 0.09038018472561204, "grad_norm": 6.932685852050781, "learning_rate": 0.00022595200656320502, "loss": 1.3618, "step": 6610 }, { "epoch": 0.09051691722897909, "grad_norm": 5.3867621421813965, "learning_rate": 0.00022629384015861078, "loss": 1.1641, "step": 6620 }, { "epoch": 0.09065364973234613, "grad_norm": 5.573099136352539, "learning_rate": 0.00022663567375401657, "loss": 1.007, "step": 6630 }, { "epoch": 0.09079038223571316, "grad_norm": 3.3727009296417236, "learning_rate": 0.0002269775073494223, "loss": 1.1147, "step": 6640 }, { "epoch": 0.0909271147390802, "grad_norm": 12.300259590148926, "learning_rate": 0.00022731934094482806, "loss": 0.9387, "step": 6650 }, { "epoch": 0.09106384724244723, "grad_norm": 6.029952049255371, "learning_rate": 0.00022766117454023382, "loss": 1.1253, "step": 6660 }, { "epoch": 0.09120057974581428, "grad_norm": 7.058362007141113, "learning_rate": 0.00022800300813563957, "loss": 1.2575, "step": 6670 }, { "epoch": 0.09133731224918132, "grad_norm": 13.636723518371582, "learning_rate": 0.00022834484173104533, "loss": 0.8354, "step": 6680 }, { "epoch": 0.09147404475254835, "grad_norm": 11.1083345413208, "learning_rate": 0.0002286866753264511, "loss": 1.1937, "step": 6690 }, { "epoch": 0.09161077725591539, "grad_norm": 6.841421127319336, "learning_rate": 0.00022902850892185685, "loss": 1.1156, "step": 6700 }, { "epoch": 0.09174750975928243, "grad_norm": 5.8431782722473145, "learning_rate": 0.0002293703425172626, "loss": 0.9753, "step": 6710 }, { "epoch": 0.09188424226264946, "grad_norm": 4.671689987182617, "learning_rate": 0.00022971217611266837, "loss": 1.3819, "step": 6720 }, { "epoch": 0.0920209747660165, "grad_norm": 8.124401092529297, "learning_rate": 0.0002300540097080741, "loss": 1.1543, "step": 6730 }, { "epoch": 0.09215770726938353, "grad_norm": 4.48124361038208, "learning_rate": 0.00023039584330347988, "loss": 1.2404, "step": 6740 }, { "epoch": 0.09229443977275058, "grad_norm": 4.962923526763916, "learning_rate": 0.00023073767689888564, "loss": 0.7881, "step": 6750 }, { "epoch": 0.09243117227611762, "grad_norm": 11.625554084777832, "learning_rate": 0.00023107951049429137, "loss": 1.1725, "step": 6760 }, { "epoch": 0.09256790477948465, "grad_norm": 4.975678443908691, "learning_rate": 0.00023142134408969716, "loss": 1.118, "step": 6770 }, { "epoch": 0.09270463728285169, "grad_norm": 4.730610370635986, "learning_rate": 0.00023176317768510292, "loss": 1.2653, "step": 6780 }, { "epoch": 0.09284136978621874, "grad_norm": 4.6666765213012695, "learning_rate": 0.00023210501128050865, "loss": 1.2558, "step": 6790 }, { "epoch": 0.09297810228958577, "grad_norm": 5.635897636413574, "learning_rate": 0.0002324468448759144, "loss": 1.3754, "step": 6800 }, { "epoch": 0.09311483479295281, "grad_norm": 6.436832904815674, "learning_rate": 0.00023278867847132016, "loss": 1.265, "step": 6810 }, { "epoch": 0.09325156729631985, "grad_norm": 4.321234226226807, "learning_rate": 0.00023313051206672592, "loss": 0.9773, "step": 6820 }, { "epoch": 0.09338829979968688, "grad_norm": 9.431012153625488, "learning_rate": 0.00023347234566213168, "loss": 1.0132, "step": 6830 }, { "epoch": 0.09352503230305392, "grad_norm": 5.797915458679199, "learning_rate": 0.0002338141792575374, "loss": 1.2378, "step": 6840 }, { "epoch": 0.09366176480642095, "grad_norm": 5.561328887939453, "learning_rate": 0.0002341560128529432, "loss": 1.1994, "step": 6850 }, { "epoch": 0.093798497309788, "grad_norm": 7.145861625671387, "learning_rate": 0.00023449784644834895, "loss": 1.2624, "step": 6860 }, { "epoch": 0.09393522981315504, "grad_norm": 5.382163047790527, "learning_rate": 0.00023483968004375469, "loss": 0.8925, "step": 6870 }, { "epoch": 0.09407196231652207, "grad_norm": 5.817798614501953, "learning_rate": 0.00023518151363916047, "loss": 1.0877, "step": 6880 }, { "epoch": 0.09420869481988911, "grad_norm": 14.198504447937012, "learning_rate": 0.00023552334723456623, "loss": 1.2206, "step": 6890 }, { "epoch": 0.09434542732325615, "grad_norm": 11.813673973083496, "learning_rate": 0.00023586518082997196, "loss": 0.9329, "step": 6900 }, { "epoch": 0.09448215982662318, "grad_norm": 4.494518280029297, "learning_rate": 0.00023620701442537772, "loss": 1.159, "step": 6910 }, { "epoch": 0.09461889232999023, "grad_norm": 5.47347354888916, "learning_rate": 0.0002365488480207835, "loss": 1.2561, "step": 6920 }, { "epoch": 0.09475562483335725, "grad_norm": 46.72405242919922, "learning_rate": 0.00023689068161618924, "loss": 1.0994, "step": 6930 }, { "epoch": 0.0948923573367243, "grad_norm": 8.545106887817383, "learning_rate": 0.000237232515211595, "loss": 1.1022, "step": 6940 }, { "epoch": 0.09502908984009134, "grad_norm": 7.60125207901001, "learning_rate": 0.00023757434880700078, "loss": 1.0921, "step": 6950 }, { "epoch": 0.09516582234345837, "grad_norm": 5.18351936340332, "learning_rate": 0.0002379161824024065, "loss": 1.4041, "step": 6960 }, { "epoch": 0.09530255484682541, "grad_norm": 6.261326789855957, "learning_rate": 0.00023825801599781227, "loss": 0.8893, "step": 6970 }, { "epoch": 0.09543928735019246, "grad_norm": 4.945760250091553, "learning_rate": 0.00023859984959321803, "loss": 1.1125, "step": 6980 }, { "epoch": 0.09557601985355949, "grad_norm": 6.279932498931885, "learning_rate": 0.00023894168318862378, "loss": 1.1492, "step": 6990 }, { "epoch": 0.09571275235692653, "grad_norm": 7.003011703491211, "learning_rate": 0.00023928351678402954, "loss": 1.457, "step": 7000 }, { "epoch": 0.09584948486029357, "grad_norm": 6.150653839111328, "learning_rate": 0.0002396253503794353, "loss": 1.0898, "step": 7010 }, { "epoch": 0.0959862173636606, "grad_norm": 13.142325401306152, "learning_rate": 0.00023996718397484106, "loss": 0.8286, "step": 7020 }, { "epoch": 0.09612294986702764, "grad_norm": 6.723555088043213, "learning_rate": 0.00024030901757024682, "loss": 1.2111, "step": 7030 }, { "epoch": 0.09625968237039467, "grad_norm": 7.199960231781006, "learning_rate": 0.00024065085116565258, "loss": 1.3299, "step": 7040 }, { "epoch": 0.09639641487376172, "grad_norm": 7.131250381469727, "learning_rate": 0.0002409926847610583, "loss": 1.1993, "step": 7050 }, { "epoch": 0.09653314737712876, "grad_norm": 13.028485298156738, "learning_rate": 0.0002413345183564641, "loss": 1.2808, "step": 7060 }, { "epoch": 0.09666987988049579, "grad_norm": 7.676147937774658, "learning_rate": 0.00024167635195186982, "loss": 1.249, "step": 7070 }, { "epoch": 0.09680661238386283, "grad_norm": 10.654341697692871, "learning_rate": 0.00024201818554727558, "loss": 1.1338, "step": 7080 }, { "epoch": 0.09694334488722987, "grad_norm": 7.425471782684326, "learning_rate": 0.00024236001914268134, "loss": 1.1607, "step": 7090 }, { "epoch": 0.0970800773905969, "grad_norm": 7.207551002502441, "learning_rate": 0.0002427018527380871, "loss": 0.9065, "step": 7100 }, { "epoch": 0.09721680989396395, "grad_norm": 8.423810958862305, "learning_rate": 0.00024304368633349286, "loss": 1.236, "step": 7110 }, { "epoch": 0.09735354239733097, "grad_norm": 5.680992126464844, "learning_rate": 0.00024338551992889862, "loss": 1.1466, "step": 7120 }, { "epoch": 0.09749027490069802, "grad_norm": 6.9917473793029785, "learning_rate": 0.00024372735352430437, "loss": 1.0805, "step": 7130 }, { "epoch": 0.09762700740406506, "grad_norm": 12.587326049804688, "learning_rate": 0.00024406918711971013, "loss": 1.285, "step": 7140 }, { "epoch": 0.09776373990743209, "grad_norm": 5.974912166595459, "learning_rate": 0.0002444110207151159, "loss": 1.0165, "step": 7150 }, { "epoch": 0.09790047241079913, "grad_norm": 7.374284744262695, "learning_rate": 0.0002447528543105216, "loss": 1.0081, "step": 7160 }, { "epoch": 0.09803720491416618, "grad_norm": 8.522710800170898, "learning_rate": 0.0002450946879059274, "loss": 1.2781, "step": 7170 }, { "epoch": 0.0981739374175332, "grad_norm": 5.575756072998047, "learning_rate": 0.0002454365215013332, "loss": 1.2208, "step": 7180 }, { "epoch": 0.09831066992090025, "grad_norm": 5.9297027587890625, "learning_rate": 0.0002457783550967389, "loss": 1.2981, "step": 7190 }, { "epoch": 0.09844740242426729, "grad_norm": 7.935986518859863, "learning_rate": 0.00024612018869214465, "loss": 1.2217, "step": 7200 }, { "epoch": 0.09858413492763432, "grad_norm": 5.538963794708252, "learning_rate": 0.0002464620222875504, "loss": 1.2105, "step": 7210 }, { "epoch": 0.09872086743100136, "grad_norm": 5.823004722595215, "learning_rate": 0.00024680385588295617, "loss": 1.3098, "step": 7220 }, { "epoch": 0.09885759993436839, "grad_norm": 11.261076927185059, "learning_rate": 0.00024714568947836193, "loss": 1.1882, "step": 7230 }, { "epoch": 0.09899433243773544, "grad_norm": 6.255049705505371, "learning_rate": 0.0002474875230737677, "loss": 1.2568, "step": 7240 }, { "epoch": 0.09913106494110248, "grad_norm": 6.409606456756592, "learning_rate": 0.00024782935666917345, "loss": 1.1719, "step": 7250 }, { "epoch": 0.09926779744446951, "grad_norm": 7.049551010131836, "learning_rate": 0.0002481711902645792, "loss": 1.0402, "step": 7260 }, { "epoch": 0.09940452994783655, "grad_norm": 7.284207344055176, "learning_rate": 0.00024851302385998496, "loss": 1.1383, "step": 7270 }, { "epoch": 0.0995412624512036, "grad_norm": 8.249618530273438, "learning_rate": 0.0002488548574553907, "loss": 1.1742, "step": 7280 }, { "epoch": 0.09967799495457062, "grad_norm": 8.223416328430176, "learning_rate": 0.0002491966910507965, "loss": 1.1656, "step": 7290 }, { "epoch": 0.09981472745793767, "grad_norm": 9.384042739868164, "learning_rate": 0.00024953852464620224, "loss": 1.2843, "step": 7300 }, { "epoch": 0.0999514599613047, "grad_norm": 7.733480453491211, "learning_rate": 0.000249880358241608, "loss": 1.0383, "step": 7310 }, { "epoch": 0.10008819246467174, "grad_norm": 9.337465286254883, "learning_rate": 0.0002502221918370137, "loss": 1.1215, "step": 7320 }, { "epoch": 0.10022492496803878, "grad_norm": 6.6433563232421875, "learning_rate": 0.0002505640254324195, "loss": 1.2111, "step": 7330 }, { "epoch": 0.10036165747140581, "grad_norm": 6.9528985023498535, "learning_rate": 0.00025090585902782527, "loss": 1.2336, "step": 7340 }, { "epoch": 0.10049838997477285, "grad_norm": 5.613892555236816, "learning_rate": 0.000251247692623231, "loss": 1.3131, "step": 7350 }, { "epoch": 0.1006351224781399, "grad_norm": 26.826671600341797, "learning_rate": 0.0002515895262186368, "loss": 1.1828, "step": 7360 }, { "epoch": 0.10077185498150693, "grad_norm": 7.039429187774658, "learning_rate": 0.00025193135981404254, "loss": 1.2549, "step": 7370 }, { "epoch": 0.10090858748487397, "grad_norm": 6.107513904571533, "learning_rate": 0.00025227319340944825, "loss": 1.1891, "step": 7380 }, { "epoch": 0.10104531998824101, "grad_norm": 6.446129322052002, "learning_rate": 0.00025261502700485406, "loss": 1.4408, "step": 7390 }, { "epoch": 0.10118205249160804, "grad_norm": 5.479048728942871, "learning_rate": 0.0002529568606002598, "loss": 0.9992, "step": 7400 }, { "epoch": 0.10131878499497508, "grad_norm": 5.669996738433838, "learning_rate": 0.0002532986941956655, "loss": 1.2892, "step": 7410 }, { "epoch": 0.10145551749834211, "grad_norm": 9.615148544311523, "learning_rate": 0.00025364052779107134, "loss": 1.1836, "step": 7420 }, { "epoch": 0.10159225000170916, "grad_norm": 5.722320079803467, "learning_rate": 0.0002539823613864771, "loss": 1.2163, "step": 7430 }, { "epoch": 0.1017289825050762, "grad_norm": 5.406513690948486, "learning_rate": 0.0002543241949818828, "loss": 1.0513, "step": 7440 }, { "epoch": 0.10186571500844323, "grad_norm": 6.480174541473389, "learning_rate": 0.0002546660285772886, "loss": 1.4362, "step": 7450 }, { "epoch": 0.10200244751181027, "grad_norm": 15.346138000488281, "learning_rate": 0.0002550078621726943, "loss": 1.2677, "step": 7460 }, { "epoch": 0.10213918001517731, "grad_norm": 10.763150215148926, "learning_rate": 0.0002553496957681001, "loss": 1.3277, "step": 7470 }, { "epoch": 0.10227591251854434, "grad_norm": 5.347362041473389, "learning_rate": 0.0002556915293635059, "loss": 1.173, "step": 7480 }, { "epoch": 0.10241264502191139, "grad_norm": 13.424551010131836, "learning_rate": 0.0002560333629589116, "loss": 1.0306, "step": 7490 }, { "epoch": 0.10254937752527842, "grad_norm": 7.3758625984191895, "learning_rate": 0.00025637519655431735, "loss": 1.1414, "step": 7500 }, { "epoch": 0.10268611002864546, "grad_norm": 4.573659896850586, "learning_rate": 0.00025671703014972316, "loss": 1.276, "step": 7510 }, { "epoch": 0.1028228425320125, "grad_norm": 9.651052474975586, "learning_rate": 0.00025705886374512886, "loss": 1.2523, "step": 7520 }, { "epoch": 0.10295957503537953, "grad_norm": 6.374401569366455, "learning_rate": 0.0002574006973405346, "loss": 1.2051, "step": 7530 }, { "epoch": 0.10309630753874657, "grad_norm": 7.322989463806152, "learning_rate": 0.00025774253093594044, "loss": 1.2283, "step": 7540 }, { "epoch": 0.10323304004211362, "grad_norm": 8.355438232421875, "learning_rate": 0.00025808436453134614, "loss": 1.1074, "step": 7550 }, { "epoch": 0.10336977254548065, "grad_norm": 5.764510154724121, "learning_rate": 0.0002584261981267519, "loss": 1.1917, "step": 7560 }, { "epoch": 0.10350650504884769, "grad_norm": 6.571407318115234, "learning_rate": 0.00025876803172215766, "loss": 1.2805, "step": 7570 }, { "epoch": 0.10364323755221473, "grad_norm": 6.404865264892578, "learning_rate": 0.0002591098653175634, "loss": 1.3593, "step": 7580 }, { "epoch": 0.10377997005558176, "grad_norm": 7.661734104156494, "learning_rate": 0.00025945169891296917, "loss": 1.0439, "step": 7590 }, { "epoch": 0.1039167025589488, "grad_norm": 7.779289245605469, "learning_rate": 0.00025979353250837493, "loss": 1.0806, "step": 7600 }, { "epoch": 0.10405343506231583, "grad_norm": 6.660257816314697, "learning_rate": 0.0002601353661037807, "loss": 1.1302, "step": 7610 }, { "epoch": 0.10419016756568288, "grad_norm": 11.061768531799316, "learning_rate": 0.00026047719969918645, "loss": 1.2519, "step": 7620 }, { "epoch": 0.10432690006904992, "grad_norm": 5.612277984619141, "learning_rate": 0.0002608190332945922, "loss": 1.1189, "step": 7630 }, { "epoch": 0.10446363257241695, "grad_norm": 8.857458114624023, "learning_rate": 0.00026116086688999796, "loss": 1.0767, "step": 7640 }, { "epoch": 0.10460036507578399, "grad_norm": 10.37850570678711, "learning_rate": 0.0002615027004854037, "loss": 1.3753, "step": 7650 }, { "epoch": 0.10473709757915103, "grad_norm": 5.858030319213867, "learning_rate": 0.0002618445340808095, "loss": 1.3907, "step": 7660 }, { "epoch": 0.10487383008251806, "grad_norm": 7.543013095855713, "learning_rate": 0.00026218636767621524, "loss": 1.029, "step": 7670 }, { "epoch": 0.1050105625858851, "grad_norm": 5.643473148345947, "learning_rate": 0.000262528201271621, "loss": 1.2072, "step": 7680 }, { "epoch": 0.10514729508925214, "grad_norm": 9.898833274841309, "learning_rate": 0.00026287003486702676, "loss": 1.3707, "step": 7690 }, { "epoch": 0.10528402759261918, "grad_norm": 6.164198398590088, "learning_rate": 0.0002632118684624325, "loss": 1.3198, "step": 7700 }, { "epoch": 0.10542076009598622, "grad_norm": 6.362168312072754, "learning_rate": 0.0002635537020578382, "loss": 1.1248, "step": 7710 }, { "epoch": 0.10555749259935325, "grad_norm": 9.803006172180176, "learning_rate": 0.000263895535653244, "loss": 1.2448, "step": 7720 }, { "epoch": 0.1056942251027203, "grad_norm": 12.687444686889648, "learning_rate": 0.0002642373692486498, "loss": 0.9798, "step": 7730 }, { "epoch": 0.10583095760608734, "grad_norm": 7.167172908782959, "learning_rate": 0.0002645792028440555, "loss": 1.1823, "step": 7740 }, { "epoch": 0.10596769010945437, "grad_norm": 10.451888084411621, "learning_rate": 0.00026492103643946125, "loss": 1.3242, "step": 7750 }, { "epoch": 0.10610442261282141, "grad_norm": 8.188249588012695, "learning_rate": 0.00026526287003486706, "loss": 1.1339, "step": 7760 }, { "epoch": 0.10624115511618845, "grad_norm": 5.171695232391357, "learning_rate": 0.00026560470363027277, "loss": 1.3062, "step": 7770 }, { "epoch": 0.10637788761955548, "grad_norm": 7.3235368728637695, "learning_rate": 0.0002659465372256785, "loss": 1.2092, "step": 7780 }, { "epoch": 0.10651462012292252, "grad_norm": 9.739580154418945, "learning_rate": 0.00026628837082108434, "loss": 1.1782, "step": 7790 }, { "epoch": 0.10665135262628955, "grad_norm": 6.66044282913208, "learning_rate": 0.00026663020441649004, "loss": 1.1239, "step": 7800 }, { "epoch": 0.1067880851296566, "grad_norm": 7.963847637176514, "learning_rate": 0.0002669720380118958, "loss": 1.3148, "step": 7810 }, { "epoch": 0.10692481763302364, "grad_norm": 7.520671367645264, "learning_rate": 0.00026731387160730156, "loss": 1.2354, "step": 7820 }, { "epoch": 0.10706155013639067, "grad_norm": 5.634294509887695, "learning_rate": 0.0002676557052027073, "loss": 1.2167, "step": 7830 }, { "epoch": 0.10719828263975771, "grad_norm": 7.80869197845459, "learning_rate": 0.0002679975387981131, "loss": 1.0309, "step": 7840 }, { "epoch": 0.10733501514312475, "grad_norm": 9.316112518310547, "learning_rate": 0.00026833937239351883, "loss": 1.4403, "step": 7850 }, { "epoch": 0.10747174764649178, "grad_norm": 6.541590213775635, "learning_rate": 0.0002686812059889246, "loss": 1.1278, "step": 7860 }, { "epoch": 0.10760848014985883, "grad_norm": 11.560871124267578, "learning_rate": 0.00026902303958433035, "loss": 1.3634, "step": 7870 }, { "epoch": 0.10774521265322586, "grad_norm": 6.837497711181641, "learning_rate": 0.0002693648731797361, "loss": 1.0415, "step": 7880 }, { "epoch": 0.1078819451565929, "grad_norm": 8.315403938293457, "learning_rate": 0.00026970670677514187, "loss": 1.2796, "step": 7890 }, { "epoch": 0.10801867765995994, "grad_norm": 9.376300811767578, "learning_rate": 0.0002700485403705476, "loss": 1.3374, "step": 7900 }, { "epoch": 0.10815541016332697, "grad_norm": 11.71534538269043, "learning_rate": 0.0002703903739659534, "loss": 1.2781, "step": 7910 }, { "epoch": 0.10829214266669401, "grad_norm": 6.10835599899292, "learning_rate": 0.00027073220756135914, "loss": 1.2944, "step": 7920 }, { "epoch": 0.10842887517006106, "grad_norm": 7.686083793640137, "learning_rate": 0.00027107404115676485, "loss": 1.4007, "step": 7930 }, { "epoch": 0.10856560767342809, "grad_norm": 7.173365116119385, "learning_rate": 0.00027141587475217066, "loss": 1.2098, "step": 7940 }, { "epoch": 0.10870234017679513, "grad_norm": 6.098764896392822, "learning_rate": 0.0002717577083475764, "loss": 1.1996, "step": 7950 }, { "epoch": 0.10883907268016217, "grad_norm": 6.676900386810303, "learning_rate": 0.0002720995419429821, "loss": 1.3666, "step": 7960 }, { "epoch": 0.1089758051835292, "grad_norm": 6.958308219909668, "learning_rate": 0.00027244137553838793, "loss": 1.1716, "step": 7970 }, { "epoch": 0.10911253768689624, "grad_norm": 24.79461097717285, "learning_rate": 0.0002727832091337937, "loss": 1.2077, "step": 7980 }, { "epoch": 0.10924927019026327, "grad_norm": 5.985872268676758, "learning_rate": 0.0002731250427291994, "loss": 1.1182, "step": 7990 }, { "epoch": 0.10938600269363032, "grad_norm": 7.812863826751709, "learning_rate": 0.0002734668763246052, "loss": 1.4357, "step": 8000 }, { "epoch": 0.10952273519699736, "grad_norm": 8.36412239074707, "learning_rate": 0.00027380870992001097, "loss": 1.452, "step": 8010 }, { "epoch": 0.10965946770036439, "grad_norm": 6.155104637145996, "learning_rate": 0.00027415054351541667, "loss": 1.2706, "step": 8020 }, { "epoch": 0.10979620020373143, "grad_norm": 76.93814849853516, "learning_rate": 0.0002744923771108225, "loss": 1.3006, "step": 8030 }, { "epoch": 0.10993293270709847, "grad_norm": 7.750058174133301, "learning_rate": 0.00027483421070622824, "loss": 1.2627, "step": 8040 }, { "epoch": 0.1100696652104655, "grad_norm": 11.734310150146484, "learning_rate": 0.00027517604430163394, "loss": 1.1943, "step": 8050 }, { "epoch": 0.11020639771383255, "grad_norm": 9.503214836120605, "learning_rate": 0.00027551787789703976, "loss": 1.3318, "step": 8060 }, { "epoch": 0.11034313021719958, "grad_norm": 8.938076972961426, "learning_rate": 0.00027585971149244546, "loss": 1.5312, "step": 8070 }, { "epoch": 0.11047986272056662, "grad_norm": 10.09266471862793, "learning_rate": 0.0002762015450878512, "loss": 1.3032, "step": 8080 }, { "epoch": 0.11061659522393366, "grad_norm": 11.2560453414917, "learning_rate": 0.00027654337868325703, "loss": 1.1978, "step": 8090 }, { "epoch": 0.11075332772730069, "grad_norm": 6.227878093719482, "learning_rate": 0.00027688521227866274, "loss": 1.1935, "step": 8100 }, { "epoch": 0.11089006023066773, "grad_norm": 9.600732803344727, "learning_rate": 0.0002772270458740685, "loss": 1.5102, "step": 8110 }, { "epoch": 0.11102679273403478, "grad_norm": 7.554925918579102, "learning_rate": 0.0002775688794694743, "loss": 1.2785, "step": 8120 }, { "epoch": 0.1111635252374018, "grad_norm": 9.541436195373535, "learning_rate": 0.00027791071306488, "loss": 1.1124, "step": 8130 }, { "epoch": 0.11130025774076885, "grad_norm": 6.700206279754639, "learning_rate": 0.00027825254666028577, "loss": 1.2668, "step": 8140 }, { "epoch": 0.11143699024413588, "grad_norm": 8.08810806274414, "learning_rate": 0.0002785943802556916, "loss": 1.3011, "step": 8150 }, { "epoch": 0.11157372274750292, "grad_norm": 9.212454795837402, "learning_rate": 0.0002789362138510973, "loss": 1.2883, "step": 8160 }, { "epoch": 0.11171045525086996, "grad_norm": 6.430050373077393, "learning_rate": 0.00027927804744650304, "loss": 1.1377, "step": 8170 }, { "epoch": 0.11184718775423699, "grad_norm": 8.621210098266602, "learning_rate": 0.00027961988104190886, "loss": 1.2487, "step": 8180 }, { "epoch": 0.11198392025760404, "grad_norm": 6.240113735198975, "learning_rate": 0.00027996171463731456, "loss": 1.4392, "step": 8190 }, { "epoch": 0.11212065276097108, "grad_norm": 23.17302131652832, "learning_rate": 0.0002803035482327203, "loss": 1.2877, "step": 8200 }, { "epoch": 0.11225738526433811, "grad_norm": 11.189237594604492, "learning_rate": 0.0002806453818281261, "loss": 1.1173, "step": 8210 }, { "epoch": 0.11239411776770515, "grad_norm": 8.472495079040527, "learning_rate": 0.00028098721542353184, "loss": 1.3043, "step": 8220 }, { "epoch": 0.1125308502710722, "grad_norm": 13.320552825927734, "learning_rate": 0.0002813290490189376, "loss": 1.3446, "step": 8230 }, { "epoch": 0.11266758277443922, "grad_norm": 6.154887676239014, "learning_rate": 0.00028167088261434335, "loss": 1.2303, "step": 8240 }, { "epoch": 0.11280431527780627, "grad_norm": 13.890785217285156, "learning_rate": 0.0002820127162097491, "loss": 1.1257, "step": 8250 }, { "epoch": 0.1129410477811733, "grad_norm": 8.541028022766113, "learning_rate": 0.00028235454980515487, "loss": 1.3846, "step": 8260 }, { "epoch": 0.11307778028454034, "grad_norm": 11.784552574157715, "learning_rate": 0.00028269638340056057, "loss": 1.2882, "step": 8270 }, { "epoch": 0.11321451278790738, "grad_norm": 11.044509887695312, "learning_rate": 0.0002830382169959664, "loss": 1.2767, "step": 8280 }, { "epoch": 0.11335124529127441, "grad_norm": 8.47109317779541, "learning_rate": 0.00028338005059137214, "loss": 1.2661, "step": 8290 }, { "epoch": 0.11348797779464145, "grad_norm": 8.15289306640625, "learning_rate": 0.00028372188418677785, "loss": 1.2025, "step": 8300 }, { "epoch": 0.1136247102980085, "grad_norm": 7.354960918426514, "learning_rate": 0.00028406371778218366, "loss": 1.1944, "step": 8310 }, { "epoch": 0.11376144280137553, "grad_norm": 5.999179840087891, "learning_rate": 0.00028440555137758936, "loss": 1.2261, "step": 8320 }, { "epoch": 0.11389817530474257, "grad_norm": 8.905896186828613, "learning_rate": 0.0002847473849729951, "loss": 1.0689, "step": 8330 }, { "epoch": 0.1140349078081096, "grad_norm": 10.670548439025879, "learning_rate": 0.00028508921856840093, "loss": 1.351, "step": 8340 }, { "epoch": 0.11417164031147664, "grad_norm": 6.345682621002197, "learning_rate": 0.00028543105216380664, "loss": 1.2838, "step": 8350 }, { "epoch": 0.11430837281484368, "grad_norm": 11.471070289611816, "learning_rate": 0.0002857728857592124, "loss": 1.3658, "step": 8360 }, { "epoch": 0.11444510531821071, "grad_norm": 3.7484567165374756, "learning_rate": 0.0002861147193546182, "loss": 1.0715, "step": 8370 }, { "epoch": 0.11458183782157776, "grad_norm": 7.235467910766602, "learning_rate": 0.0002864565529500239, "loss": 1.4195, "step": 8380 }, { "epoch": 0.1147185703249448, "grad_norm": 8.022528648376465, "learning_rate": 0.00028679838654542967, "loss": 1.5158, "step": 8390 }, { "epoch": 0.11485530282831183, "grad_norm": 8.117220878601074, "learning_rate": 0.0002871402201408355, "loss": 1.4174, "step": 8400 }, { "epoch": 0.11499203533167887, "grad_norm": 6.872897624969482, "learning_rate": 0.0002874820537362412, "loss": 1.3206, "step": 8410 }, { "epoch": 0.11512876783504591, "grad_norm": 12.793618202209473, "learning_rate": 0.00028782388733164695, "loss": 1.3255, "step": 8420 }, { "epoch": 0.11526550033841294, "grad_norm": 8.042529106140137, "learning_rate": 0.00028816572092705276, "loss": 1.3765, "step": 8430 }, { "epoch": 0.11540223284177999, "grad_norm": 11.3339204788208, "learning_rate": 0.00028850755452245846, "loss": 1.4191, "step": 8440 }, { "epoch": 0.11553896534514702, "grad_norm": 11.726129531860352, "learning_rate": 0.0002888493881178642, "loss": 1.3179, "step": 8450 }, { "epoch": 0.11567569784851406, "grad_norm": 8.289619445800781, "learning_rate": 0.00028919122171327, "loss": 1.3407, "step": 8460 }, { "epoch": 0.1158124303518811, "grad_norm": 5.977689266204834, "learning_rate": 0.00028953305530867574, "loss": 1.144, "step": 8470 }, { "epoch": 0.11594916285524813, "grad_norm": 7.151710033416748, "learning_rate": 0.0002898748889040815, "loss": 1.3123, "step": 8480 }, { "epoch": 0.11608589535861517, "grad_norm": 9.212043762207031, "learning_rate": 0.00029021672249948725, "loss": 1.0525, "step": 8490 }, { "epoch": 0.11622262786198222, "grad_norm": 15.58935260772705, "learning_rate": 0.000290558556094893, "loss": 1.1909, "step": 8500 }, { "epoch": 0.11635936036534925, "grad_norm": 4.111899375915527, "learning_rate": 0.00029090038969029877, "loss": 1.1783, "step": 8510 }, { "epoch": 0.11649609286871629, "grad_norm": 8.74038314819336, "learning_rate": 0.00029124222328570453, "loss": 1.4273, "step": 8520 }, { "epoch": 0.11663282537208332, "grad_norm": 8.993966102600098, "learning_rate": 0.0002915840568811103, "loss": 1.4012, "step": 8530 }, { "epoch": 0.11676955787545036, "grad_norm": 26.15325927734375, "learning_rate": 0.00029192589047651605, "loss": 1.4548, "step": 8540 }, { "epoch": 0.1169062903788174, "grad_norm": 11.602246284484863, "learning_rate": 0.0002922677240719218, "loss": 1.5273, "step": 8550 }, { "epoch": 0.11704302288218443, "grad_norm": 27.025747299194336, "learning_rate": 0.00029260955766732756, "loss": 1.3144, "step": 8560 }, { "epoch": 0.11717975538555148, "grad_norm": 18.67245864868164, "learning_rate": 0.00029295139126273327, "loss": 1.5043, "step": 8570 }, { "epoch": 0.11731648788891852, "grad_norm": 11.750663757324219, "learning_rate": 0.0002932932248581391, "loss": 1.4189, "step": 8580 }, { "epoch": 0.11745322039228555, "grad_norm": 5.442091464996338, "learning_rate": 0.00029363505845354484, "loss": 1.3025, "step": 8590 }, { "epoch": 0.11758995289565259, "grad_norm": 8.427131652832031, "learning_rate": 0.00029397689204895054, "loss": 1.2493, "step": 8600 }, { "epoch": 0.11772668539901963, "grad_norm": 7.909998416900635, "learning_rate": 0.00029431872564435635, "loss": 1.4858, "step": 8610 }, { "epoch": 0.11786341790238666, "grad_norm": 9.733124732971191, "learning_rate": 0.0002946605592397621, "loss": 1.3895, "step": 8620 }, { "epoch": 0.1180001504057537, "grad_norm": 7.942836761474609, "learning_rate": 0.0002950023928351678, "loss": 1.1689, "step": 8630 }, { "epoch": 0.11813688290912074, "grad_norm": 29.0411376953125, "learning_rate": 0.00029534422643057363, "loss": 1.0993, "step": 8640 }, { "epoch": 0.11827361541248778, "grad_norm": 7.210087299346924, "learning_rate": 0.0002956860600259794, "loss": 1.4328, "step": 8650 }, { "epoch": 0.11841034791585482, "grad_norm": 11.663816452026367, "learning_rate": 0.0002960278936213851, "loss": 1.4992, "step": 8660 }, { "epoch": 0.11854708041922185, "grad_norm": 11.896017074584961, "learning_rate": 0.0002963697272167909, "loss": 1.2331, "step": 8670 }, { "epoch": 0.1186838129225889, "grad_norm": 10.328230857849121, "learning_rate": 0.00029671156081219666, "loss": 1.3803, "step": 8680 }, { "epoch": 0.11882054542595594, "grad_norm": 18.50098419189453, "learning_rate": 0.00029705339440760237, "loss": 1.3324, "step": 8690 }, { "epoch": 0.11895727792932297, "grad_norm": 15.840170860290527, "learning_rate": 0.0002973952280030082, "loss": 1.3421, "step": 8700 }, { "epoch": 0.11909401043269001, "grad_norm": 10.202373504638672, "learning_rate": 0.0002977370615984139, "loss": 1.2844, "step": 8710 }, { "epoch": 0.11923074293605704, "grad_norm": 9.901867866516113, "learning_rate": 0.00029807889519381964, "loss": 1.3577, "step": 8720 }, { "epoch": 0.11936747543942408, "grad_norm": 9.339385986328125, "learning_rate": 0.00029842072878922545, "loss": 1.1829, "step": 8730 }, { "epoch": 0.11950420794279112, "grad_norm": 9.22814655303955, "learning_rate": 0.00029876256238463116, "loss": 1.5716, "step": 8740 }, { "epoch": 0.11964094044615815, "grad_norm": 8.23211669921875, "learning_rate": 0.0002991043959800369, "loss": 1.3271, "step": 8750 }, { "epoch": 0.1197776729495252, "grad_norm": 8.800820350646973, "learning_rate": 0.00029944622957544273, "loss": 1.2671, "step": 8760 }, { "epoch": 0.11991440545289224, "grad_norm": 8.961423873901367, "learning_rate": 0.00029978806317084843, "loss": 1.2863, "step": 8770 }, { "epoch": 0.12005113795625927, "grad_norm": 63.359317779541016, "learning_rate": 0.0003001298967662542, "loss": 1.4023, "step": 8780 }, { "epoch": 0.12018787045962631, "grad_norm": 13.327144622802734, "learning_rate": 0.00030047173036165995, "loss": 1.3851, "step": 8790 }, { "epoch": 0.12032460296299335, "grad_norm": 9.22321605682373, "learning_rate": 0.0003008135639570657, "loss": 1.2717, "step": 8800 }, { "epoch": 0.12046133546636038, "grad_norm": 10.883658409118652, "learning_rate": 0.00030115539755247146, "loss": 1.451, "step": 8810 }, { "epoch": 0.12059806796972743, "grad_norm": 8.02194881439209, "learning_rate": 0.00030149723114787717, "loss": 1.3799, "step": 8820 }, { "epoch": 0.12073480047309446, "grad_norm": 8.609525680541992, "learning_rate": 0.000301839064743283, "loss": 1.2494, "step": 8830 }, { "epoch": 0.1208715329764615, "grad_norm": 13.613035202026367, "learning_rate": 0.00030218089833868874, "loss": 1.3722, "step": 8840 }, { "epoch": 0.12100826547982854, "grad_norm": 10.080571174621582, "learning_rate": 0.00030252273193409444, "loss": 1.4398, "step": 8850 }, { "epoch": 0.12114499798319557, "grad_norm": 7.660046100616455, "learning_rate": 0.00030286456552950026, "loss": 1.2549, "step": 8860 }, { "epoch": 0.12128173048656261, "grad_norm": 9.298494338989258, "learning_rate": 0.000303206399124906, "loss": 1.1684, "step": 8870 }, { "epoch": 0.12141846298992966, "grad_norm": 15.259774208068848, "learning_rate": 0.0003035482327203117, "loss": 1.4109, "step": 8880 }, { "epoch": 0.12155519549329669, "grad_norm": 7.802633762359619, "learning_rate": 0.00030389006631571753, "loss": 1.5063, "step": 8890 }, { "epoch": 0.12169192799666373, "grad_norm": 12.839223861694336, "learning_rate": 0.0003042318999111233, "loss": 1.4701, "step": 8900 }, { "epoch": 0.12182866050003076, "grad_norm": 98.6209945678711, "learning_rate": 0.000304573733506529, "loss": 1.2159, "step": 8910 }, { "epoch": 0.1219653930033978, "grad_norm": 31.120424270629883, "learning_rate": 0.0003049155671019348, "loss": 1.4644, "step": 8920 }, { "epoch": 0.12210212550676484, "grad_norm": 15.42745304107666, "learning_rate": 0.00030525740069734056, "loss": 1.0596, "step": 8930 }, { "epoch": 0.12223885801013187, "grad_norm": 10.26223373413086, "learning_rate": 0.00030559923429274627, "loss": 1.2215, "step": 8940 }, { "epoch": 0.12237559051349892, "grad_norm": 12.047752380371094, "learning_rate": 0.0003059410678881521, "loss": 1.3824, "step": 8950 }, { "epoch": 0.12251232301686596, "grad_norm": 8.08845043182373, "learning_rate": 0.0003062829014835578, "loss": 1.6063, "step": 8960 }, { "epoch": 0.12264905552023299, "grad_norm": 30.195911407470703, "learning_rate": 0.00030662473507896354, "loss": 1.2429, "step": 8970 }, { "epoch": 0.12278578802360003, "grad_norm": 11.791861534118652, "learning_rate": 0.00030696656867436936, "loss": 1.5644, "step": 8980 }, { "epoch": 0.12292252052696707, "grad_norm": 12.640359878540039, "learning_rate": 0.00030730840226977506, "loss": 1.5, "step": 8990 }, { "epoch": 0.1230592530303341, "grad_norm": 11.301085472106934, "learning_rate": 0.0003076502358651808, "loss": 1.5478, "step": 9000 }, { "epoch": 0.12319598553370115, "grad_norm": 8.74278736114502, "learning_rate": 0.00030799206946058663, "loss": 1.5019, "step": 9010 }, { "epoch": 0.12333271803706818, "grad_norm": 9.511492729187012, "learning_rate": 0.00030833390305599233, "loss": 1.3225, "step": 9020 }, { "epoch": 0.12346945054043522, "grad_norm": 10.874874114990234, "learning_rate": 0.0003086757366513981, "loss": 1.4276, "step": 9030 }, { "epoch": 0.12360618304380226, "grad_norm": 10.75759506225586, "learning_rate": 0.0003090175702468039, "loss": 1.3696, "step": 9040 }, { "epoch": 0.12374291554716929, "grad_norm": 15.179597854614258, "learning_rate": 0.0003093594038422096, "loss": 1.3486, "step": 9050 }, { "epoch": 0.12387964805053633, "grad_norm": 17.20182991027832, "learning_rate": 0.00030970123743761537, "loss": 1.4098, "step": 9060 }, { "epoch": 0.12401638055390338, "grad_norm": 18.694255828857422, "learning_rate": 0.0003100430710330211, "loss": 1.3178, "step": 9070 }, { "epoch": 0.1241531130572704, "grad_norm": 13.832926750183105, "learning_rate": 0.0003103849046284269, "loss": 1.4222, "step": 9080 }, { "epoch": 0.12428984556063745, "grad_norm": 13.466696739196777, "learning_rate": 0.00031072673822383264, "loss": 1.5618, "step": 9090 }, { "epoch": 0.12442657806400448, "grad_norm": 12.512505531311035, "learning_rate": 0.0003110685718192384, "loss": 1.5365, "step": 9100 }, { "epoch": 0.12456331056737152, "grad_norm": 12.908215522766113, "learning_rate": 0.00031141040541464416, "loss": 1.351, "step": 9110 }, { "epoch": 0.12470004307073856, "grad_norm": 15.309252738952637, "learning_rate": 0.0003117522390100499, "loss": 1.2871, "step": 9120 }, { "epoch": 0.1248367755741056, "grad_norm": 7.675347328186035, "learning_rate": 0.0003120940726054557, "loss": 1.1958, "step": 9130 }, { "epoch": 0.12497350807747264, "grad_norm": 10.688709259033203, "learning_rate": 0.00031243590620086143, "loss": 1.2371, "step": 9140 }, { "epoch": 0.12511024058083967, "grad_norm": 10.63930606842041, "learning_rate": 0.0003127777397962672, "loss": 1.5312, "step": 9150 }, { "epoch": 0.1252469730842067, "grad_norm": 11.053671836853027, "learning_rate": 0.00031311957339167295, "loss": 1.3239, "step": 9160 }, { "epoch": 0.12538370558757375, "grad_norm": 10.485281944274902, "learning_rate": 0.0003134614069870787, "loss": 1.1972, "step": 9170 }, { "epoch": 0.1255204380909408, "grad_norm": 21.659608840942383, "learning_rate": 0.0003138032405824844, "loss": 1.4204, "step": 9180 }, { "epoch": 0.12565717059430784, "grad_norm": 14.500595092773438, "learning_rate": 0.0003141450741778902, "loss": 1.506, "step": 9190 }, { "epoch": 0.12579390309767485, "grad_norm": 8.404601097106934, "learning_rate": 0.000314486907773296, "loss": 1.2945, "step": 9200 }, { "epoch": 0.1259306356010419, "grad_norm": 10.603863716125488, "learning_rate": 0.0003148287413687017, "loss": 1.2536, "step": 9210 }, { "epoch": 0.12606736810440894, "grad_norm": 11.154974937438965, "learning_rate": 0.0003151705749641075, "loss": 1.6204, "step": 9220 }, { "epoch": 0.12620410060777598, "grad_norm": 9.506645202636719, "learning_rate": 0.00031551240855951326, "loss": 1.5468, "step": 9230 }, { "epoch": 0.12634083311114302, "grad_norm": 22.1984920501709, "learning_rate": 0.00031585424215491896, "loss": 1.5159, "step": 9240 }, { "epoch": 0.12647756561451004, "grad_norm": 13.480185508728027, "learning_rate": 0.0003161960757503248, "loss": 1.4475, "step": 9250 }, { "epoch": 0.12661429811787708, "grad_norm": 65.08501434326172, "learning_rate": 0.00031653790934573053, "loss": 1.5204, "step": 9260 }, { "epoch": 0.12675103062124413, "grad_norm": 10.57223892211914, "learning_rate": 0.00031687974294113624, "loss": 1.4336, "step": 9270 }, { "epoch": 0.12688776312461117, "grad_norm": 10.277785301208496, "learning_rate": 0.00031722157653654205, "loss": 1.3028, "step": 9280 }, { "epoch": 0.1270244956279782, "grad_norm": 10.015487670898438, "learning_rate": 0.0003175634101319478, "loss": 1.3188, "step": 9290 }, { "epoch": 0.12716122813134526, "grad_norm": 14.363490104675293, "learning_rate": 0.0003179052437273535, "loss": 1.2616, "step": 9300 }, { "epoch": 0.12729796063471227, "grad_norm": 10.609081268310547, "learning_rate": 0.00031824707732275927, "loss": 1.5987, "step": 9310 }, { "epoch": 0.1274346931380793, "grad_norm": 7.5180745124816895, "learning_rate": 0.00031858891091816503, "loss": 1.4735, "step": 9320 }, { "epoch": 0.12757142564144636, "grad_norm": 11.000030517578125, "learning_rate": 0.0003189307445135708, "loss": 1.4192, "step": 9330 }, { "epoch": 0.1277081581448134, "grad_norm": 21.77729606628418, "learning_rate": 0.00031927257810897654, "loss": 1.4278, "step": 9340 }, { "epoch": 0.12784489064818044, "grad_norm": 8.108946800231934, "learning_rate": 0.0003196144117043823, "loss": 1.3607, "step": 9350 }, { "epoch": 0.12798162315154746, "grad_norm": 17.568559646606445, "learning_rate": 0.00031995624529978806, "loss": 1.3389, "step": 9360 }, { "epoch": 0.1281183556549145, "grad_norm": 12.780149459838867, "learning_rate": 0.0003202980788951938, "loss": 1.3018, "step": 9370 }, { "epoch": 0.12825508815828154, "grad_norm": 9.997119903564453, "learning_rate": 0.0003206399124905996, "loss": 1.4749, "step": 9380 }, { "epoch": 0.1283918206616486, "grad_norm": 21.551610946655273, "learning_rate": 0.00032098174608600534, "loss": 1.4588, "step": 9390 }, { "epoch": 0.12852855316501563, "grad_norm": 9.669708251953125, "learning_rate": 0.0003213235796814111, "loss": 1.2452, "step": 9400 }, { "epoch": 0.12866528566838267, "grad_norm": 8.125384330749512, "learning_rate": 0.00032166541327681685, "loss": 1.4914, "step": 9410 }, { "epoch": 0.1288020181717497, "grad_norm": 9.516205787658691, "learning_rate": 0.0003220072468722226, "loss": 1.3134, "step": 9420 }, { "epoch": 0.12893875067511673, "grad_norm": 9.4537992477417, "learning_rate": 0.0003223490804676283, "loss": 1.2351, "step": 9430 }, { "epoch": 0.12907548317848377, "grad_norm": 9.79903793334961, "learning_rate": 0.00032269091406303413, "loss": 1.5225, "step": 9440 }, { "epoch": 0.12921221568185082, "grad_norm": 11.204296112060547, "learning_rate": 0.0003230327476584399, "loss": 1.4422, "step": 9450 }, { "epoch": 0.12934894818521786, "grad_norm": 8.4437894821167, "learning_rate": 0.0003233745812538456, "loss": 1.3343, "step": 9460 }, { "epoch": 0.12948568068858488, "grad_norm": 8.403410911560059, "learning_rate": 0.0003237164148492514, "loss": 1.602, "step": 9470 }, { "epoch": 0.12962241319195192, "grad_norm": 10.07909870147705, "learning_rate": 0.00032405824844465716, "loss": 1.3958, "step": 9480 }, { "epoch": 0.12975914569531896, "grad_norm": 14.005367279052734, "learning_rate": 0.00032440008204006286, "loss": 1.4961, "step": 9490 }, { "epoch": 0.129895878198686, "grad_norm": 11.612699508666992, "learning_rate": 0.0003247419156354687, "loss": 1.5245, "step": 9500 }, { "epoch": 0.13003261070205305, "grad_norm": 12.996234893798828, "learning_rate": 0.00032508374923087444, "loss": 1.4234, "step": 9510 }, { "epoch": 0.13016934320542006, "grad_norm": 12.759358406066895, "learning_rate": 0.00032542558282628014, "loss": 1.2465, "step": 9520 }, { "epoch": 0.1303060757087871, "grad_norm": 11.646781921386719, "learning_rate": 0.00032576741642168595, "loss": 1.4907, "step": 9530 }, { "epoch": 0.13044280821215415, "grad_norm": 11.09575080871582, "learning_rate": 0.0003261092500170917, "loss": 1.4347, "step": 9540 }, { "epoch": 0.1305795407155212, "grad_norm": 11.774147987365723, "learning_rate": 0.0003264510836124974, "loss": 1.4332, "step": 9550 }, { "epoch": 0.13071627321888823, "grad_norm": 7.563789367675781, "learning_rate": 0.0003267929172079032, "loss": 1.4343, "step": 9560 }, { "epoch": 0.13085300572225528, "grad_norm": 13.390583038330078, "learning_rate": 0.00032713475080330893, "loss": 1.4161, "step": 9570 }, { "epoch": 0.1309897382256223, "grad_norm": 9.933676719665527, "learning_rate": 0.0003274765843987147, "loss": 1.3637, "step": 9580 }, { "epoch": 0.13112647072898934, "grad_norm": 11.027811050415039, "learning_rate": 0.0003278184179941205, "loss": 1.655, "step": 9590 }, { "epoch": 0.13126320323235638, "grad_norm": 12.395076751708984, "learning_rate": 0.0003281602515895262, "loss": 1.3998, "step": 9600 }, { "epoch": 0.13139993573572342, "grad_norm": 11.401869773864746, "learning_rate": 0.00032850208518493196, "loss": 1.3026, "step": 9610 }, { "epoch": 0.13153666823909047, "grad_norm": 13.582301139831543, "learning_rate": 0.0003288439187803378, "loss": 1.4411, "step": 9620 }, { "epoch": 0.13167340074245748, "grad_norm": 12.150846481323242, "learning_rate": 0.0003291857523757435, "loss": 1.2089, "step": 9630 }, { "epoch": 0.13181013324582452, "grad_norm": 9.729326248168945, "learning_rate": 0.00032952758597114924, "loss": 1.6651, "step": 9640 }, { "epoch": 0.13194686574919157, "grad_norm": 12.239168167114258, "learning_rate": 0.00032986941956655505, "loss": 1.4865, "step": 9650 }, { "epoch": 0.1320835982525586, "grad_norm": 15.135849952697754, "learning_rate": 0.00033021125316196076, "loss": 1.7742, "step": 9660 }, { "epoch": 0.13222033075592565, "grad_norm": 7.150547504425049, "learning_rate": 0.0003305530867573665, "loss": 1.3652, "step": 9670 }, { "epoch": 0.1323570632592927, "grad_norm": 10.305261611938477, "learning_rate": 0.0003308949203527723, "loss": 1.609, "step": 9680 }, { "epoch": 0.1324937957626597, "grad_norm": 15.962980270385742, "learning_rate": 0.00033123675394817803, "loss": 1.5368, "step": 9690 }, { "epoch": 0.13263052826602675, "grad_norm": 21.072053909301758, "learning_rate": 0.0003315785875435838, "loss": 1.385, "step": 9700 }, { "epoch": 0.1327672607693938, "grad_norm": 12.363661766052246, "learning_rate": 0.00033192042113898955, "loss": 1.6193, "step": 9710 }, { "epoch": 0.13290399327276084, "grad_norm": 41.560089111328125, "learning_rate": 0.0003322622547343953, "loss": 1.5392, "step": 9720 }, { "epoch": 0.13304072577612788, "grad_norm": 21.057579040527344, "learning_rate": 0.00033260408832980106, "loss": 1.6205, "step": 9730 }, { "epoch": 0.1331774582794949, "grad_norm": 17.372833251953125, "learning_rate": 0.0003329459219252068, "loss": 1.459, "step": 9740 }, { "epoch": 0.13331419078286194, "grad_norm": 11.51163101196289, "learning_rate": 0.0003332877555206126, "loss": 1.4668, "step": 9750 }, { "epoch": 0.13345092328622898, "grad_norm": 10.902771949768066, "learning_rate": 0.00033362958911601834, "loss": 1.3311, "step": 9760 }, { "epoch": 0.13358765578959603, "grad_norm": 12.908489227294922, "learning_rate": 0.0003339714227114241, "loss": 1.3246, "step": 9770 }, { "epoch": 0.13372438829296307, "grad_norm": 9.400343894958496, "learning_rate": 0.00033431325630682985, "loss": 1.4628, "step": 9780 }, { "epoch": 0.1338611207963301, "grad_norm": 11.747254371643066, "learning_rate": 0.0003346550899022356, "loss": 1.6393, "step": 9790 }, { "epoch": 0.13399785329969713, "grad_norm": 11.114270210266113, "learning_rate": 0.00033499692349764137, "loss": 1.4782, "step": 9800 }, { "epoch": 0.13413458580306417, "grad_norm": 8.186043739318848, "learning_rate": 0.00033533875709304713, "loss": 1.6672, "step": 9810 }, { "epoch": 0.13427131830643121, "grad_norm": 21.253807067871094, "learning_rate": 0.00033568059068845283, "loss": 1.6568, "step": 9820 }, { "epoch": 0.13440805080979826, "grad_norm": 14.472782135009766, "learning_rate": 0.00033602242428385865, "loss": 1.5068, "step": 9830 }, { "epoch": 0.1345447833131653, "grad_norm": 8.103318214416504, "learning_rate": 0.0003363642578792644, "loss": 1.5382, "step": 9840 }, { "epoch": 0.13468151581653232, "grad_norm": 11.945072174072266, "learning_rate": 0.0003367060914746701, "loss": 1.6475, "step": 9850 }, { "epoch": 0.13481824831989936, "grad_norm": 12.468182563781738, "learning_rate": 0.00033704792507007587, "loss": 1.575, "step": 9860 }, { "epoch": 0.1349549808232664, "grad_norm": 28.873916625976562, "learning_rate": 0.0003373897586654817, "loss": 1.3807, "step": 9870 }, { "epoch": 0.13509171332663344, "grad_norm": 12.157588958740234, "learning_rate": 0.0003377315922608874, "loss": 1.2197, "step": 9880 }, { "epoch": 0.1352284458300005, "grad_norm": 13.094972610473633, "learning_rate": 0.00033807342585629314, "loss": 1.4274, "step": 9890 }, { "epoch": 0.1353651783333675, "grad_norm": 9.150672912597656, "learning_rate": 0.00033841525945169895, "loss": 1.513, "step": 9900 }, { "epoch": 0.13550191083673455, "grad_norm": 13.772185325622559, "learning_rate": 0.00033875709304710466, "loss": 1.6979, "step": 9910 }, { "epoch": 0.1356386433401016, "grad_norm": 39.736083984375, "learning_rate": 0.0003390989266425104, "loss": 1.4333, "step": 9920 }, { "epoch": 0.13577537584346863, "grad_norm": 15.833773612976074, "learning_rate": 0.00033944076023791623, "loss": 1.4786, "step": 9930 }, { "epoch": 0.13591210834683567, "grad_norm": 25.77311897277832, "learning_rate": 0.00033978259383332193, "loss": 1.4945, "step": 9940 }, { "epoch": 0.13604884085020272, "grad_norm": 13.642654418945312, "learning_rate": 0.0003401244274287277, "loss": 1.6918, "step": 9950 }, { "epoch": 0.13618557335356973, "grad_norm": 11.80904483795166, "learning_rate": 0.00034046626102413345, "loss": 1.3865, "step": 9960 }, { "epoch": 0.13632230585693678, "grad_norm": 9.331991195678711, "learning_rate": 0.0003408080946195392, "loss": 1.3116, "step": 9970 }, { "epoch": 0.13645903836030382, "grad_norm": 13.54224681854248, "learning_rate": 0.00034114992821494497, "loss": 1.6351, "step": 9980 }, { "epoch": 0.13659577086367086, "grad_norm": 9.857804298400879, "learning_rate": 0.0003414917618103507, "loss": 1.4654, "step": 9990 }, { "epoch": 0.1367325033670379, "grad_norm": 12.436583518981934, "learning_rate": 0.0003418335954057565, "loss": 1.4006, "step": 10000 }, { "epoch": 0.13686923587040492, "grad_norm": 10.11821174621582, "learning_rate": 0.00034217542900116224, "loss": 1.4266, "step": 10010 }, { "epoch": 0.13700596837377196, "grad_norm": 12.173975944519043, "learning_rate": 0.000342517262596568, "loss": 1.4621, "step": 10020 }, { "epoch": 0.137142700877139, "grad_norm": 17.135644912719727, "learning_rate": 0.00034285909619197376, "loss": 1.6224, "step": 10030 }, { "epoch": 0.13727943338050605, "grad_norm": 17.63218879699707, "learning_rate": 0.0003432009297873795, "loss": 1.6282, "step": 10040 }, { "epoch": 0.1374161658838731, "grad_norm": 10.5219144821167, "learning_rate": 0.0003435427633827853, "loss": 1.7941, "step": 10050 }, { "epoch": 0.13755289838724014, "grad_norm": 11.811152458190918, "learning_rate": 0.00034388459697819103, "loss": 1.5252, "step": 10060 }, { "epoch": 0.13768963089060715, "grad_norm": 11.402345657348633, "learning_rate": 0.00034422643057359674, "loss": 1.8, "step": 10070 }, { "epoch": 0.1378263633939742, "grad_norm": 18.5314884185791, "learning_rate": 0.00034456826416900255, "loss": 1.5262, "step": 10080 }, { "epoch": 0.13796309589734124, "grad_norm": 11.139239311218262, "learning_rate": 0.0003449100977644083, "loss": 1.4091, "step": 10090 }, { "epoch": 0.13809982840070828, "grad_norm": 10.249932289123535, "learning_rate": 0.000345251931359814, "loss": 1.6322, "step": 10100 }, { "epoch": 0.13823656090407532, "grad_norm": 20.57862091064453, "learning_rate": 0.0003455937649552198, "loss": 1.5702, "step": 10110 }, { "epoch": 0.13837329340744234, "grad_norm": 10.986406326293945, "learning_rate": 0.0003459355985506256, "loss": 1.7418, "step": 10120 }, { "epoch": 0.13851002591080938, "grad_norm": 14.881211280822754, "learning_rate": 0.0003462774321460313, "loss": 1.6534, "step": 10130 }, { "epoch": 0.13864675841417642, "grad_norm": 15.710637092590332, "learning_rate": 0.0003466192657414371, "loss": 1.8145, "step": 10140 }, { "epoch": 0.13878349091754347, "grad_norm": 14.78042221069336, "learning_rate": 0.00034696109933684286, "loss": 1.2265, "step": 10150 }, { "epoch": 0.1389202234209105, "grad_norm": 9.199790954589844, "learning_rate": 0.00034730293293224856, "loss": 1.4805, "step": 10160 }, { "epoch": 0.13905695592427755, "grad_norm": 14.059853553771973, "learning_rate": 0.0003476447665276544, "loss": 1.485, "step": 10170 }, { "epoch": 0.13919368842764457, "grad_norm": 10.544495582580566, "learning_rate": 0.00034798660012306013, "loss": 1.4486, "step": 10180 }, { "epoch": 0.1393304209310116, "grad_norm": 10.34166431427002, "learning_rate": 0.00034832843371846584, "loss": 1.9195, "step": 10190 }, { "epoch": 0.13946715343437865, "grad_norm": 12.046464920043945, "learning_rate": 0.00034867026731387165, "loss": 1.5985, "step": 10200 }, { "epoch": 0.1396038859377457, "grad_norm": 13.741666793823242, "learning_rate": 0.00034901210090927735, "loss": 1.3294, "step": 10210 }, { "epoch": 0.13974061844111274, "grad_norm": 10.51731014251709, "learning_rate": 0.0003493539345046831, "loss": 1.426, "step": 10220 }, { "epoch": 0.13987735094447976, "grad_norm": 17.641752243041992, "learning_rate": 0.0003496957681000889, "loss": 1.6014, "step": 10230 }, { "epoch": 0.1400140834478468, "grad_norm": 19.61350440979004, "learning_rate": 0.0003500376016954946, "loss": 1.4051, "step": 10240 }, { "epoch": 0.14015081595121384, "grad_norm": 13.030654907226562, "learning_rate": 0.0003503794352909004, "loss": 1.7081, "step": 10250 }, { "epoch": 0.14028754845458088, "grad_norm": 12.085139274597168, "learning_rate": 0.0003507212688863062, "loss": 1.4308, "step": 10260 }, { "epoch": 0.14042428095794793, "grad_norm": 14.981611251831055, "learning_rate": 0.0003510631024817119, "loss": 1.45, "step": 10270 }, { "epoch": 0.14056101346131494, "grad_norm": 9.85055923461914, "learning_rate": 0.00035140493607711766, "loss": 1.6211, "step": 10280 }, { "epoch": 0.140697745964682, "grad_norm": 10.296182632446289, "learning_rate": 0.00035174676967252347, "loss": 1.5564, "step": 10290 }, { "epoch": 0.14083447846804903, "grad_norm": 10.919535636901855, "learning_rate": 0.0003520886032679292, "loss": 1.6729, "step": 10300 }, { "epoch": 0.14097121097141607, "grad_norm": 11.721278190612793, "learning_rate": 0.00035243043686333493, "loss": 1.7055, "step": 10310 }, { "epoch": 0.14110794347478312, "grad_norm": 23.15643310546875, "learning_rate": 0.0003527722704587407, "loss": 1.5536, "step": 10320 }, { "epoch": 0.14124467597815016, "grad_norm": 23.041337966918945, "learning_rate": 0.00035311410405414645, "loss": 1.313, "step": 10330 }, { "epoch": 0.14138140848151717, "grad_norm": 12.53116226196289, "learning_rate": 0.0003534559376495522, "loss": 1.4985, "step": 10340 }, { "epoch": 0.14151814098488422, "grad_norm": 9.334783554077148, "learning_rate": 0.00035379777124495797, "loss": 1.6519, "step": 10350 }, { "epoch": 0.14165487348825126, "grad_norm": 15.627293586730957, "learning_rate": 0.0003541396048403637, "loss": 1.2699, "step": 10360 }, { "epoch": 0.1417916059916183, "grad_norm": 20.586275100708008, "learning_rate": 0.0003544814384357695, "loss": 1.4654, "step": 10370 }, { "epoch": 0.14192833849498535, "grad_norm": 11.630815505981445, "learning_rate": 0.0003548232720311752, "loss": 1.7888, "step": 10380 }, { "epoch": 0.14206507099835236, "grad_norm": 21.758817672729492, "learning_rate": 0.000355165105626581, "loss": 1.6638, "step": 10390 }, { "epoch": 0.1422018035017194, "grad_norm": 14.096400260925293, "learning_rate": 0.00035550693922198676, "loss": 1.5548, "step": 10400 }, { "epoch": 0.14233853600508645, "grad_norm": 41.569393157958984, "learning_rate": 0.00035584877281739246, "loss": 1.4593, "step": 10410 }, { "epoch": 0.1424752685084535, "grad_norm": 14.316863059997559, "learning_rate": 0.0003561906064127983, "loss": 1.6478, "step": 10420 }, { "epoch": 0.14261200101182053, "grad_norm": 28.017221450805664, "learning_rate": 0.000356532440008204, "loss": 1.611, "step": 10430 }, { "epoch": 0.14274873351518758, "grad_norm": 19.83359146118164, "learning_rate": 0.00035687427360360974, "loss": 1.7225, "step": 10440 }, { "epoch": 0.1428854660185546, "grad_norm": 21.702362060546875, "learning_rate": 0.00035721610719901555, "loss": 1.69, "step": 10450 }, { "epoch": 0.14302219852192163, "grad_norm": 16.43385887145996, "learning_rate": 0.00035755794079442125, "loss": 1.9011, "step": 10460 }, { "epoch": 0.14315893102528868, "grad_norm": 16.581647872924805, "learning_rate": 0.000357899774389827, "loss": 1.5695, "step": 10470 }, { "epoch": 0.14329566352865572, "grad_norm": 16.409391403198242, "learning_rate": 0.0003582416079852328, "loss": 1.7906, "step": 10480 }, { "epoch": 0.14343239603202276, "grad_norm": 10.756877899169922, "learning_rate": 0.00035858344158063853, "loss": 1.7449, "step": 10490 }, { "epoch": 0.14356912853538978, "grad_norm": 10.972028732299805, "learning_rate": 0.0003589252751760443, "loss": 1.6717, "step": 10500 }, { "epoch": 0.14370586103875682, "grad_norm": 10.86771297454834, "learning_rate": 0.0003592671087714501, "loss": 1.627, "step": 10510 }, { "epoch": 0.14384259354212386, "grad_norm": 10.110967636108398, "learning_rate": 0.0003596089423668558, "loss": 1.473, "step": 10520 }, { "epoch": 0.1439793260454909, "grad_norm": 12.758459091186523, "learning_rate": 0.00035995077596226156, "loss": 1.4518, "step": 10530 }, { "epoch": 0.14411605854885795, "grad_norm": 10.68895149230957, "learning_rate": 0.0003602926095576674, "loss": 1.4862, "step": 10540 }, { "epoch": 0.144252791052225, "grad_norm": 12.149828910827637, "learning_rate": 0.0003606344431530731, "loss": 1.446, "step": 10550 }, { "epoch": 0.144389523555592, "grad_norm": 10.7412109375, "learning_rate": 0.00036097627674847884, "loss": 1.6953, "step": 10560 }, { "epoch": 0.14452625605895905, "grad_norm": 19.276611328125, "learning_rate": 0.0003613181103438846, "loss": 1.4364, "step": 10570 }, { "epoch": 0.1446629885623261, "grad_norm": 27.313798904418945, "learning_rate": 0.00036165994393929035, "loss": 1.6308, "step": 10580 }, { "epoch": 0.14479972106569314, "grad_norm": 9.714661598205566, "learning_rate": 0.0003620017775346961, "loss": 1.6524, "step": 10590 }, { "epoch": 0.14493645356906018, "grad_norm": 19.308204650878906, "learning_rate": 0.00036234361113010187, "loss": 1.8075, "step": 10600 }, { "epoch": 0.1450731860724272, "grad_norm": 16.1231689453125, "learning_rate": 0.00036268544472550763, "loss": 1.7264, "step": 10610 }, { "epoch": 0.14520991857579424, "grad_norm": 13.409762382507324, "learning_rate": 0.0003630272783209134, "loss": 1.6697, "step": 10620 }, { "epoch": 0.14534665107916128, "grad_norm": 9.978682518005371, "learning_rate": 0.00036336911191631914, "loss": 1.6123, "step": 10630 }, { "epoch": 0.14548338358252833, "grad_norm": 15.4763822555542, "learning_rate": 0.0003637109455117249, "loss": 1.4979, "step": 10640 }, { "epoch": 0.14562011608589537, "grad_norm": 10.482678413391113, "learning_rate": 0.00036405277910713066, "loss": 1.1983, "step": 10650 }, { "epoch": 0.14575684858926238, "grad_norm": 17.152116775512695, "learning_rate": 0.0003643946127025364, "loss": 1.4977, "step": 10660 }, { "epoch": 0.14589358109262943, "grad_norm": 15.7410249710083, "learning_rate": 0.0003647364462979422, "loss": 1.5273, "step": 10670 }, { "epoch": 0.14603031359599647, "grad_norm": 12.538641929626465, "learning_rate": 0.0003650782798933479, "loss": 1.5142, "step": 10680 }, { "epoch": 0.1461670460993635, "grad_norm": 26.14236068725586, "learning_rate": 0.0003654201134887537, "loss": 1.6507, "step": 10690 }, { "epoch": 0.14630377860273056, "grad_norm": 12.7083740234375, "learning_rate": 0.00036576194708415945, "loss": 1.6017, "step": 10700 }, { "epoch": 0.1464405111060976, "grad_norm": 27.244184494018555, "learning_rate": 0.00036610378067956516, "loss": 1.3589, "step": 10710 }, { "epoch": 0.1465772436094646, "grad_norm": 22.457351684570312, "learning_rate": 0.00036644561427497097, "loss": 1.6312, "step": 10720 }, { "epoch": 0.14671397611283166, "grad_norm": 11.927680969238281, "learning_rate": 0.00036678744787037673, "loss": 1.6611, "step": 10730 }, { "epoch": 0.1468507086161987, "grad_norm": 10.86500072479248, "learning_rate": 0.00036712928146578243, "loss": 1.6338, "step": 10740 }, { "epoch": 0.14698744111956574, "grad_norm": 8.512675285339355, "learning_rate": 0.00036747111506118824, "loss": 1.6799, "step": 10750 }, { "epoch": 0.14712417362293279, "grad_norm": 17.79874610900879, "learning_rate": 0.000367812948656594, "loss": 1.6618, "step": 10760 }, { "epoch": 0.1472609061262998, "grad_norm": 16.687694549560547, "learning_rate": 0.0003681547822519997, "loss": 1.7381, "step": 10770 }, { "epoch": 0.14739763862966684, "grad_norm": 390.3592224121094, "learning_rate": 0.0003684966158474055, "loss": 9.0358, "step": 10780 }, { "epoch": 0.1475343711330339, "grad_norm": 110.64979553222656, "learning_rate": 0.0003688384494428113, "loss": 14.8996, "step": 10790 }, { "epoch": 0.14767110363640093, "grad_norm": 21.44157600402832, "learning_rate": 0.000369180283038217, "loss": 14.1008, "step": 10800 }, { "epoch": 0.14780783613976797, "grad_norm": 25.773683547973633, "learning_rate": 0.0003695221166336228, "loss": 10.0738, "step": 10810 }, { "epoch": 0.14794456864313502, "grad_norm": 6.984274387359619, "learning_rate": 0.0003698639502290285, "loss": 9.2329, "step": 10820 }, { "epoch": 0.14808130114650203, "grad_norm": 6.734428882598877, "learning_rate": 0.00037020578382443426, "loss": 8.7633, "step": 10830 }, { "epoch": 0.14821803364986907, "grad_norm": 4.084683895111084, "learning_rate": 0.00037054761741984007, "loss": 8.7667, "step": 10840 }, { "epoch": 0.14835476615323612, "grad_norm": 4.2202677726745605, "learning_rate": 0.00037088945101524577, "loss": 8.6185, "step": 10850 }, { "epoch": 0.14849149865660316, "grad_norm": 4.026717662811279, "learning_rate": 0.00037123128461065153, "loss": 8.1368, "step": 10860 }, { "epoch": 0.1486282311599702, "grad_norm": 4.466038227081299, "learning_rate": 0.00037157311820605734, "loss": 8.3393, "step": 10870 }, { "epoch": 0.14876496366333722, "grad_norm": 2.929975748062134, "learning_rate": 0.00037191495180146305, "loss": 7.8984, "step": 10880 }, { "epoch": 0.14890169616670426, "grad_norm": 4.583565711975098, "learning_rate": 0.0003722567853968688, "loss": 8.021, "step": 10890 }, { "epoch": 0.1490384286700713, "grad_norm": 2.0006179809570312, "learning_rate": 0.0003725986189922746, "loss": 8.3599, "step": 10900 }, { "epoch": 0.14917516117343835, "grad_norm": 3.9401774406433105, "learning_rate": 0.0003729404525876803, "loss": 8.0166, "step": 10910 }, { "epoch": 0.1493118936768054, "grad_norm": 4.938408374786377, "learning_rate": 0.0003732822861830861, "loss": 7.7693, "step": 10920 }, { "epoch": 0.1494486261801724, "grad_norm": 2.6067349910736084, "learning_rate": 0.0003736241197784918, "loss": 8.3282, "step": 10930 }, { "epoch": 0.14958535868353945, "grad_norm": 2.5286943912506104, "learning_rate": 0.0003739659533738976, "loss": 7.9637, "step": 10940 }, { "epoch": 0.1497220911869065, "grad_norm": 2.4737932682037354, "learning_rate": 0.00037430778696930336, "loss": 8.2074, "step": 10950 }, { "epoch": 0.14985882369027353, "grad_norm": 3.2946319580078125, "learning_rate": 0.00037464962056470906, "loss": 7.8377, "step": 10960 }, { "epoch": 0.14999555619364058, "grad_norm": 3.346872568130493, "learning_rate": 0.00037499145416011487, "loss": 8.168, "step": 10970 }, { "epoch": 0.15013228869700762, "grad_norm": 1.7866533994674683, "learning_rate": 0.00037533328775552063, "loss": 7.8867, "step": 10980 }, { "epoch": 0.15026902120037464, "grad_norm": 3.8745367527008057, "learning_rate": 0.00037567512135092633, "loss": 7.8939, "step": 10990 }, { "epoch": 0.15040575370374168, "grad_norm": 5.064061641693115, "learning_rate": 0.00037601695494633215, "loss": 7.7408, "step": 11000 }, { "epoch": 0.15054248620710872, "grad_norm": 1.936150074005127, "learning_rate": 0.0003763587885417379, "loss": 7.7122, "step": 11010 }, { "epoch": 0.15067921871047577, "grad_norm": 2.0518929958343506, "learning_rate": 0.0003767006221371436, "loss": 7.8564, "step": 11020 }, { "epoch": 0.1508159512138428, "grad_norm": 3.8527748584747314, "learning_rate": 0.0003770424557325494, "loss": 7.8765, "step": 11030 }, { "epoch": 0.15095268371720982, "grad_norm": 6.753509521484375, "learning_rate": 0.0003773842893279552, "loss": 7.8598, "step": 11040 }, { "epoch": 0.15108941622057687, "grad_norm": 2.7371068000793457, "learning_rate": 0.0003777261229233609, "loss": 7.902, "step": 11050 }, { "epoch": 0.1512261487239439, "grad_norm": 4.338279724121094, "learning_rate": 0.0003780679565187667, "loss": 7.7241, "step": 11060 }, { "epoch": 0.15136288122731095, "grad_norm": 1.7106044292449951, "learning_rate": 0.0003784097901141724, "loss": 7.6641, "step": 11070 }, { "epoch": 0.151499613730678, "grad_norm": 3.612610101699829, "learning_rate": 0.00037875162370957816, "loss": 7.8457, "step": 11080 }, { "epoch": 0.15163634623404504, "grad_norm": 2.1689348220825195, "learning_rate": 0.00037909345730498397, "loss": 7.7566, "step": 11090 }, { "epoch": 0.15177307873741205, "grad_norm": 2.4521687030792236, "learning_rate": 0.0003794352909003897, "loss": 7.9529, "step": 11100 }, { "epoch": 0.1519098112407791, "grad_norm": 4.5285563468933105, "learning_rate": 0.00037977712449579543, "loss": 7.7577, "step": 11110 }, { "epoch": 0.15204654374414614, "grad_norm": 2.7771008014678955, "learning_rate": 0.00038011895809120125, "loss": 8.0056, "step": 11120 }, { "epoch": 0.15218327624751318, "grad_norm": 2.882286787033081, "learning_rate": 0.00038046079168660695, "loss": 7.5082, "step": 11130 }, { "epoch": 0.15232000875088023, "grad_norm": 1.4280109405517578, "learning_rate": 0.0003808026252820127, "loss": 7.8069, "step": 11140 }, { "epoch": 0.15245674125424724, "grad_norm": 2.8432204723358154, "learning_rate": 0.0003811444588774185, "loss": 7.6256, "step": 11150 }, { "epoch": 0.15259347375761428, "grad_norm": 2.938655138015747, "learning_rate": 0.0003814862924728242, "loss": 7.8434, "step": 11160 }, { "epoch": 0.15273020626098133, "grad_norm": 6.0710625648498535, "learning_rate": 0.00038182812606823, "loss": 8.2847, "step": 11170 }, { "epoch": 0.15286693876434837, "grad_norm": 3.164820671081543, "learning_rate": 0.0003821699596636358, "loss": 7.6548, "step": 11180 }, { "epoch": 0.1530036712677154, "grad_norm": 2.1235365867614746, "learning_rate": 0.0003825117932590415, "loss": 7.6973, "step": 11190 }, { "epoch": 0.15314040377108246, "grad_norm": 1.6014801263809204, "learning_rate": 0.00038285362685444726, "loss": 7.9543, "step": 11200 }, { "epoch": 0.15327713627444947, "grad_norm": 1.3251384496688843, "learning_rate": 0.000383195460449853, "loss": 7.5198, "step": 11210 }, { "epoch": 0.15341386877781651, "grad_norm": 1.8818718194961548, "learning_rate": 0.0003835372940452588, "loss": 7.5394, "step": 11220 }, { "epoch": 0.15355060128118356, "grad_norm": 2.1569952964782715, "learning_rate": 0.00038387912764066453, "loss": 7.5018, "step": 11230 }, { "epoch": 0.1536873337845506, "grad_norm": 2.5803446769714355, "learning_rate": 0.0003842209612360703, "loss": 8.0354, "step": 11240 }, { "epoch": 0.15382406628791764, "grad_norm": 2.8624868392944336, "learning_rate": 0.00038456279483147605, "loss": 7.6042, "step": 11250 }, { "epoch": 0.15396079879128466, "grad_norm": 2.314150333404541, "learning_rate": 0.0003849046284268818, "loss": 7.8723, "step": 11260 }, { "epoch": 0.1540975312946517, "grad_norm": 3.4257774353027344, "learning_rate": 0.00038524646202228757, "loss": 7.7681, "step": 11270 }, { "epoch": 0.15423426379801874, "grad_norm": 1.8302890062332153, "learning_rate": 0.0003855882956176933, "loss": 7.9158, "step": 11280 }, { "epoch": 0.1543709963013858, "grad_norm": 1.7653090953826904, "learning_rate": 0.0003859301292130991, "loss": 7.5848, "step": 11290 }, { "epoch": 0.15450772880475283, "grad_norm": 1.512311339378357, "learning_rate": 0.00038627196280850484, "loss": 7.9128, "step": 11300 }, { "epoch": 0.15464446130811985, "grad_norm": 2.581136703491211, "learning_rate": 0.0003866137964039106, "loss": 7.8683, "step": 11310 }, { "epoch": 0.1547811938114869, "grad_norm": 2.2552151679992676, "learning_rate": 0.0003869556299993163, "loss": 7.7511, "step": 11320 }, { "epoch": 0.15491792631485393, "grad_norm": 3.005530595779419, "learning_rate": 0.0003872974635947221, "loss": 7.9259, "step": 11330 }, { "epoch": 0.15505465881822098, "grad_norm": 2.665281057357788, "learning_rate": 0.0003876392971901279, "loss": 7.7316, "step": 11340 }, { "epoch": 0.15519139132158802, "grad_norm": 1.990005373954773, "learning_rate": 0.0003879811307855336, "loss": 7.7776, "step": 11350 }, { "epoch": 0.15532812382495506, "grad_norm": 1.6811819076538086, "learning_rate": 0.0003883229643809394, "loss": 7.8804, "step": 11360 }, { "epoch": 0.15546485632832208, "grad_norm": 1.9353073835372925, "learning_rate": 0.00038866479797634515, "loss": 7.4407, "step": 11370 }, { "epoch": 0.15560158883168912, "grad_norm": 2.474177598953247, "learning_rate": 0.00038900663157175085, "loss": 7.9088, "step": 11380 }, { "epoch": 0.15573832133505616, "grad_norm": 2.037338972091675, "learning_rate": 0.00038934846516715667, "loss": 7.5927, "step": 11390 }, { "epoch": 0.1558750538384232, "grad_norm": 3.367844581604004, "learning_rate": 0.0003896902987625624, "loss": 7.6658, "step": 11400 }, { "epoch": 0.15601178634179025, "grad_norm": 2.6596648693084717, "learning_rate": 0.00039003213235796813, "loss": 7.7247, "step": 11410 }, { "epoch": 0.15614851884515726, "grad_norm": 1.6661304235458374, "learning_rate": 0.00039037396595337394, "loss": 7.7731, "step": 11420 }, { "epoch": 0.1562852513485243, "grad_norm": 1.7030770778656006, "learning_rate": 0.0003907157995487797, "loss": 7.7598, "step": 11430 }, { "epoch": 0.15642198385189135, "grad_norm": 1.6239924430847168, "learning_rate": 0.0003910576331441854, "loss": 8.0107, "step": 11440 }, { "epoch": 0.1565587163552584, "grad_norm": 3.8397562503814697, "learning_rate": 0.00039139946673959116, "loss": 7.7312, "step": 11450 }, { "epoch": 0.15669544885862544, "grad_norm": 1.9741352796554565, "learning_rate": 0.0003917413003349969, "loss": 7.6421, "step": 11460 }, { "epoch": 0.15683218136199248, "grad_norm": 2.505009412765503, "learning_rate": 0.0003920831339304027, "loss": 7.7406, "step": 11470 }, { "epoch": 0.1569689138653595, "grad_norm": 1.3213168382644653, "learning_rate": 0.00039242496752580844, "loss": 7.8219, "step": 11480 }, { "epoch": 0.15710564636872654, "grad_norm": 1.6959559917449951, "learning_rate": 0.0003927668011212142, "loss": 7.8201, "step": 11490 }, { "epoch": 0.15724237887209358, "grad_norm": 1.5378286838531494, "learning_rate": 0.00039310863471661995, "loss": 7.7386, "step": 11500 }, { "epoch": 0.15737911137546062, "grad_norm": 1.9488970041275024, "learning_rate": 0.0003934504683120257, "loss": 7.8122, "step": 11510 }, { "epoch": 0.15751584387882767, "grad_norm": 2.122734308242798, "learning_rate": 0.00039379230190743147, "loss": 7.9776, "step": 11520 }, { "epoch": 0.15765257638219468, "grad_norm": 1.3230074644088745, "learning_rate": 0.0003941341355028372, "loss": 7.7009, "step": 11530 }, { "epoch": 0.15778930888556172, "grad_norm": 1.3394955396652222, "learning_rate": 0.000394475969098243, "loss": 7.5296, "step": 11540 }, { "epoch": 0.15792604138892877, "grad_norm": 2.3061344623565674, "learning_rate": 0.00039481780269364874, "loss": 7.8279, "step": 11550 }, { "epoch": 0.1580627738922958, "grad_norm": 1.5783488750457764, "learning_rate": 0.0003951596362890545, "loss": 7.686, "step": 11560 }, { "epoch": 0.15819950639566285, "grad_norm": 1.347015380859375, "learning_rate": 0.0003955014698844602, "loss": 7.7927, "step": 11570 }, { "epoch": 0.1583362388990299, "grad_norm": 1.3041143417358398, "learning_rate": 0.000395843303479866, "loss": 7.4962, "step": 11580 }, { "epoch": 0.1584729714023969, "grad_norm": 1.8868650197982788, "learning_rate": 0.0003961851370752718, "loss": 7.532, "step": 11590 }, { "epoch": 0.15860970390576395, "grad_norm": 1.5766180753707886, "learning_rate": 0.0003965269706706775, "loss": 7.6391, "step": 11600 }, { "epoch": 0.158746436409131, "grad_norm": 2.8826234340667725, "learning_rate": 0.0003968688042660833, "loss": 7.6861, "step": 11610 }, { "epoch": 0.15888316891249804, "grad_norm": 2.099113702774048, "learning_rate": 0.00039721063786148905, "loss": 7.3422, "step": 11620 }, { "epoch": 0.15901990141586508, "grad_norm": 1.919895887374878, "learning_rate": 0.00039755247145689476, "loss": 8.0319, "step": 11630 }, { "epoch": 0.1591566339192321, "grad_norm": 1.1872228384017944, "learning_rate": 0.00039789430505230057, "loss": 7.9437, "step": 11640 }, { "epoch": 0.15929336642259914, "grad_norm": 2.294769525527954, "learning_rate": 0.0003982361386477063, "loss": 7.7188, "step": 11650 }, { "epoch": 0.15943009892596618, "grad_norm": 1.9499560594558716, "learning_rate": 0.00039857797224311203, "loss": 7.7547, "step": 11660 }, { "epoch": 0.15956683142933323, "grad_norm": 3.0767321586608887, "learning_rate": 0.00039891980583851784, "loss": 7.6889, "step": 11670 }, { "epoch": 0.15970356393270027, "grad_norm": 1.379686951637268, "learning_rate": 0.00039926163943392355, "loss": 7.9531, "step": 11680 }, { "epoch": 0.1598402964360673, "grad_norm": 1.727065920829773, "learning_rate": 0.0003996034730293293, "loss": 7.8563, "step": 11690 }, { "epoch": 0.15997702893943433, "grad_norm": 1.817326545715332, "learning_rate": 0.0003999453066247351, "loss": 7.6619, "step": 11700 }, { "epoch": 0.16011376144280137, "grad_norm": 2.5574791431427, "learning_rate": 0.0004002871402201408, "loss": 7.6312, "step": 11710 }, { "epoch": 0.16025049394616842, "grad_norm": 3.1072237491607666, "learning_rate": 0.0004006289738155466, "loss": 7.7047, "step": 11720 }, { "epoch": 0.16038722644953546, "grad_norm": 2.3145663738250732, "learning_rate": 0.0004009708074109524, "loss": 7.9023, "step": 11730 }, { "epoch": 0.1605239589529025, "grad_norm": 2.0867362022399902, "learning_rate": 0.0004013126410063581, "loss": 7.9008, "step": 11740 }, { "epoch": 0.16066069145626952, "grad_norm": 2.887556314468384, "learning_rate": 0.00040165447460176385, "loss": 7.7221, "step": 11750 }, { "epoch": 0.16079742395963656, "grad_norm": 1.50668466091156, "learning_rate": 0.00040199630819716967, "loss": 7.6206, "step": 11760 }, { "epoch": 0.1609341564630036, "grad_norm": 1.4724215269088745, "learning_rate": 0.00040233814179257537, "loss": 8.1493, "step": 11770 }, { "epoch": 0.16107088896637065, "grad_norm": 1.7996517419815063, "learning_rate": 0.00040267997538798113, "loss": 7.8211, "step": 11780 }, { "epoch": 0.1612076214697377, "grad_norm": 2.0416316986083984, "learning_rate": 0.00040302180898338694, "loss": 7.8097, "step": 11790 }, { "epoch": 0.1613443539731047, "grad_norm": 1.4473448991775513, "learning_rate": 0.00040336364257879265, "loss": 7.4054, "step": 11800 }, { "epoch": 0.16148108647647175, "grad_norm": 1.7653356790542603, "learning_rate": 0.0004037054761741984, "loss": 7.492, "step": 11810 }, { "epoch": 0.1616178189798388, "grad_norm": 3.7103025913238525, "learning_rate": 0.00040404730976960416, "loss": 7.7726, "step": 11820 }, { "epoch": 0.16175455148320583, "grad_norm": 1.9469963312149048, "learning_rate": 0.0004043891433650099, "loss": 7.4125, "step": 11830 }, { "epoch": 0.16189128398657288, "grad_norm": 2.8872463703155518, "learning_rate": 0.0004047309769604157, "loss": 7.7199, "step": 11840 }, { "epoch": 0.16202801648993992, "grad_norm": 1.4419482946395874, "learning_rate": 0.00040507281055582144, "loss": 8.0038, "step": 11850 }, { "epoch": 0.16216474899330693, "grad_norm": 1.573168396949768, "learning_rate": 0.0004054146441512272, "loss": 7.9198, "step": 11860 }, { "epoch": 0.16230148149667398, "grad_norm": 1.6147557497024536, "learning_rate": 0.00040575647774663295, "loss": 7.7047, "step": 11870 }, { "epoch": 0.16243821400004102, "grad_norm": 2.3930866718292236, "learning_rate": 0.0004060983113420387, "loss": 7.6302, "step": 11880 }, { "epoch": 0.16257494650340806, "grad_norm": 1.3758771419525146, "learning_rate": 0.00040644014493744447, "loss": 7.5201, "step": 11890 }, { "epoch": 0.1627116790067751, "grad_norm": 2.3961269855499268, "learning_rate": 0.00040678197853285023, "loss": 7.6513, "step": 11900 }, { "epoch": 0.16284841151014212, "grad_norm": 2.152463436126709, "learning_rate": 0.000407123812128256, "loss": 7.849, "step": 11910 }, { "epoch": 0.16298514401350916, "grad_norm": 2.4071731567382812, "learning_rate": 0.00040746564572366175, "loss": 7.4487, "step": 11920 }, { "epoch": 0.1631218765168762, "grad_norm": 3.722414493560791, "learning_rate": 0.00040780747931906745, "loss": 7.944, "step": 11930 }, { "epoch": 0.16325860902024325, "grad_norm": 1.5190678834915161, "learning_rate": 0.00040814931291447326, "loss": 7.6708, "step": 11940 }, { "epoch": 0.1633953415236103, "grad_norm": 1.6022846698760986, "learning_rate": 0.000408491146509879, "loss": 7.7188, "step": 11950 }, { "epoch": 0.16353207402697734, "grad_norm": 1.4508044719696045, "learning_rate": 0.0004088329801052847, "loss": 7.7536, "step": 11960 }, { "epoch": 0.16366880653034435, "grad_norm": 1.542344570159912, "learning_rate": 0.0004091748137006905, "loss": 7.8254, "step": 11970 }, { "epoch": 0.1638055390337114, "grad_norm": 6.108078479766846, "learning_rate": 0.0004095166472960963, "loss": 7.7241, "step": 11980 }, { "epoch": 0.16394227153707844, "grad_norm": 1.5855591297149658, "learning_rate": 0.000409858480891502, "loss": 7.7014, "step": 11990 }, { "epoch": 0.16407900404044548, "grad_norm": 1.0199832916259766, "learning_rate": 0.00041020031448690776, "loss": 7.6563, "step": 12000 }, { "epoch": 0.16421573654381252, "grad_norm": 1.6321617364883423, "learning_rate": 0.00041054214808231357, "loss": 7.7873, "step": 12010 }, { "epoch": 0.16435246904717954, "grad_norm": 0.9785923361778259, "learning_rate": 0.0004108839816777193, "loss": 7.9457, "step": 12020 }, { "epoch": 0.16448920155054658, "grad_norm": 2.048828363418579, "learning_rate": 0.00041122581527312503, "loss": 7.4875, "step": 12030 }, { "epoch": 0.16462593405391363, "grad_norm": 1.7650846242904663, "learning_rate": 0.00041156764886853084, "loss": 7.6148, "step": 12040 }, { "epoch": 0.16476266655728067, "grad_norm": 1.324089765548706, "learning_rate": 0.00041190948246393655, "loss": 7.5606, "step": 12050 }, { "epoch": 0.1648993990606477, "grad_norm": 1.576874017715454, "learning_rate": 0.0004122513160593423, "loss": 7.8759, "step": 12060 }, { "epoch": 0.16503613156401473, "grad_norm": 1.635180115699768, "learning_rate": 0.00041259314965474806, "loss": 8.0315, "step": 12070 }, { "epoch": 0.16517286406738177, "grad_norm": 1.5436793565750122, "learning_rate": 0.0004129349832501538, "loss": 7.5552, "step": 12080 }, { "epoch": 0.1653095965707488, "grad_norm": 0.9845343232154846, "learning_rate": 0.0004132768168455596, "loss": 7.3724, "step": 12090 }, { "epoch": 0.16544632907411586, "grad_norm": 3.17570424079895, "learning_rate": 0.00041361865044096534, "loss": 7.4192, "step": 12100 }, { "epoch": 0.1655830615774829, "grad_norm": 1.1613786220550537, "learning_rate": 0.0004139604840363711, "loss": 7.594, "step": 12110 }, { "epoch": 0.16571979408084994, "grad_norm": 1.464467167854309, "learning_rate": 0.00041430231763177686, "loss": 7.4339, "step": 12120 }, { "epoch": 0.16585652658421696, "grad_norm": 1.373623251914978, "learning_rate": 0.0004146441512271826, "loss": 7.6629, "step": 12130 }, { "epoch": 0.165993259087584, "grad_norm": 1.473044514656067, "learning_rate": 0.00041498598482258837, "loss": 7.7313, "step": 12140 }, { "epoch": 0.16612999159095104, "grad_norm": 1.6703749895095825, "learning_rate": 0.00041532781841799413, "loss": 7.9061, "step": 12150 }, { "epoch": 0.16626672409431809, "grad_norm": 1.193723440170288, "learning_rate": 0.0004156696520133999, "loss": 7.6023, "step": 12160 }, { "epoch": 0.16640345659768513, "grad_norm": 1.1301517486572266, "learning_rate": 0.00041601148560880565, "loss": 7.6985, "step": 12170 }, { "epoch": 0.16654018910105214, "grad_norm": 1.7178887128829956, "learning_rate": 0.00041635331920421135, "loss": 7.666, "step": 12180 }, { "epoch": 0.1666769216044192, "grad_norm": 1.2618255615234375, "learning_rate": 0.00041669515279961716, "loss": 7.5837, "step": 12190 }, { "epoch": 0.16681365410778623, "grad_norm": 3.088564157485962, "learning_rate": 0.0004170369863950229, "loss": 7.6761, "step": 12200 }, { "epoch": 0.16695038661115327, "grad_norm": 2.488185167312622, "learning_rate": 0.0004173788199904286, "loss": 7.9463, "step": 12210 }, { "epoch": 0.16708711911452032, "grad_norm": 1.359712839126587, "learning_rate": 0.00041772065358583444, "loss": 7.7303, "step": 12220 }, { "epoch": 0.16722385161788736, "grad_norm": 2.3020639419555664, "learning_rate": 0.0004180624871812402, "loss": 7.5956, "step": 12230 }, { "epoch": 0.16736058412125437, "grad_norm": 1.0977144241333008, "learning_rate": 0.0004184043207766459, "loss": 7.7157, "step": 12240 }, { "epoch": 0.16749731662462142, "grad_norm": 2.7470591068267822, "learning_rate": 0.0004187461543720517, "loss": 7.6919, "step": 12250 }, { "epoch": 0.16763404912798846, "grad_norm": 1.3803846836090088, "learning_rate": 0.00041908798796745747, "loss": 7.4292, "step": 12260 }, { "epoch": 0.1677707816313555, "grad_norm": 2.3173372745513916, "learning_rate": 0.0004194298215628632, "loss": 7.7151, "step": 12270 }, { "epoch": 0.16790751413472255, "grad_norm": 1.7350516319274902, "learning_rate": 0.000419771655158269, "loss": 7.4656, "step": 12280 }, { "epoch": 0.16804424663808956, "grad_norm": 2.580906629562378, "learning_rate": 0.00042011348875367475, "loss": 7.5639, "step": 12290 }, { "epoch": 0.1681809791414566, "grad_norm": 1.4564954042434692, "learning_rate": 0.00042045532234908045, "loss": 7.589, "step": 12300 }, { "epoch": 0.16831771164482365, "grad_norm": 2.6639881134033203, "learning_rate": 0.00042079715594448626, "loss": 7.4669, "step": 12310 }, { "epoch": 0.1684544441481907, "grad_norm": 2.0108389854431152, "learning_rate": 0.00042113898953989197, "loss": 7.4118, "step": 12320 }, { "epoch": 0.16859117665155773, "grad_norm": 1.0486029386520386, "learning_rate": 0.0004214808231352977, "loss": 7.795, "step": 12330 }, { "epoch": 0.16872790915492478, "grad_norm": 2.643404960632324, "learning_rate": 0.00042182265673070354, "loss": 7.8394, "step": 12340 }, { "epoch": 0.1688646416582918, "grad_norm": 1.3081042766571045, "learning_rate": 0.00042216449032610924, "loss": 7.5853, "step": 12350 }, { "epoch": 0.16900137416165883, "grad_norm": 1.7795536518096924, "learning_rate": 0.000422506323921515, "loss": 7.7411, "step": 12360 }, { "epoch": 0.16913810666502588, "grad_norm": 2.9749505519866943, "learning_rate": 0.0004228481575169208, "loss": 7.4973, "step": 12370 }, { "epoch": 0.16927483916839292, "grad_norm": 1.8179799318313599, "learning_rate": 0.0004231899911123265, "loss": 7.6855, "step": 12380 }, { "epoch": 0.16941157167175996, "grad_norm": 3.4227185249328613, "learning_rate": 0.0004235318247077323, "loss": 7.6754, "step": 12390 }, { "epoch": 0.16954830417512698, "grad_norm": 1.5018532276153564, "learning_rate": 0.0004238736583031381, "loss": 7.6211, "step": 12400 }, { "epoch": 0.16968503667849402, "grad_norm": 2.4264068603515625, "learning_rate": 0.0004242154918985438, "loss": 7.7405, "step": 12410 }, { "epoch": 0.16982176918186107, "grad_norm": 1.7346466779708862, "learning_rate": 0.00042455732549394955, "loss": 7.6173, "step": 12420 }, { "epoch": 0.1699585016852281, "grad_norm": 1.6127060651779175, "learning_rate": 0.00042489915908935536, "loss": 7.4843, "step": 12430 }, { "epoch": 0.17009523418859515, "grad_norm": 1.4761632680892944, "learning_rate": 0.00042524099268476107, "loss": 7.4025, "step": 12440 }, { "epoch": 0.17023196669196217, "grad_norm": 1.797277808189392, "learning_rate": 0.0004255828262801668, "loss": 7.8282, "step": 12450 }, { "epoch": 0.1703686991953292, "grad_norm": 1.1462438106536865, "learning_rate": 0.0004259246598755726, "loss": 7.6837, "step": 12460 }, { "epoch": 0.17050543169869625, "grad_norm": 1.422111988067627, "learning_rate": 0.00042626649347097834, "loss": 7.6702, "step": 12470 }, { "epoch": 0.1706421642020633, "grad_norm": 1.3015811443328857, "learning_rate": 0.0004266083270663841, "loss": 7.6965, "step": 12480 }, { "epoch": 0.17077889670543034, "grad_norm": 2.070350170135498, "learning_rate": 0.00042695016066178986, "loss": 7.7155, "step": 12490 }, { "epoch": 0.17091562920879738, "grad_norm": 1.2648630142211914, "learning_rate": 0.0004272919942571956, "loss": 7.9188, "step": 12500 }, { "epoch": 0.1710523617121644, "grad_norm": 1.6624401807785034, "learning_rate": 0.0004276338278526014, "loss": 7.6367, "step": 12510 }, { "epoch": 0.17118909421553144, "grad_norm": 1.357123613357544, "learning_rate": 0.0004279756614480071, "loss": 7.3786, "step": 12520 }, { "epoch": 0.17132582671889848, "grad_norm": 2.6532375812530518, "learning_rate": 0.0004283174950434129, "loss": 7.7172, "step": 12530 }, { "epoch": 0.17146255922226553, "grad_norm": 1.2117388248443604, "learning_rate": 0.00042865932863881865, "loss": 7.5722, "step": 12540 }, { "epoch": 0.17159929172563257, "grad_norm": 1.1246329545974731, "learning_rate": 0.00042900116223422435, "loss": 7.7077, "step": 12550 }, { "epoch": 0.17173602422899958, "grad_norm": 2.2603840827941895, "learning_rate": 0.00042934299582963017, "loss": 7.5641, "step": 12560 }, { "epoch": 0.17187275673236663, "grad_norm": 3.0035102367401123, "learning_rate": 0.00042968482942503587, "loss": 7.6979, "step": 12570 }, { "epoch": 0.17200948923573367, "grad_norm": 2.2166268825531006, "learning_rate": 0.00043002666302044163, "loss": 7.8833, "step": 12580 }, { "epoch": 0.1721462217391007, "grad_norm": 1.2718641757965088, "learning_rate": 0.00043036849661584744, "loss": 7.7926, "step": 12590 }, { "epoch": 0.17228295424246776, "grad_norm": 1.0844566822052002, "learning_rate": 0.00043071033021125314, "loss": 7.5781, "step": 12600 }, { "epoch": 0.1724196867458348, "grad_norm": 1.7113298177719116, "learning_rate": 0.0004310521638066589, "loss": 7.6144, "step": 12610 }, { "epoch": 0.17255641924920181, "grad_norm": 1.2335397005081177, "learning_rate": 0.0004313939974020647, "loss": 7.5877, "step": 12620 }, { "epoch": 0.17269315175256886, "grad_norm": 1.2090284824371338, "learning_rate": 0.0004317358309974704, "loss": 7.7904, "step": 12630 }, { "epoch": 0.1728298842559359, "grad_norm": 1.221006989479065, "learning_rate": 0.0004320776645928762, "loss": 7.781, "step": 12640 }, { "epoch": 0.17296661675930294, "grad_norm": 0.9907422065734863, "learning_rate": 0.000432419498188282, "loss": 7.4574, "step": 12650 }, { "epoch": 0.17310334926267, "grad_norm": 1.470785140991211, "learning_rate": 0.0004327613317836877, "loss": 7.5348, "step": 12660 }, { "epoch": 0.173240081766037, "grad_norm": 1.558768630027771, "learning_rate": 0.00043310316537909345, "loss": 7.4844, "step": 12670 }, { "epoch": 0.17337681426940404, "grad_norm": 2.067192316055298, "learning_rate": 0.00043344499897449927, "loss": 7.668, "step": 12680 }, { "epoch": 0.1735135467727711, "grad_norm": 2.1631007194519043, "learning_rate": 0.00043378683256990497, "loss": 7.4792, "step": 12690 }, { "epoch": 0.17365027927613813, "grad_norm": 1.5687947273254395, "learning_rate": 0.00043412866616531073, "loss": 7.5486, "step": 12700 }, { "epoch": 0.17378701177950517, "grad_norm": 0.8811748623847961, "learning_rate": 0.0004344704997607165, "loss": 7.5867, "step": 12710 }, { "epoch": 0.1739237442828722, "grad_norm": 1.7644721269607544, "learning_rate": 0.00043481233335612224, "loss": 7.7336, "step": 12720 }, { "epoch": 0.17406047678623923, "grad_norm": 1.7388354539871216, "learning_rate": 0.000435154166951528, "loss": 7.4458, "step": 12730 }, { "epoch": 0.17419720928960628, "grad_norm": 1.344812035560608, "learning_rate": 0.00043549600054693376, "loss": 7.6335, "step": 12740 }, { "epoch": 0.17433394179297332, "grad_norm": 1.0198007822036743, "learning_rate": 0.0004358378341423395, "loss": 7.3998, "step": 12750 }, { "epoch": 0.17447067429634036, "grad_norm": 1.4077184200286865, "learning_rate": 0.0004361796677377453, "loss": 7.7471, "step": 12760 }, { "epoch": 0.1746074067997074, "grad_norm": 1.3655078411102295, "learning_rate": 0.00043652150133315104, "loss": 7.8998, "step": 12770 }, { "epoch": 0.17474413930307442, "grad_norm": 0.9903821349143982, "learning_rate": 0.0004368633349285568, "loss": 7.6989, "step": 12780 }, { "epoch": 0.17488087180644146, "grad_norm": 0.9380472302436829, "learning_rate": 0.00043720516852396255, "loss": 7.3266, "step": 12790 }, { "epoch": 0.1750176043098085, "grad_norm": 1.2113131284713745, "learning_rate": 0.0004375470021193683, "loss": 7.5215, "step": 12800 }, { "epoch": 0.17515433681317555, "grad_norm": 1.3873311281204224, "learning_rate": 0.00043788883571477407, "loss": 7.3948, "step": 12810 }, { "epoch": 0.1752910693165426, "grad_norm": 1.192718505859375, "learning_rate": 0.00043823066931017977, "loss": 7.6707, "step": 12820 }, { "epoch": 0.1754278018199096, "grad_norm": 2.1938979625701904, "learning_rate": 0.0004385725029055856, "loss": 7.2823, "step": 12830 }, { "epoch": 0.17556453432327665, "grad_norm": 2.2065722942352295, "learning_rate": 0.00043891433650099134, "loss": 7.605, "step": 12840 }, { "epoch": 0.1757012668266437, "grad_norm": 3.8292243480682373, "learning_rate": 0.00043925617009639705, "loss": 7.5461, "step": 12850 }, { "epoch": 0.17583799933001074, "grad_norm": 1.108868956565857, "learning_rate": 0.00043959800369180286, "loss": 7.7277, "step": 12860 }, { "epoch": 0.17597473183337778, "grad_norm": 1.0494130849838257, "learning_rate": 0.0004399398372872086, "loss": 7.3939, "step": 12870 }, { "epoch": 0.17611146433674482, "grad_norm": 1.2447690963745117, "learning_rate": 0.0004402816708826143, "loss": 7.9792, "step": 12880 }, { "epoch": 0.17624819684011184, "grad_norm": 1.0681244134902954, "learning_rate": 0.00044062350447802013, "loss": 7.8202, "step": 12890 }, { "epoch": 0.17638492934347888, "grad_norm": 1.6478140354156494, "learning_rate": 0.0004409653380734259, "loss": 7.7498, "step": 12900 }, { "epoch": 0.17652166184684592, "grad_norm": 2.0482141971588135, "learning_rate": 0.0004413071716688316, "loss": 7.5216, "step": 12910 }, { "epoch": 0.17665839435021297, "grad_norm": 1.473577618598938, "learning_rate": 0.0004416490052642374, "loss": 7.532, "step": 12920 }, { "epoch": 0.17679512685358, "grad_norm": 1.3951621055603027, "learning_rate": 0.0004419908388596431, "loss": 7.5572, "step": 12930 }, { "epoch": 0.17693185935694702, "grad_norm": 1.6833596229553223, "learning_rate": 0.00044233267245504887, "loss": 7.5811, "step": 12940 }, { "epoch": 0.17706859186031407, "grad_norm": 1.2268767356872559, "learning_rate": 0.0004426745060504547, "loss": 7.7978, "step": 12950 }, { "epoch": 0.1772053243636811, "grad_norm": 1.5072031021118164, "learning_rate": 0.0004430163396458604, "loss": 7.7714, "step": 12960 }, { "epoch": 0.17734205686704815, "grad_norm": 1.4829672574996948, "learning_rate": 0.00044335817324126615, "loss": 7.7503, "step": 12970 }, { "epoch": 0.1774787893704152, "grad_norm": 1.1555254459381104, "learning_rate": 0.00044370000683667196, "loss": 7.5896, "step": 12980 }, { "epoch": 0.17761552187378224, "grad_norm": 1.3358078002929688, "learning_rate": 0.00044404184043207766, "loss": 7.8906, "step": 12990 }, { "epoch": 0.17775225437714925, "grad_norm": 1.9066728353500366, "learning_rate": 0.0004443836740274834, "loss": 7.79, "step": 13000 }, { "epoch": 0.1778889868805163, "grad_norm": 1.3065485954284668, "learning_rate": 0.00044472550762288923, "loss": 7.7979, "step": 13010 }, { "epoch": 0.17802571938388334, "grad_norm": 2.30647873878479, "learning_rate": 0.00044506734121829494, "loss": 7.8973, "step": 13020 }, { "epoch": 0.17816245188725038, "grad_norm": 1.360392689704895, "learning_rate": 0.0004454091748137007, "loss": 7.5625, "step": 13030 }, { "epoch": 0.17829918439061743, "grad_norm": 2.3569953441619873, "learning_rate": 0.00044575100840910645, "loss": 7.571, "step": 13040 }, { "epoch": 0.17843591689398444, "grad_norm": 1.144227147102356, "learning_rate": 0.0004460928420045122, "loss": 7.8677, "step": 13050 }, { "epoch": 0.17857264939735149, "grad_norm": 0.9712961912155151, "learning_rate": 0.00044643467559991797, "loss": 7.8174, "step": 13060 }, { "epoch": 0.17870938190071853, "grad_norm": 1.3683278560638428, "learning_rate": 0.0004467765091953237, "loss": 7.6069, "step": 13070 }, { "epoch": 0.17884611440408557, "grad_norm": 2.1387205123901367, "learning_rate": 0.0004471183427907295, "loss": 7.9196, "step": 13080 }, { "epoch": 0.17898284690745261, "grad_norm": 2.043381452560425, "learning_rate": 0.00044746017638613525, "loss": 7.7451, "step": 13090 }, { "epoch": 0.17911957941081963, "grad_norm": 1.424304723739624, "learning_rate": 0.00044780200998154095, "loss": 7.7633, "step": 13100 }, { "epoch": 0.17925631191418667, "grad_norm": 1.0831018686294556, "learning_rate": 0.00044814384357694676, "loss": 7.8057, "step": 13110 }, { "epoch": 0.17939304441755372, "grad_norm": 1.7210713624954224, "learning_rate": 0.0004484856771723525, "loss": 7.6667, "step": 13120 }, { "epoch": 0.17952977692092076, "grad_norm": 1.3987821340560913, "learning_rate": 0.0004488275107677582, "loss": 7.8054, "step": 13130 }, { "epoch": 0.1796665094242878, "grad_norm": 1.0988763570785522, "learning_rate": 0.00044916934436316404, "loss": 7.8015, "step": 13140 }, { "epoch": 0.17980324192765484, "grad_norm": 1.4788718223571777, "learning_rate": 0.0004495111779585698, "loss": 7.7953, "step": 13150 }, { "epoch": 0.17993997443102186, "grad_norm": 1.7130382061004639, "learning_rate": 0.0004498530115539755, "loss": 7.4727, "step": 13160 }, { "epoch": 0.1800767069343889, "grad_norm": 1.6694048643112183, "learning_rate": 0.0004501948451493813, "loss": 7.4214, "step": 13170 }, { "epoch": 0.18021343943775595, "grad_norm": 1.7351166009902954, "learning_rate": 0.000450536678744787, "loss": 7.5925, "step": 13180 }, { "epoch": 0.180350171941123, "grad_norm": 1.0022963285446167, "learning_rate": 0.0004508785123401928, "loss": 7.8719, "step": 13190 }, { "epoch": 0.18048690444449003, "grad_norm": 0.9412346482276917, "learning_rate": 0.0004512203459355986, "loss": 7.5621, "step": 13200 }, { "epoch": 0.18062363694785705, "grad_norm": 0.9836140275001526, "learning_rate": 0.0004515621795310043, "loss": 7.7694, "step": 13210 }, { "epoch": 0.1807603694512241, "grad_norm": 2.0050244331359863, "learning_rate": 0.00045190401312641005, "loss": 7.6081, "step": 13220 }, { "epoch": 0.18089710195459113, "grad_norm": 1.4537687301635742, "learning_rate": 0.00045224584672181586, "loss": 7.4569, "step": 13230 }, { "epoch": 0.18103383445795818, "grad_norm": 2.3283469676971436, "learning_rate": 0.00045258768031722157, "loss": 7.7485, "step": 13240 }, { "epoch": 0.18117056696132522, "grad_norm": 2.2511589527130127, "learning_rate": 0.0004529295139126273, "loss": 7.5684, "step": 13250 }, { "epoch": 0.18130729946469226, "grad_norm": 1.5452384948730469, "learning_rate": 0.00045327134750803314, "loss": 7.6078, "step": 13260 }, { "epoch": 0.18144403196805928, "grad_norm": 1.7014293670654297, "learning_rate": 0.00045361318110343884, "loss": 7.5312, "step": 13270 }, { "epoch": 0.18158076447142632, "grad_norm": 1.3785866498947144, "learning_rate": 0.0004539550146988446, "loss": 7.4785, "step": 13280 }, { "epoch": 0.18171749697479336, "grad_norm": 1.0427156686782837, "learning_rate": 0.0004542968482942504, "loss": 7.6453, "step": 13290 }, { "epoch": 0.1818542294781604, "grad_norm": 1.5770769119262695, "learning_rate": 0.0004546386818896561, "loss": 7.7804, "step": 13300 }, { "epoch": 0.18199096198152745, "grad_norm": 1.049487829208374, "learning_rate": 0.0004549805154850619, "loss": 7.6581, "step": 13310 }, { "epoch": 0.18212769448489446, "grad_norm": 1.232624888420105, "learning_rate": 0.00045532234908046763, "loss": 7.4369, "step": 13320 }, { "epoch": 0.1822644269882615, "grad_norm": 1.8254330158233643, "learning_rate": 0.0004556641826758734, "loss": 7.7738, "step": 13330 }, { "epoch": 0.18240115949162855, "grad_norm": 1.4021592140197754, "learning_rate": 0.00045600601627127915, "loss": 7.6961, "step": 13340 }, { "epoch": 0.1825378919949956, "grad_norm": 1.9524694681167603, "learning_rate": 0.0004563478498666849, "loss": 7.6403, "step": 13350 }, { "epoch": 0.18267462449836264, "grad_norm": 1.6463279724121094, "learning_rate": 0.00045668968346209067, "loss": 7.6223, "step": 13360 }, { "epoch": 0.18281135700172968, "grad_norm": 1.4336804151535034, "learning_rate": 0.0004570315170574964, "loss": 7.647, "step": 13370 }, { "epoch": 0.1829480895050967, "grad_norm": 2.098055124282837, "learning_rate": 0.0004573733506529022, "loss": 7.5196, "step": 13380 }, { "epoch": 0.18308482200846374, "grad_norm": 1.5973213911056519, "learning_rate": 0.00045771518424830794, "loss": 7.7504, "step": 13390 }, { "epoch": 0.18322155451183078, "grad_norm": 1.2016412019729614, "learning_rate": 0.0004580570178437137, "loss": 7.7541, "step": 13400 }, { "epoch": 0.18335828701519782, "grad_norm": 0.9725880026817322, "learning_rate": 0.00045839885143911946, "loss": 7.7921, "step": 13410 }, { "epoch": 0.18349501951856487, "grad_norm": 1.4694581031799316, "learning_rate": 0.0004587406850345252, "loss": 7.5896, "step": 13420 }, { "epoch": 0.18363175202193188, "grad_norm": 1.2933915853500366, "learning_rate": 0.0004590825186299309, "loss": 7.5353, "step": 13430 }, { "epoch": 0.18376848452529893, "grad_norm": 1.2378654479980469, "learning_rate": 0.00045942435222533673, "loss": 7.8119, "step": 13440 }, { "epoch": 0.18390521702866597, "grad_norm": 1.3325870037078857, "learning_rate": 0.0004597661858207425, "loss": 7.5973, "step": 13450 }, { "epoch": 0.184041949532033, "grad_norm": 0.9088072776794434, "learning_rate": 0.0004601080194161482, "loss": 7.7953, "step": 13460 }, { "epoch": 0.18417868203540005, "grad_norm": 1.0589359998703003, "learning_rate": 0.000460449853011554, "loss": 7.6697, "step": 13470 }, { "epoch": 0.18431541453876707, "grad_norm": 1.7919000387191772, "learning_rate": 0.00046079168660695976, "loss": 7.4191, "step": 13480 }, { "epoch": 0.1844521470421341, "grad_norm": 1.6553257703781128, "learning_rate": 0.00046113352020236547, "loss": 7.7796, "step": 13490 }, { "epoch": 0.18458887954550116, "grad_norm": 0.9764618277549744, "learning_rate": 0.0004614753537977713, "loss": 7.7188, "step": 13500 }, { "epoch": 0.1847256120488682, "grad_norm": 1.6983857154846191, "learning_rate": 0.00046181718739317704, "loss": 7.6682, "step": 13510 }, { "epoch": 0.18486234455223524, "grad_norm": 0.9804957509040833, "learning_rate": 0.00046215902098858274, "loss": 7.7883, "step": 13520 }, { "epoch": 0.18499907705560228, "grad_norm": 1.344213604927063, "learning_rate": 0.00046250085458398856, "loss": 7.7394, "step": 13530 }, { "epoch": 0.1851358095589693, "grad_norm": 1.4391740560531616, "learning_rate": 0.0004628426881793943, "loss": 7.6956, "step": 13540 }, { "epoch": 0.18527254206233634, "grad_norm": 1.2659848928451538, "learning_rate": 0.0004631845217748, "loss": 7.7126, "step": 13550 }, { "epoch": 0.18540927456570339, "grad_norm": 1.2121280431747437, "learning_rate": 0.00046352635537020583, "loss": 7.7364, "step": 13560 }, { "epoch": 0.18554600706907043, "grad_norm": 2.2707576751708984, "learning_rate": 0.00046386818896561153, "loss": 7.7056, "step": 13570 }, { "epoch": 0.18568273957243747, "grad_norm": 1.6847801208496094, "learning_rate": 0.0004642100225610173, "loss": 7.4536, "step": 13580 }, { "epoch": 0.1858194720758045, "grad_norm": 2.3024399280548096, "learning_rate": 0.00046455185615642305, "loss": 7.586, "step": 13590 }, { "epoch": 0.18595620457917153, "grad_norm": 1.633246660232544, "learning_rate": 0.0004648936897518288, "loss": 7.5064, "step": 13600 }, { "epoch": 0.18609293708253857, "grad_norm": 1.4840563535690308, "learning_rate": 0.00046523552334723457, "loss": 7.6997, "step": 13610 }, { "epoch": 0.18622966958590562, "grad_norm": 1.5581241846084595, "learning_rate": 0.0004655773569426403, "loss": 7.4962, "step": 13620 }, { "epoch": 0.18636640208927266, "grad_norm": 1.7573093175888062, "learning_rate": 0.0004659191905380461, "loss": 7.6998, "step": 13630 }, { "epoch": 0.1865031345926397, "grad_norm": 1.4400055408477783, "learning_rate": 0.00046626102413345184, "loss": 7.3187, "step": 13640 }, { "epoch": 0.18663986709600672, "grad_norm": 2.254563093185425, "learning_rate": 0.0004666028577288576, "loss": 7.6618, "step": 13650 }, { "epoch": 0.18677659959937376, "grad_norm": 1.4277135133743286, "learning_rate": 0.00046694469132426336, "loss": 7.58, "step": 13660 }, { "epoch": 0.1869133321027408, "grad_norm": 1.4482396841049194, "learning_rate": 0.0004672865249196691, "loss": 7.827, "step": 13670 }, { "epoch": 0.18705006460610785, "grad_norm": 3.0553390979766846, "learning_rate": 0.0004676283585150748, "loss": 7.5328, "step": 13680 }, { "epoch": 0.1871867971094749, "grad_norm": 2.2228586673736572, "learning_rate": 0.00046797019211048063, "loss": 7.4575, "step": 13690 }, { "epoch": 0.1873235296128419, "grad_norm": 1.1275136470794678, "learning_rate": 0.0004683120257058864, "loss": 7.7328, "step": 13700 }, { "epoch": 0.18746026211620895, "grad_norm": 1.0477592945098877, "learning_rate": 0.0004686538593012921, "loss": 7.7626, "step": 13710 }, { "epoch": 0.187596994619576, "grad_norm": 1.4008793830871582, "learning_rate": 0.0004689956928966979, "loss": 7.6493, "step": 13720 }, { "epoch": 0.18773372712294303, "grad_norm": 1.1963558197021484, "learning_rate": 0.00046933752649210367, "loss": 7.3917, "step": 13730 }, { "epoch": 0.18787045962631008, "grad_norm": 1.5265346765518188, "learning_rate": 0.00046967936008750937, "loss": 7.4617, "step": 13740 }, { "epoch": 0.18800719212967712, "grad_norm": 1.1476236581802368, "learning_rate": 0.0004700211936829152, "loss": 7.8752, "step": 13750 }, { "epoch": 0.18814392463304414, "grad_norm": 1.6883585453033447, "learning_rate": 0.00047036302727832094, "loss": 7.4002, "step": 13760 }, { "epoch": 0.18828065713641118, "grad_norm": 1.1447488069534302, "learning_rate": 0.00047070486087372665, "loss": 7.3073, "step": 13770 }, { "epoch": 0.18841738963977822, "grad_norm": 0.9484491348266602, "learning_rate": 0.00047104669446913246, "loss": 7.7293, "step": 13780 }, { "epoch": 0.18855412214314526, "grad_norm": 1.7670319080352783, "learning_rate": 0.0004713885280645382, "loss": 7.653, "step": 13790 }, { "epoch": 0.1886908546465123, "grad_norm": 1.1378790140151978, "learning_rate": 0.0004717303616599439, "loss": 7.4739, "step": 13800 }, { "epoch": 0.18882758714987932, "grad_norm": 0.8853107690811157, "learning_rate": 0.00047207219525534973, "loss": 7.7371, "step": 13810 }, { "epoch": 0.18896431965324637, "grad_norm": 1.4191807508468628, "learning_rate": 0.00047241402885075544, "loss": 7.4175, "step": 13820 }, { "epoch": 0.1891010521566134, "grad_norm": 1.415773868560791, "learning_rate": 0.0004727558624461612, "loss": 7.4108, "step": 13830 }, { "epoch": 0.18923778465998045, "grad_norm": 1.8324781656265259, "learning_rate": 0.000473097696041567, "loss": 7.668, "step": 13840 }, { "epoch": 0.1893745171633475, "grad_norm": 1.8625495433807373, "learning_rate": 0.0004734395296369727, "loss": 7.8242, "step": 13850 }, { "epoch": 0.1895112496667145, "grad_norm": 1.2707642316818237, "learning_rate": 0.00047378136323237847, "loss": 7.6199, "step": 13860 }, { "epoch": 0.18964798217008155, "grad_norm": 1.171343445777893, "learning_rate": 0.0004741231968277843, "loss": 7.6018, "step": 13870 }, { "epoch": 0.1897847146734486, "grad_norm": 1.2899229526519775, "learning_rate": 0.00047446503042319, "loss": 7.4124, "step": 13880 }, { "epoch": 0.18992144717681564, "grad_norm": 1.2976281642913818, "learning_rate": 0.00047480686401859575, "loss": 7.5082, "step": 13890 }, { "epoch": 0.19005817968018268, "grad_norm": 1.9869099855422974, "learning_rate": 0.00047514869761400156, "loss": 7.65, "step": 13900 }, { "epoch": 0.19019491218354972, "grad_norm": 1.9449093341827393, "learning_rate": 0.00047549053120940726, "loss": 7.6599, "step": 13910 }, { "epoch": 0.19033164468691674, "grad_norm": 1.6711900234222412, "learning_rate": 0.000475832364804813, "loss": 7.6313, "step": 13920 }, { "epoch": 0.19046837719028378, "grad_norm": 1.6138263940811157, "learning_rate": 0.00047617419840021883, "loss": 7.527, "step": 13930 }, { "epoch": 0.19060510969365083, "grad_norm": 2.006763219833374, "learning_rate": 0.00047651603199562454, "loss": 7.6686, "step": 13940 }, { "epoch": 0.19074184219701787, "grad_norm": 1.39774489402771, "learning_rate": 0.0004768578655910303, "loss": 7.5313, "step": 13950 }, { "epoch": 0.1908785747003849, "grad_norm": 0.89859539270401, "learning_rate": 0.00047719969918643605, "loss": 7.4722, "step": 13960 }, { "epoch": 0.19101530720375193, "grad_norm": 1.407633900642395, "learning_rate": 0.0004775415327818418, "loss": 7.6125, "step": 13970 }, { "epoch": 0.19115203970711897, "grad_norm": 1.3670657873153687, "learning_rate": 0.00047788336637724757, "loss": 7.7435, "step": 13980 }, { "epoch": 0.191288772210486, "grad_norm": 1.650156855583191, "learning_rate": 0.00047822519997265333, "loss": 7.5432, "step": 13990 }, { "epoch": 0.19142550471385306, "grad_norm": 1.7431659698486328, "learning_rate": 0.0004785670335680591, "loss": 7.6303, "step": 14000 }, { "epoch": 0.1915622372172201, "grad_norm": 1.6861857175827026, "learning_rate": 0.00047890886716346484, "loss": 7.6353, "step": 14010 }, { "epoch": 0.19169896972058714, "grad_norm": 1.8527804613113403, "learning_rate": 0.0004792507007588706, "loss": 7.7308, "step": 14020 }, { "epoch": 0.19183570222395416, "grad_norm": 1.6543105840682983, "learning_rate": 0.00047959253435427636, "loss": 7.4225, "step": 14030 }, { "epoch": 0.1919724347273212, "grad_norm": 1.2010746002197266, "learning_rate": 0.0004799343679496821, "loss": 7.7016, "step": 14040 }, { "epoch": 0.19210916723068824, "grad_norm": 1.7733713388442993, "learning_rate": 0.0004802762015450879, "loss": 7.282, "step": 14050 }, { "epoch": 0.1922458997340553, "grad_norm": 0.9574399590492249, "learning_rate": 0.00048061803514049364, "loss": 7.4292, "step": 14060 }, { "epoch": 0.19238263223742233, "grad_norm": 2.0644145011901855, "learning_rate": 0.00048095986873589934, "loss": 7.4968, "step": 14070 }, { "epoch": 0.19251936474078934, "grad_norm": 2.3078396320343018, "learning_rate": 0.00048130170233130515, "loss": 7.7735, "step": 14080 }, { "epoch": 0.1926560972441564, "grad_norm": 1.1728968620300293, "learning_rate": 0.0004816435359267109, "loss": 7.6356, "step": 14090 }, { "epoch": 0.19279282974752343, "grad_norm": 1.6446079015731812, "learning_rate": 0.0004819853695221166, "loss": 7.4861, "step": 14100 }, { "epoch": 0.19292956225089047, "grad_norm": 1.8217484951019287, "learning_rate": 0.00048232720311752237, "loss": 7.4759, "step": 14110 }, { "epoch": 0.19306629475425752, "grad_norm": 1.386348009109497, "learning_rate": 0.0004826690367129282, "loss": 7.5751, "step": 14120 }, { "epoch": 0.19320302725762456, "grad_norm": 2.002746820449829, "learning_rate": 0.0004830108703083339, "loss": 7.7298, "step": 14130 }, { "epoch": 0.19333975976099158, "grad_norm": 1.8415919542312622, "learning_rate": 0.00048335270390373965, "loss": 7.493, "step": 14140 }, { "epoch": 0.19347649226435862, "grad_norm": 1.008312463760376, "learning_rate": 0.00048369453749914546, "loss": 7.4489, "step": 14150 }, { "epoch": 0.19361322476772566, "grad_norm": 1.480800986289978, "learning_rate": 0.00048403637109455116, "loss": 7.741, "step": 14160 }, { "epoch": 0.1937499572710927, "grad_norm": 0.9309971332550049, "learning_rate": 0.0004843782046899569, "loss": 7.7903, "step": 14170 }, { "epoch": 0.19388668977445975, "grad_norm": 2.402477502822876, "learning_rate": 0.0004847200382853627, "loss": 7.5069, "step": 14180 }, { "epoch": 0.19402342227782676, "grad_norm": 1.036625623703003, "learning_rate": 0.00048506187188076844, "loss": 7.8271, "step": 14190 }, { "epoch": 0.1941601547811938, "grad_norm": 1.5696754455566406, "learning_rate": 0.0004854037054761742, "loss": 7.4109, "step": 14200 }, { "epoch": 0.19429688728456085, "grad_norm": 0.9674107432365417, "learning_rate": 0.00048574553907157996, "loss": 7.452, "step": 14210 }, { "epoch": 0.1944336197879279, "grad_norm": 1.995890498161316, "learning_rate": 0.0004860873726669857, "loss": 7.6906, "step": 14220 }, { "epoch": 0.19457035229129493, "grad_norm": 1.0005987882614136, "learning_rate": 0.00048642920626239147, "loss": 7.42, "step": 14230 }, { "epoch": 0.19470708479466195, "grad_norm": 1.0055793523788452, "learning_rate": 0.00048677103985779723, "loss": 7.7921, "step": 14240 }, { "epoch": 0.194843817298029, "grad_norm": 1.5215362310409546, "learning_rate": 0.000487112873453203, "loss": 7.764, "step": 14250 }, { "epoch": 0.19498054980139604, "grad_norm": 1.671651005744934, "learning_rate": 0.00048745470704860875, "loss": 7.6553, "step": 14260 }, { "epoch": 0.19511728230476308, "grad_norm": 1.4030262231826782, "learning_rate": 0.0004877965406440145, "loss": 7.568, "step": 14270 }, { "epoch": 0.19525401480813012, "grad_norm": 2.1989121437072754, "learning_rate": 0.00048813837423942026, "loss": 7.6119, "step": 14280 }, { "epoch": 0.19539074731149716, "grad_norm": 1.1712583303451538, "learning_rate": 0.000488480207834826, "loss": 7.5571, "step": 14290 }, { "epoch": 0.19552747981486418, "grad_norm": 1.3909251689910889, "learning_rate": 0.0004888220414302318, "loss": 7.7451, "step": 14300 }, { "epoch": 0.19566421231823122, "grad_norm": 1.8176860809326172, "learning_rate": 0.0004891638750256375, "loss": 7.4988, "step": 14310 }, { "epoch": 0.19580094482159827, "grad_norm": 1.6620733737945557, "learning_rate": 0.0004895057086210432, "loss": 7.8164, "step": 14320 }, { "epoch": 0.1959376773249653, "grad_norm": 1.065706491470337, "learning_rate": 0.0004898475422164491, "loss": 7.8083, "step": 14330 }, { "epoch": 0.19607440982833235, "grad_norm": 1.4639270305633545, "learning_rate": 0.0004901893758118548, "loss": 7.6164, "step": 14340 }, { "epoch": 0.19621114233169937, "grad_norm": 1.7259130477905273, "learning_rate": 0.0004905312094072605, "loss": 7.1916, "step": 14350 }, { "epoch": 0.1963478748350664, "grad_norm": 1.2956658601760864, "learning_rate": 0.0004908730430026664, "loss": 7.3758, "step": 14360 }, { "epoch": 0.19648460733843345, "grad_norm": 1.2444957494735718, "learning_rate": 0.000491214876598072, "loss": 7.4736, "step": 14370 }, { "epoch": 0.1966213398418005, "grad_norm": 1.206840991973877, "learning_rate": 0.0004915567101934778, "loss": 7.6879, "step": 14380 }, { "epoch": 0.19675807234516754, "grad_norm": 1.2904895544052124, "learning_rate": 0.0004918985437888836, "loss": 7.5467, "step": 14390 }, { "epoch": 0.19689480484853458, "grad_norm": 1.202418327331543, "learning_rate": 0.0004922403773842893, "loss": 7.4502, "step": 14400 }, { "epoch": 0.1970315373519016, "grad_norm": 1.392156958580017, "learning_rate": 0.0004925822109796951, "loss": 7.6601, "step": 14410 }, { "epoch": 0.19716826985526864, "grad_norm": 1.8240795135498047, "learning_rate": 0.0004929240445751008, "loss": 7.6746, "step": 14420 }, { "epoch": 0.19730500235863568, "grad_norm": 1.2609188556671143, "learning_rate": 0.0004932658781705066, "loss": 7.5204, "step": 14430 }, { "epoch": 0.19744173486200273, "grad_norm": 1.5433380603790283, "learning_rate": 0.0004936077117659123, "loss": 7.7437, "step": 14440 }, { "epoch": 0.19757846736536977, "grad_norm": 1.1427932977676392, "learning_rate": 0.0004939495453613181, "loss": 7.4272, "step": 14450 }, { "epoch": 0.19771519986873679, "grad_norm": 1.1527156829833984, "learning_rate": 0.0004942913789567239, "loss": 7.3794, "step": 14460 }, { "epoch": 0.19785193237210383, "grad_norm": 1.3514530658721924, "learning_rate": 0.0004946332125521296, "loss": 7.4526, "step": 14470 }, { "epoch": 0.19798866487547087, "grad_norm": 1.3655669689178467, "learning_rate": 0.0004949750461475354, "loss": 7.5536, "step": 14480 }, { "epoch": 0.19812539737883791, "grad_norm": 3.0962419509887695, "learning_rate": 0.0004953168797429411, "loss": 7.7679, "step": 14490 }, { "epoch": 0.19826212988220496, "grad_norm": 1.7621126174926758, "learning_rate": 0.0004956587133383469, "loss": 7.8473, "step": 14500 }, { "epoch": 0.19839886238557197, "grad_norm": 2.0744900703430176, "learning_rate": 0.0004960005469337526, "loss": 7.2894, "step": 14510 }, { "epoch": 0.19853559488893902, "grad_norm": 1.5304234027862549, "learning_rate": 0.0004963423805291584, "loss": 7.7148, "step": 14520 }, { "epoch": 0.19867232739230606, "grad_norm": 1.7312275171279907, "learning_rate": 0.0004966842141245642, "loss": 7.5376, "step": 14530 }, { "epoch": 0.1988090598956731, "grad_norm": 1.744939923286438, "learning_rate": 0.0004970260477199699, "loss": 7.5272, "step": 14540 }, { "epoch": 0.19894579239904014, "grad_norm": 1.5614131689071655, "learning_rate": 0.0004973678813153757, "loss": 7.9951, "step": 14550 }, { "epoch": 0.1990825249024072, "grad_norm": 1.5183262825012207, "learning_rate": 0.0004977097149107814, "loss": 7.5381, "step": 14560 }, { "epoch": 0.1992192574057742, "grad_norm": 1.3610395193099976, "learning_rate": 0.0004980515485061872, "loss": 7.456, "step": 14570 }, { "epoch": 0.19935598990914125, "grad_norm": 1.692396879196167, "learning_rate": 0.000498393382101593, "loss": 7.4109, "step": 14580 }, { "epoch": 0.1994927224125083, "grad_norm": 1.0421873331069946, "learning_rate": 0.0004987352156969987, "loss": 7.5658, "step": 14590 }, { "epoch": 0.19962945491587533, "grad_norm": 1.0939011573791504, "learning_rate": 0.0004990770492924045, "loss": 7.5368, "step": 14600 }, { "epoch": 0.19976618741924237, "grad_norm": 1.7442255020141602, "learning_rate": 0.0004994188828878102, "loss": 7.4576, "step": 14610 }, { "epoch": 0.1999029199226094, "grad_norm": 1.2244166135787964, "learning_rate": 0.000499760716483216, "loss": 7.6439, "step": 14620 }, { "epoch": 0.20003965242597643, "grad_norm": 1.5471065044403076, "learning_rate": 0.000499999999359297, "loss": 7.6561, "step": 14630 }, { "epoch": 0.20017638492934348, "grad_norm": 1.3392573595046997, "learning_rate": 0.0004999999879690225, "loss": 7.7133, "step": 14640 }, { "epoch": 0.20031311743271052, "grad_norm": 1.1937683820724487, "learning_rate": 0.0004999999623409051, "loss": 7.6768, "step": 14650 }, { "epoch": 0.20044984993607756, "grad_norm": 0.9230870604515076, "learning_rate": 0.0004999999224749467, "loss": 7.6503, "step": 14660 }, { "epoch": 0.2005865824394446, "grad_norm": 1.346669316291809, "learning_rate": 0.0004999998683711492, "loss": 7.8074, "step": 14670 }, { "epoch": 0.20072331494281162, "grad_norm": 1.774349331855774, "learning_rate": 0.0004999998000295158, "loss": 7.7795, "step": 14680 }, { "epoch": 0.20086004744617866, "grad_norm": 1.3709367513656616, "learning_rate": 0.0004999997174500506, "loss": 7.8477, "step": 14690 }, { "epoch": 0.2009967799495457, "grad_norm": 1.9551074504852295, "learning_rate": 0.0004999996206327581, "loss": 7.5128, "step": 14700 }, { "epoch": 0.20113351245291275, "grad_norm": 1.5322149991989136, "learning_rate": 0.0004999995095776439, "loss": 7.551, "step": 14710 }, { "epoch": 0.2012702449562798, "grad_norm": 1.1008901596069336, "learning_rate": 0.0004999993842847142, "loss": 7.6051, "step": 14720 }, { "epoch": 0.2014069774596468, "grad_norm": 1.2091398239135742, "learning_rate": 0.0004999992447539762, "loss": 7.8459, "step": 14730 }, { "epoch": 0.20154370996301385, "grad_norm": 1.889969825744629, "learning_rate": 0.0004999990909854379, "loss": 7.6744, "step": 14740 }, { "epoch": 0.2016804424663809, "grad_norm": 1.4230974912643433, "learning_rate": 0.000499998922979108, "loss": 7.6047, "step": 14750 }, { "epoch": 0.20181717496974794, "grad_norm": 1.8369837999343872, "learning_rate": 0.0004999987407349962, "loss": 7.8929, "step": 14760 }, { "epoch": 0.20195390747311498, "grad_norm": 2.4211385250091553, "learning_rate": 0.0004999985442531126, "loss": 7.327, "step": 14770 }, { "epoch": 0.20209063997648202, "grad_norm": 1.6277596950531006, "learning_rate": 0.0004999983335334687, "loss": 7.704, "step": 14780 }, { "epoch": 0.20222737247984904, "grad_norm": 2.7869935035705566, "learning_rate": 0.0004999981085760763, "loss": 7.3244, "step": 14790 }, { "epoch": 0.20236410498321608, "grad_norm": 1.3529331684112549, "learning_rate": 0.0004999978693809483, "loss": 7.5668, "step": 14800 }, { "epoch": 0.20250083748658312, "grad_norm": 1.9865466356277466, "learning_rate": 0.0004999976159480983, "loss": 7.6249, "step": 14810 }, { "epoch": 0.20263756998995017, "grad_norm": 2.6009113788604736, "learning_rate": 0.0004999973482775407, "loss": 7.4117, "step": 14820 }, { "epoch": 0.2027743024933172, "grad_norm": 1.1652166843414307, "learning_rate": 0.0004999970663692907, "loss": 7.6656, "step": 14830 }, { "epoch": 0.20291103499668423, "grad_norm": 1.8276073932647705, "learning_rate": 0.0004999967702233644, "loss": 7.4885, "step": 14840 }, { "epoch": 0.20304776750005127, "grad_norm": 1.151456356048584, "learning_rate": 0.0004999964598397788, "loss": 7.7155, "step": 14850 }, { "epoch": 0.2031845000034183, "grad_norm": 2.2286081314086914, "learning_rate": 0.0004999961352185515, "loss": 7.7497, "step": 14860 }, { "epoch": 0.20332123250678535, "grad_norm": 1.204865574836731, "learning_rate": 0.0004999957963597009, "loss": 7.4957, "step": 14870 }, { "epoch": 0.2034579650101524, "grad_norm": 1.1882474422454834, "learning_rate": 0.0004999954432632464, "loss": 7.4574, "step": 14880 }, { "epoch": 0.2035946975135194, "grad_norm": 1.6119710206985474, "learning_rate": 0.000499995075929208, "loss": 7.3141, "step": 14890 }, { "epoch": 0.20373143001688646, "grad_norm": 1.4046107530593872, "learning_rate": 0.0004999946943576066, "loss": 7.7013, "step": 14900 }, { "epoch": 0.2038681625202535, "grad_norm": 1.304362416267395, "learning_rate": 0.0004999942985484641, "loss": 7.7196, "step": 14910 }, { "epoch": 0.20400489502362054, "grad_norm": 1.2674434185028076, "learning_rate": 0.000499993888501803, "loss": 7.7315, "step": 14920 }, { "epoch": 0.20414162752698758, "grad_norm": 1.692635178565979, "learning_rate": 0.0004999934642176465, "loss": 7.8176, "step": 14930 }, { "epoch": 0.20427836003035463, "grad_norm": 1.3801699876785278, "learning_rate": 0.000499993025696019, "loss": 7.2262, "step": 14940 }, { "epoch": 0.20441509253372164, "grad_norm": 1.2219935655593872, "learning_rate": 0.0004999925729369454, "loss": 7.5089, "step": 14950 }, { "epoch": 0.20455182503708869, "grad_norm": 1.4449214935302734, "learning_rate": 0.0004999921059404513, "loss": 7.6968, "step": 14960 }, { "epoch": 0.20468855754045573, "grad_norm": 2.7732129096984863, "learning_rate": 0.0004999916247065634, "loss": 7.6006, "step": 14970 }, { "epoch": 0.20482529004382277, "grad_norm": 3.2656924724578857, "learning_rate": 0.0004999911292353092, "loss": 7.6729, "step": 14980 }, { "epoch": 0.20496202254718981, "grad_norm": 1.443055272102356, "learning_rate": 0.0004999906195267168, "loss": 7.5016, "step": 14990 }, { "epoch": 0.20509875505055683, "grad_norm": 1.0140571594238281, "learning_rate": 0.0004999900955808154, "loss": 7.4937, "step": 15000 }, { "epoch": 0.20523548755392387, "grad_norm": 3.1747992038726807, "learning_rate": 0.0004999895573976346, "loss": 7.4306, "step": 15010 }, { "epoch": 0.20537222005729092, "grad_norm": 1.215908169746399, "learning_rate": 0.0004999890049772052, "loss": 7.7125, "step": 15020 }, { "epoch": 0.20550895256065796, "grad_norm": 1.110507845878601, "learning_rate": 0.0004999884383195586, "loss": 7.5348, "step": 15030 }, { "epoch": 0.205645685064025, "grad_norm": 1.6382216215133667, "learning_rate": 0.0004999878574247272, "loss": 7.6145, "step": 15040 }, { "epoch": 0.20578241756739205, "grad_norm": 4.129947185516357, "learning_rate": 0.000499987262292744, "loss": 7.7989, "step": 15050 }, { "epoch": 0.20591915007075906, "grad_norm": 1.3069157600402832, "learning_rate": 0.0004999866529236428, "loss": 7.8352, "step": 15060 }, { "epoch": 0.2060558825741261, "grad_norm": 1.7389343976974487, "learning_rate": 0.0004999860293174584, "loss": 7.7363, "step": 15070 }, { "epoch": 0.20619261507749315, "grad_norm": 1.0254788398742676, "learning_rate": 0.0004999853914742263, "loss": 7.8296, "step": 15080 }, { "epoch": 0.2063293475808602, "grad_norm": 2.0985782146453857, "learning_rate": 0.0004999847393939829, "loss": 7.5365, "step": 15090 }, { "epoch": 0.20646608008422723, "grad_norm": 1.8527929782867432, "learning_rate": 0.0004999840730767651, "loss": 7.5257, "step": 15100 }, { "epoch": 0.20660281258759425, "grad_norm": 0.9170514941215515, "learning_rate": 0.0004999833925226112, "loss": 7.7084, "step": 15110 }, { "epoch": 0.2067395450909613, "grad_norm": 2.1775989532470703, "learning_rate": 0.0004999826977315595, "loss": 7.6824, "step": 15120 }, { "epoch": 0.20687627759432833, "grad_norm": 1.2314571142196655, "learning_rate": 0.0004999819887036501, "loss": 7.7253, "step": 15130 }, { "epoch": 0.20701301009769538, "grad_norm": 1.4025800228118896, "learning_rate": 0.000499981265438923, "loss": 7.6292, "step": 15140 }, { "epoch": 0.20714974260106242, "grad_norm": 1.71263587474823, "learning_rate": 0.0004999805279374196, "loss": 7.671, "step": 15150 }, { "epoch": 0.20728647510442946, "grad_norm": 1.5630078315734863, "learning_rate": 0.0004999797761991818, "loss": 7.581, "step": 15160 }, { "epoch": 0.20742320760779648, "grad_norm": 1.1573787927627563, "learning_rate": 0.0004999790102242523, "loss": 7.6343, "step": 15170 }, { "epoch": 0.20755994011116352, "grad_norm": 1.0965474843978882, "learning_rate": 0.0004999782300126749, "loss": 7.5611, "step": 15180 }, { "epoch": 0.20769667261453056, "grad_norm": 1.2594008445739746, "learning_rate": 0.0004999774355644941, "loss": 7.5811, "step": 15190 }, { "epoch": 0.2078334051178976, "grad_norm": 1.0546543598175049, "learning_rate": 0.0004999766268797549, "loss": 7.4438, "step": 15200 }, { "epoch": 0.20797013762126465, "grad_norm": 2.028377056121826, "learning_rate": 0.0004999758039585036, "loss": 7.4323, "step": 15210 }, { "epoch": 0.20810687012463167, "grad_norm": 0.8068011999130249, "learning_rate": 0.0004999749668007868, "loss": 7.4795, "step": 15220 }, { "epoch": 0.2082436026279987, "grad_norm": 1.7283040285110474, "learning_rate": 0.0004999741154066524, "loss": 7.5924, "step": 15230 }, { "epoch": 0.20838033513136575, "grad_norm": 0.958118200302124, "learning_rate": 0.0004999732497761488, "loss": 7.6255, "step": 15240 }, { "epoch": 0.2085170676347328, "grad_norm": 1.2964122295379639, "learning_rate": 0.0004999723699093253, "loss": 7.7001, "step": 15250 }, { "epoch": 0.20865380013809984, "grad_norm": 1.391613483428955, "learning_rate": 0.0004999714758062321, "loss": 7.5503, "step": 15260 }, { "epoch": 0.20879053264146685, "grad_norm": 1.166948676109314, "learning_rate": 0.0004999705674669199, "loss": 7.572, "step": 15270 }, { "epoch": 0.2089272651448339, "grad_norm": 0.951365053653717, "learning_rate": 0.0004999696448914407, "loss": 7.5182, "step": 15280 }, { "epoch": 0.20906399764820094, "grad_norm": 1.387058138847351, "learning_rate": 0.0004999687080798469, "loss": 7.9005, "step": 15290 }, { "epoch": 0.20920073015156798, "grad_norm": 1.4033368825912476, "learning_rate": 0.0004999677570321917, "loss": 7.5963, "step": 15300 }, { "epoch": 0.20933746265493502, "grad_norm": 1.2538713216781616, "learning_rate": 0.0004999667917485297, "loss": 8.0428, "step": 15310 }, { "epoch": 0.20947419515830207, "grad_norm": 1.1099900007247925, "learning_rate": 0.0004999658122289154, "loss": 7.2384, "step": 15320 }, { "epoch": 0.20961092766166908, "grad_norm": 0.9349867701530457, "learning_rate": 0.0004999648184734049, "loss": 7.7424, "step": 15330 }, { "epoch": 0.20974766016503613, "grad_norm": 1.4465713500976562, "learning_rate": 0.0004999638104820547, "loss": 7.5456, "step": 15340 }, { "epoch": 0.20988439266840317, "grad_norm": 1.7285183668136597, "learning_rate": 0.0004999627882549221, "loss": 7.5252, "step": 15350 }, { "epoch": 0.2100211251717702, "grad_norm": 1.0939253568649292, "learning_rate": 0.0004999617517920655, "loss": 7.5353, "step": 15360 }, { "epoch": 0.21015785767513726, "grad_norm": 0.9228522777557373, "learning_rate": 0.0004999607010935438, "loss": 7.5033, "step": 15370 }, { "epoch": 0.21029459017850427, "grad_norm": 1.3933048248291016, "learning_rate": 0.0004999596361594168, "loss": 7.5495, "step": 15380 }, { "epoch": 0.2104313226818713, "grad_norm": 1.3380231857299805, "learning_rate": 0.0004999585569897454, "loss": 7.3999, "step": 15390 }, { "epoch": 0.21056805518523836, "grad_norm": 1.073216199874878, "learning_rate": 0.0004999574635845908, "loss": 7.4231, "step": 15400 }, { "epoch": 0.2107047876886054, "grad_norm": 1.3760813474655151, "learning_rate": 0.0004999563559440153, "loss": 7.6736, "step": 15410 }, { "epoch": 0.21084152019197244, "grad_norm": 1.8634544610977173, "learning_rate": 0.0004999552340680821, "loss": 7.5686, "step": 15420 }, { "epoch": 0.21097825269533949, "grad_norm": 1.0422987937927246, "learning_rate": 0.0004999540979568551, "loss": 7.5359, "step": 15430 }, { "epoch": 0.2111149851987065, "grad_norm": 1.4450855255126953, "learning_rate": 0.0004999529476103988, "loss": 7.5173, "step": 15440 }, { "epoch": 0.21125171770207354, "grad_norm": 1.0691494941711426, "learning_rate": 0.000499951783028779, "loss": 7.5267, "step": 15450 }, { "epoch": 0.2113884502054406, "grad_norm": 1.180245280265808, "learning_rate": 0.0004999506042120618, "loss": 7.4969, "step": 15460 }, { "epoch": 0.21152518270880763, "grad_norm": 1.56626558303833, "learning_rate": 0.0004999494111603143, "loss": 7.9151, "step": 15470 }, { "epoch": 0.21166191521217467, "grad_norm": 1.4788089990615845, "learning_rate": 0.0004999482038736046, "loss": 7.7536, "step": 15480 }, { "epoch": 0.2117986477155417, "grad_norm": 0.984645664691925, "learning_rate": 0.0004999469823520015, "loss": 7.7292, "step": 15490 }, { "epoch": 0.21193538021890873, "grad_norm": 1.4337348937988281, "learning_rate": 0.0004999457465955744, "loss": 7.3812, "step": 15500 }, { "epoch": 0.21207211272227577, "grad_norm": 1.3156750202178955, "learning_rate": 0.0004999444966043938, "loss": 7.6313, "step": 15510 }, { "epoch": 0.21220884522564282, "grad_norm": 1.1287020444869995, "learning_rate": 0.0004999432323785309, "loss": 7.6992, "step": 15520 }, { "epoch": 0.21234557772900986, "grad_norm": 1.3650383949279785, "learning_rate": 0.0004999419539180576, "loss": 7.683, "step": 15530 }, { "epoch": 0.2124823102323769, "grad_norm": 1.1633238792419434, "learning_rate": 0.0004999406612230466, "loss": 7.4766, "step": 15540 }, { "epoch": 0.21261904273574392, "grad_norm": 1.0635794401168823, "learning_rate": 0.0004999393542935718, "loss": 7.4721, "step": 15550 }, { "epoch": 0.21275577523911096, "grad_norm": 1.2035280466079712, "learning_rate": 0.0004999380331297076, "loss": 7.6812, "step": 15560 }, { "epoch": 0.212892507742478, "grad_norm": 1.1730732917785645, "learning_rate": 0.0004999366977315291, "loss": 7.8173, "step": 15570 }, { "epoch": 0.21302924024584505, "grad_norm": 1.0516618490219116, "learning_rate": 0.0004999353480991123, "loss": 7.6396, "step": 15580 }, { "epoch": 0.2131659727492121, "grad_norm": 1.1227385997772217, "learning_rate": 0.0004999339842325343, "loss": 7.6056, "step": 15590 }, { "epoch": 0.2133027052525791, "grad_norm": 1.8907544612884521, "learning_rate": 0.0004999326061318725, "loss": 7.5749, "step": 15600 }, { "epoch": 0.21343943775594615, "grad_norm": 1.8664382696151733, "learning_rate": 0.0004999312137972056, "loss": 7.7756, "step": 15610 }, { "epoch": 0.2135761702593132, "grad_norm": 1.393991231918335, "learning_rate": 0.0004999298072286128, "loss": 7.3106, "step": 15620 }, { "epoch": 0.21371290276268023, "grad_norm": 1.803459882736206, "learning_rate": 0.0004999283864261742, "loss": 7.6343, "step": 15630 }, { "epoch": 0.21384963526604728, "grad_norm": 1.0090181827545166, "learning_rate": 0.0004999269513899709, "loss": 7.3786, "step": 15640 }, { "epoch": 0.2139863677694143, "grad_norm": 3.93381667137146, "learning_rate": 0.0004999255021200844, "loss": 7.5104, "step": 15650 }, { "epoch": 0.21412310027278134, "grad_norm": 1.0392241477966309, "learning_rate": 0.0004999240386165972, "loss": 7.6986, "step": 15660 }, { "epoch": 0.21425983277614838, "grad_norm": 3.512235164642334, "learning_rate": 0.0004999225608795927, "loss": 7.6961, "step": 15670 }, { "epoch": 0.21439656527951542, "grad_norm": 1.4677050113677979, "learning_rate": 0.0004999210689091554, "loss": 7.735, "step": 15680 }, { "epoch": 0.21453329778288246, "grad_norm": 1.2735625505447388, "learning_rate": 0.0004999195627053697, "loss": 7.6804, "step": 15690 }, { "epoch": 0.2146700302862495, "grad_norm": 1.1657488346099854, "learning_rate": 0.0004999180422683218, "loss": 7.5332, "step": 15700 }, { "epoch": 0.21480676278961652, "grad_norm": 1.5052639245986938, "learning_rate": 0.0004999165075980982, "loss": 7.7408, "step": 15710 }, { "epoch": 0.21494349529298357, "grad_norm": 1.6795746088027954, "learning_rate": 0.0004999149586947862, "loss": 7.6645, "step": 15720 }, { "epoch": 0.2150802277963506, "grad_norm": 1.35969078540802, "learning_rate": 0.000499913395558474, "loss": 7.482, "step": 15730 }, { "epoch": 0.21521696029971765, "grad_norm": 2.1430962085723877, "learning_rate": 0.0004999118181892509, "loss": 7.6319, "step": 15740 }, { "epoch": 0.2153536928030847, "grad_norm": 1.3019132614135742, "learning_rate": 0.0004999102265872064, "loss": 7.6508, "step": 15750 }, { "epoch": 0.2154904253064517, "grad_norm": 1.6118258237838745, "learning_rate": 0.0004999086207524312, "loss": 7.8143, "step": 15760 }, { "epoch": 0.21562715780981875, "grad_norm": 0.9388249516487122, "learning_rate": 0.0004999070006850168, "loss": 7.6001, "step": 15770 }, { "epoch": 0.2157638903131858, "grad_norm": 1.1383295059204102, "learning_rate": 0.0004999053663850556, "loss": 7.7476, "step": 15780 }, { "epoch": 0.21590062281655284, "grad_norm": 1.1523910760879517, "learning_rate": 0.0004999037178526405, "loss": 7.5888, "step": 15790 }, { "epoch": 0.21603735531991988, "grad_norm": 1.31768798828125, "learning_rate": 0.0004999020550878655, "loss": 7.7263, "step": 15800 }, { "epoch": 0.21617408782328693, "grad_norm": 1.0293883085250854, "learning_rate": 0.0004999003780908252, "loss": 7.3973, "step": 15810 }, { "epoch": 0.21631082032665394, "grad_norm": 1.2941296100616455, "learning_rate": 0.0004998986868616152, "loss": 7.7379, "step": 15820 }, { "epoch": 0.21644755283002098, "grad_norm": 0.9610562920570374, "learning_rate": 0.0004998969814003318, "loss": 7.4903, "step": 15830 }, { "epoch": 0.21658428533338803, "grad_norm": 1.2600958347320557, "learning_rate": 0.000499895261707072, "loss": 7.6558, "step": 15840 }, { "epoch": 0.21672101783675507, "grad_norm": 1.0415008068084717, "learning_rate": 0.000499893527781934, "loss": 7.6034, "step": 15850 }, { "epoch": 0.2168577503401221, "grad_norm": 1.021538257598877, "learning_rate": 0.0004998917796250162, "loss": 7.5295, "step": 15860 }, { "epoch": 0.21699448284348913, "grad_norm": 1.1037408113479614, "learning_rate": 0.0004998900172364185, "loss": 7.5957, "step": 15870 }, { "epoch": 0.21713121534685617, "grad_norm": 1.139426350593567, "learning_rate": 0.0004998882406162411, "loss": 7.7252, "step": 15880 }, { "epoch": 0.21726794785022321, "grad_norm": 0.8971556425094604, "learning_rate": 0.0004998864497645851, "loss": 7.7017, "step": 15890 }, { "epoch": 0.21740468035359026, "grad_norm": 1.0901399850845337, "learning_rate": 0.0004998846446815526, "loss": 7.6542, "step": 15900 }, { "epoch": 0.2175414128569573, "grad_norm": 1.1722744703292847, "learning_rate": 0.0004998828253672465, "loss": 7.7027, "step": 15910 }, { "epoch": 0.21767814536032434, "grad_norm": 1.3913863897323608, "learning_rate": 0.0004998809918217703, "loss": 7.3794, "step": 15920 }, { "epoch": 0.21781487786369136, "grad_norm": 1.291461706161499, "learning_rate": 0.0004998791440452284, "loss": 7.5542, "step": 15930 }, { "epoch": 0.2179516103670584, "grad_norm": 1.1549406051635742, "learning_rate": 0.000499877282037726, "loss": 7.7846, "step": 15940 }, { "epoch": 0.21808834287042544, "grad_norm": 1.244102954864502, "learning_rate": 0.0004998754057993693, "loss": 7.3989, "step": 15950 }, { "epoch": 0.2182250753737925, "grad_norm": 2.1532487869262695, "learning_rate": 0.000499873515330265, "loss": 7.8104, "step": 15960 }, { "epoch": 0.21836180787715953, "grad_norm": 1.1172634363174438, "learning_rate": 0.0004998716106305208, "loss": 7.734, "step": 15970 }, { "epoch": 0.21849854038052655, "grad_norm": 1.0520926713943481, "learning_rate": 0.0004998696917002453, "loss": 7.6268, "step": 15980 }, { "epoch": 0.2186352728838936, "grad_norm": 0.8580245971679688, "learning_rate": 0.0004998677585395476, "loss": 7.8198, "step": 15990 }, { "epoch": 0.21877200538726063, "grad_norm": 0.9490774273872375, "learning_rate": 0.0004998658111485379, "loss": 7.4563, "step": 16000 }, { "epoch": 0.21890873789062767, "grad_norm": 2.0900259017944336, "learning_rate": 0.0004998638495273271, "loss": 7.4614, "step": 16010 }, { "epoch": 0.21904547039399472, "grad_norm": 1.6525896787643433, "learning_rate": 0.000499861873676027, "loss": 7.5939, "step": 16020 }, { "epoch": 0.21918220289736173, "grad_norm": 1.1235463619232178, "learning_rate": 0.0004998598835947498, "loss": 7.6287, "step": 16030 }, { "epoch": 0.21931893540072878, "grad_norm": 1.12869131565094, "learning_rate": 0.0004998578792836093, "loss": 7.474, "step": 16040 }, { "epoch": 0.21945566790409582, "grad_norm": 1.3941190242767334, "learning_rate": 0.0004998558607427194, "loss": 7.4624, "step": 16050 }, { "epoch": 0.21959240040746286, "grad_norm": 1.3902074098587036, "learning_rate": 0.000499853827972195, "loss": 7.7352, "step": 16060 }, { "epoch": 0.2197291329108299, "grad_norm": 1.7761439085006714, "learning_rate": 0.000499851780972152, "loss": 7.6208, "step": 16070 }, { "epoch": 0.21986586541419695, "grad_norm": 1.0167367458343506, "learning_rate": 0.0004998497197427069, "loss": 7.8174, "step": 16080 }, { "epoch": 0.22000259791756396, "grad_norm": 1.3227018117904663, "learning_rate": 0.0004998476442839771, "loss": 7.5334, "step": 16090 }, { "epoch": 0.220139330420931, "grad_norm": 1.0175882577896118, "learning_rate": 0.0004998455545960808, "loss": 7.6797, "step": 16100 }, { "epoch": 0.22027606292429805, "grad_norm": 1.3937046527862549, "learning_rate": 0.000499843450679137, "loss": 7.5925, "step": 16110 }, { "epoch": 0.2204127954276651, "grad_norm": 1.9769172668457031, "learning_rate": 0.0004998413325332657, "loss": 7.5807, "step": 16120 }, { "epoch": 0.22054952793103214, "grad_norm": 1.7836180925369263, "learning_rate": 0.0004998392001585873, "loss": 7.5465, "step": 16130 }, { "epoch": 0.22068626043439915, "grad_norm": 1.5882618427276611, "learning_rate": 0.0004998370535552233, "loss": 7.7325, "step": 16140 }, { "epoch": 0.2208229929377662, "grad_norm": 1.339685082435608, "learning_rate": 0.000499834892723296, "loss": 7.8771, "step": 16150 }, { "epoch": 0.22095972544113324, "grad_norm": 0.9258803129196167, "learning_rate": 0.0004998327176629284, "loss": 7.5771, "step": 16160 }, { "epoch": 0.22109645794450028, "grad_norm": 1.2222076654434204, "learning_rate": 0.0004998305283742445, "loss": 7.8058, "step": 16170 }, { "epoch": 0.22123319044786732, "grad_norm": 3.3808753490448, "learning_rate": 0.0004998283248573688, "loss": 8.0244, "step": 16180 }, { "epoch": 0.22136992295123437, "grad_norm": 0.9095616936683655, "learning_rate": 0.000499826107112427, "loss": 7.6714, "step": 16190 }, { "epoch": 0.22150665545460138, "grad_norm": 2.7385363578796387, "learning_rate": 0.0004998238751395452, "loss": 7.9515, "step": 16200 }, { "epoch": 0.22164338795796842, "grad_norm": 2.2244482040405273, "learning_rate": 0.0004998216289388506, "loss": 7.6441, "step": 16210 }, { "epoch": 0.22178012046133547, "grad_norm": 1.7264529466629028, "learning_rate": 0.000499819368510471, "loss": 7.5918, "step": 16220 }, { "epoch": 0.2219168529647025, "grad_norm": 1.190238356590271, "learning_rate": 0.0004998170938545354, "loss": 7.3616, "step": 16230 }, { "epoch": 0.22205358546806955, "grad_norm": 3.1430206298828125, "learning_rate": 0.0004998148049711733, "loss": 7.446, "step": 16240 }, { "epoch": 0.22219031797143657, "grad_norm": 1.1752808094024658, "learning_rate": 0.0004998125018605148, "loss": 7.4311, "step": 16250 }, { "epoch": 0.2223270504748036, "grad_norm": 1.6480495929718018, "learning_rate": 0.0004998101845226913, "loss": 7.7055, "step": 16260 }, { "epoch": 0.22246378297817065, "grad_norm": 1.904813289642334, "learning_rate": 0.0004998078529578347, "loss": 7.7378, "step": 16270 }, { "epoch": 0.2226005154815377, "grad_norm": 2.4978225231170654, "learning_rate": 0.0004998055071660779, "loss": 7.576, "step": 16280 }, { "epoch": 0.22273724798490474, "grad_norm": 1.0353540182113647, "learning_rate": 0.0004998031471475542, "loss": 7.8159, "step": 16290 }, { "epoch": 0.22287398048827176, "grad_norm": 1.3757723569869995, "learning_rate": 0.0004998007729023982, "loss": 7.6083, "step": 16300 }, { "epoch": 0.2230107129916388, "grad_norm": 1.8960983753204346, "learning_rate": 0.0004997983844307452, "loss": 7.4831, "step": 16310 }, { "epoch": 0.22314744549500584, "grad_norm": 1.6326848268508911, "learning_rate": 0.0004997959817327311, "loss": 7.575, "step": 16320 }, { "epoch": 0.22328417799837288, "grad_norm": 1.1441831588745117, "learning_rate": 0.0004997935648084928, "loss": 7.5895, "step": 16330 }, { "epoch": 0.22342091050173993, "grad_norm": 1.4432674646377563, "learning_rate": 0.000499791133658168, "loss": 7.7187, "step": 16340 }, { "epoch": 0.22355764300510697, "grad_norm": 1.2116012573242188, "learning_rate": 0.0004997886882818948, "loss": 7.4678, "step": 16350 }, { "epoch": 0.22369437550847399, "grad_norm": 1.1084117889404297, "learning_rate": 0.000499786228679813, "loss": 7.357, "step": 16360 }, { "epoch": 0.22383110801184103, "grad_norm": 1.4304208755493164, "learning_rate": 0.0004997837548520623, "loss": 7.5934, "step": 16370 }, { "epoch": 0.22396784051520807, "grad_norm": 3.857400894165039, "learning_rate": 0.0004997812667987837, "loss": 7.4302, "step": 16380 }, { "epoch": 0.22410457301857512, "grad_norm": 1.3161513805389404, "learning_rate": 0.0004997787645201191, "loss": 7.903, "step": 16390 }, { "epoch": 0.22424130552194216, "grad_norm": 2.2459518909454346, "learning_rate": 0.0004997762480162106, "loss": 7.5914, "step": 16400 }, { "epoch": 0.22437803802530917, "grad_norm": 1.5359729528427124, "learning_rate": 0.0004997737172872018, "loss": 7.4401, "step": 16410 }, { "epoch": 0.22451477052867622, "grad_norm": 1.3358901739120483, "learning_rate": 0.0004997711723332367, "loss": 7.5418, "step": 16420 }, { "epoch": 0.22465150303204326, "grad_norm": 1.4082071781158447, "learning_rate": 0.0004997686131544604, "loss": 7.3643, "step": 16430 }, { "epoch": 0.2247882355354103, "grad_norm": 0.9140551090240479, "learning_rate": 0.0004997660397510186, "loss": 7.7474, "step": 16440 }, { "epoch": 0.22492496803877735, "grad_norm": 1.1706935167312622, "learning_rate": 0.0004997634521230576, "loss": 7.6625, "step": 16450 }, { "epoch": 0.2250617005421444, "grad_norm": 1.521277904510498, "learning_rate": 0.0004997608502707252, "loss": 7.5485, "step": 16460 }, { "epoch": 0.2251984330455114, "grad_norm": 1.9024567604064941, "learning_rate": 0.0004997582341941693, "loss": 7.7431, "step": 16470 }, { "epoch": 0.22533516554887845, "grad_norm": 1.8511314392089844, "learning_rate": 0.0004997556038935389, "loss": 7.5796, "step": 16480 }, { "epoch": 0.2254718980522455, "grad_norm": 1.8124295473098755, "learning_rate": 0.0004997529593689838, "loss": 7.5599, "step": 16490 }, { "epoch": 0.22560863055561253, "grad_norm": 1.4068995714187622, "learning_rate": 0.0004997503006206546, "loss": 7.4016, "step": 16500 }, { "epoch": 0.22574536305897958, "grad_norm": 1.2300580739974976, "learning_rate": 0.0004997476276487029, "loss": 7.6999, "step": 16510 }, { "epoch": 0.2258820955623466, "grad_norm": 1.2043216228485107, "learning_rate": 0.0004997449404532808, "loss": 7.8317, "step": 16520 }, { "epoch": 0.22601882806571363, "grad_norm": 1.1960924863815308, "learning_rate": 0.0004997422390345413, "loss": 7.7065, "step": 16530 }, { "epoch": 0.22615556056908068, "grad_norm": 1.447993516921997, "learning_rate": 0.0004997395233926383, "loss": 7.4902, "step": 16540 }, { "epoch": 0.22629229307244772, "grad_norm": 1.5161387920379639, "learning_rate": 0.0004997367935277265, "loss": 7.7298, "step": 16550 }, { "epoch": 0.22642902557581476, "grad_norm": 1.2044456005096436, "learning_rate": 0.0004997340494399613, "loss": 7.9792, "step": 16560 }, { "epoch": 0.2265657580791818, "grad_norm": 1.0110689401626587, "learning_rate": 0.0004997312911294989, "loss": 7.6096, "step": 16570 }, { "epoch": 0.22670249058254882, "grad_norm": 1.0162736177444458, "learning_rate": 0.0004997285185964966, "loss": 7.5033, "step": 16580 }, { "epoch": 0.22683922308591586, "grad_norm": 1.7953959703445435, "learning_rate": 0.0004997257318411121, "loss": 7.5724, "step": 16590 }, { "epoch": 0.2269759555892829, "grad_norm": 1.1831449270248413, "learning_rate": 0.0004997229308635043, "loss": 7.7779, "step": 16600 }, { "epoch": 0.22711268809264995, "grad_norm": 1.1019762754440308, "learning_rate": 0.0004997201156638326, "loss": 7.8203, "step": 16610 }, { "epoch": 0.227249420596017, "grad_norm": 0.9311403632164001, "learning_rate": 0.0004997172862422573, "loss": 7.7793, "step": 16620 }, { "epoch": 0.227386153099384, "grad_norm": 0.903088390827179, "learning_rate": 0.0004997144425989397, "loss": 7.6249, "step": 16630 }, { "epoch": 0.22752288560275105, "grad_norm": 1.138128638267517, "learning_rate": 0.0004997115847340415, "loss": 7.5607, "step": 16640 }, { "epoch": 0.2276596181061181, "grad_norm": 1.8119481801986694, "learning_rate": 0.0004997087126477256, "loss": 7.6668, "step": 16650 }, { "epoch": 0.22779635060948514, "grad_norm": 0.7382458448410034, "learning_rate": 0.0004997058263401556, "loss": 7.7236, "step": 16660 }, { "epoch": 0.22793308311285218, "grad_norm": 1.1723310947418213, "learning_rate": 0.0004997029258114959, "loss": 7.2304, "step": 16670 }, { "epoch": 0.2280698156162192, "grad_norm": 1.469382643699646, "learning_rate": 0.0004997000110619116, "loss": 7.6582, "step": 16680 }, { "epoch": 0.22820654811958624, "grad_norm": 2.0325839519500732, "learning_rate": 0.0004996970820915687, "loss": 7.5904, "step": 16690 }, { "epoch": 0.22834328062295328, "grad_norm": 0.8636789321899414, "learning_rate": 0.0004996941389006341, "loss": 7.6259, "step": 16700 }, { "epoch": 0.22848001312632032, "grad_norm": 1.4707423448562622, "learning_rate": 0.0004996911814892752, "loss": 7.4047, "step": 16710 }, { "epoch": 0.22861674562968737, "grad_norm": 1.0815513134002686, "learning_rate": 0.0004996882098576608, "loss": 7.5967, "step": 16720 }, { "epoch": 0.2287534781330544, "grad_norm": 1.1037485599517822, "learning_rate": 0.0004996852240059599, "loss": 7.5346, "step": 16730 }, { "epoch": 0.22889021063642143, "grad_norm": 1.4246469736099243, "learning_rate": 0.0004996822239343425, "loss": 7.7791, "step": 16740 }, { "epoch": 0.22902694313978847, "grad_norm": 1.318568468093872, "learning_rate": 0.0004996792096429796, "loss": 7.3951, "step": 16750 }, { "epoch": 0.2291636756431555, "grad_norm": 1.526110053062439, "learning_rate": 0.0004996761811320427, "loss": 7.7363, "step": 16760 }, { "epoch": 0.22930040814652256, "grad_norm": 1.8099855184555054, "learning_rate": 0.0004996731384017044, "loss": 7.4855, "step": 16770 }, { "epoch": 0.2294371406498896, "grad_norm": 1.5521479845046997, "learning_rate": 0.000499670081452138, "loss": 7.7136, "step": 16780 }, { "epoch": 0.2295738731532566, "grad_norm": 1.708254098892212, "learning_rate": 0.0004996670102835176, "loss": 7.6643, "step": 16790 }, { "epoch": 0.22971060565662366, "grad_norm": 1.3490891456604004, "learning_rate": 0.0004996639248960181, "loss": 7.5908, "step": 16800 }, { "epoch": 0.2298473381599907, "grad_norm": 1.4135653972625732, "learning_rate": 0.0004996608252898151, "loss": 7.5042, "step": 16810 }, { "epoch": 0.22998407066335774, "grad_norm": 0.8796528577804565, "learning_rate": 0.0004996577114650852, "loss": 7.4422, "step": 16820 }, { "epoch": 0.23012080316672479, "grad_norm": 1.3997764587402344, "learning_rate": 0.0004996545834220058, "loss": 7.3054, "step": 16830 }, { "epoch": 0.23025753567009183, "grad_norm": 1.4947447776794434, "learning_rate": 0.000499651441160755, "loss": 7.8937, "step": 16840 }, { "epoch": 0.23039426817345884, "grad_norm": 1.2583764791488647, "learning_rate": 0.0004996482846815118, "loss": 7.6587, "step": 16850 }, { "epoch": 0.2305310006768259, "grad_norm": 1.4415098428726196, "learning_rate": 0.0004996451139844558, "loss": 7.5908, "step": 16860 }, { "epoch": 0.23066773318019293, "grad_norm": 1.18305242061615, "learning_rate": 0.0004996419290697678, "loss": 7.5666, "step": 16870 }, { "epoch": 0.23080446568355997, "grad_norm": 1.2788137197494507, "learning_rate": 0.0004996387299376291, "loss": 7.4888, "step": 16880 }, { "epoch": 0.23094119818692702, "grad_norm": 1.4855389595031738, "learning_rate": 0.0004996355165882218, "loss": 7.8352, "step": 16890 }, { "epoch": 0.23107793069029403, "grad_norm": 1.6463127136230469, "learning_rate": 0.0004996322890217289, "loss": 7.4445, "step": 16900 }, { "epoch": 0.23121466319366107, "grad_norm": 1.213494896888733, "learning_rate": 0.0004996290472383345, "loss": 7.7073, "step": 16910 }, { "epoch": 0.23135139569702812, "grad_norm": 2.2788050174713135, "learning_rate": 0.0004996257912382229, "loss": 7.6018, "step": 16920 }, { "epoch": 0.23148812820039516, "grad_norm": 1.2364375591278076, "learning_rate": 0.0004996225210215796, "loss": 7.4524, "step": 16930 }, { "epoch": 0.2316248607037622, "grad_norm": 1.7353298664093018, "learning_rate": 0.0004996192365885911, "loss": 7.5885, "step": 16940 }, { "epoch": 0.23176159320712925, "grad_norm": 2.71201491355896, "learning_rate": 0.0004996159379394441, "loss": 7.6224, "step": 16950 }, { "epoch": 0.23189832571049626, "grad_norm": 1.2162377834320068, "learning_rate": 0.0004996126250743267, "loss": 7.6389, "step": 16960 }, { "epoch": 0.2320350582138633, "grad_norm": 1.263377070426941, "learning_rate": 0.0004996092979934274, "loss": 7.1596, "step": 16970 }, { "epoch": 0.23217179071723035, "grad_norm": 1.7487269639968872, "learning_rate": 0.0004996059566969357, "loss": 7.65, "step": 16980 }, { "epoch": 0.2323085232205974, "grad_norm": 1.0032119750976562, "learning_rate": 0.0004996026011850421, "loss": 7.7791, "step": 16990 }, { "epoch": 0.23244525572396443, "grad_norm": 1.1720120906829834, "learning_rate": 0.0004995992314579375, "loss": 7.5145, "step": 17000 }, { "epoch": 0.23258198822733145, "grad_norm": 1.528854250907898, "learning_rate": 0.0004995958475158139, "loss": 7.7441, "step": 17010 }, { "epoch": 0.2327187207306985, "grad_norm": 1.0786011219024658, "learning_rate": 0.0004995924493588639, "loss": 7.4493, "step": 17020 }, { "epoch": 0.23285545323406553, "grad_norm": 1.6595299243927002, "learning_rate": 0.0004995890369872812, "loss": 7.4657, "step": 17030 }, { "epoch": 0.23299218573743258, "grad_norm": 0.9923824071884155, "learning_rate": 0.0004995856104012601, "loss": 7.6375, "step": 17040 }, { "epoch": 0.23312891824079962, "grad_norm": 1.1913948059082031, "learning_rate": 0.0004995821696009957, "loss": 7.6429, "step": 17050 }, { "epoch": 0.23326565074416664, "grad_norm": 1.1072484254837036, "learning_rate": 0.0004995787145866839, "loss": 7.5299, "step": 17060 }, { "epoch": 0.23340238324753368, "grad_norm": 1.1915336847305298, "learning_rate": 0.0004995752453585215, "loss": 7.4848, "step": 17070 }, { "epoch": 0.23353911575090072, "grad_norm": 1.1566133499145508, "learning_rate": 0.0004995717619167061, "loss": 7.4999, "step": 17080 }, { "epoch": 0.23367584825426777, "grad_norm": 2.9328649044036865, "learning_rate": 0.0004995682642614362, "loss": 7.6671, "step": 17090 }, { "epoch": 0.2338125807576348, "grad_norm": 1.3721643686294556, "learning_rate": 0.0004995647523929109, "loss": 7.4577, "step": 17100 }, { "epoch": 0.23394931326100185, "grad_norm": 1.7993414402008057, "learning_rate": 0.0004995612263113302, "loss": 7.6085, "step": 17110 }, { "epoch": 0.23408604576436887, "grad_norm": 2.3915202617645264, "learning_rate": 0.0004995576860168948, "loss": 7.3015, "step": 17120 }, { "epoch": 0.2342227782677359, "grad_norm": 2.156964063644409, "learning_rate": 0.0004995541315098066, "loss": 7.6494, "step": 17130 }, { "epoch": 0.23435951077110295, "grad_norm": 0.9662595987319946, "learning_rate": 0.0004995505627902677, "loss": 7.7282, "step": 17140 }, { "epoch": 0.23449624327447, "grad_norm": 1.2331767082214355, "learning_rate": 0.0004995469798584816, "loss": 7.6755, "step": 17150 }, { "epoch": 0.23463297577783704, "grad_norm": 1.1265321969985962, "learning_rate": 0.0004995433827146523, "loss": 7.6562, "step": 17160 }, { "epoch": 0.23476970828120405, "grad_norm": 1.102457046508789, "learning_rate": 0.0004995397713589846, "loss": 7.62, "step": 17170 }, { "epoch": 0.2349064407845711, "grad_norm": 1.1306484937667847, "learning_rate": 0.0004995361457916842, "loss": 7.6732, "step": 17180 }, { "epoch": 0.23504317328793814, "grad_norm": 1.399902105331421, "learning_rate": 0.0004995325060129577, "loss": 7.8059, "step": 17190 }, { "epoch": 0.23517990579130518, "grad_norm": 1.2082411050796509, "learning_rate": 0.0004995288520230121, "loss": 7.7291, "step": 17200 }, { "epoch": 0.23531663829467223, "grad_norm": 2.1196179389953613, "learning_rate": 0.0004995251838220557, "loss": 7.6976, "step": 17210 }, { "epoch": 0.23545337079803927, "grad_norm": 1.0572993755340576, "learning_rate": 0.0004995215014102973, "loss": 7.5168, "step": 17220 }, { "epoch": 0.23559010330140628, "grad_norm": 1.3581883907318115, "learning_rate": 0.000499517804787947, "loss": 7.5785, "step": 17230 }, { "epoch": 0.23572683580477333, "grad_norm": 1.5770994424819946, "learning_rate": 0.0004995140939552149, "loss": 7.3086, "step": 17240 }, { "epoch": 0.23586356830814037, "grad_norm": 1.6240068674087524, "learning_rate": 0.0004995103689123124, "loss": 7.6069, "step": 17250 }, { "epoch": 0.2360003008115074, "grad_norm": 0.9829373955726624, "learning_rate": 0.0004995066296594517, "loss": 7.2296, "step": 17260 }, { "epoch": 0.23613703331487446, "grad_norm": 1.5297476053237915, "learning_rate": 0.0004995028761968458, "loss": 7.6845, "step": 17270 }, { "epoch": 0.23627376581824147, "grad_norm": 1.2574121952056885, "learning_rate": 0.0004994991085247085, "loss": 7.6241, "step": 17280 }, { "epoch": 0.23641049832160851, "grad_norm": 1.1922798156738281, "learning_rate": 0.0004994953266432542, "loss": 7.6309, "step": 17290 }, { "epoch": 0.23654723082497556, "grad_norm": 1.0852086544036865, "learning_rate": 0.0004994915305526985, "loss": 7.5423, "step": 17300 }, { "epoch": 0.2366839633283426, "grad_norm": 1.1103323698043823, "learning_rate": 0.0004994877202532574, "loss": 7.6227, "step": 17310 }, { "epoch": 0.23682069583170964, "grad_norm": 0.9171180725097656, "learning_rate": 0.000499483895745148, "loss": 7.5227, "step": 17320 }, { "epoch": 0.2369574283350767, "grad_norm": 1.5112558603286743, "learning_rate": 0.0004994800570285881, "loss": 7.5785, "step": 17330 }, { "epoch": 0.2370941608384437, "grad_norm": 0.9502248167991638, "learning_rate": 0.0004994762041037964, "loss": 7.2428, "step": 17340 }, { "epoch": 0.23723089334181074, "grad_norm": 1.277797818183899, "learning_rate": 0.0004994723369709921, "loss": 7.6478, "step": 17350 }, { "epoch": 0.2373676258451778, "grad_norm": 2.031177282333374, "learning_rate": 0.0004994684556303957, "loss": 7.7064, "step": 17360 }, { "epoch": 0.23750435834854483, "grad_norm": 1.7412000894546509, "learning_rate": 0.000499464560082228, "loss": 7.4481, "step": 17370 }, { "epoch": 0.23764109085191187, "grad_norm": 2.0976850986480713, "learning_rate": 0.0004994606503267111, "loss": 7.3797, "step": 17380 }, { "epoch": 0.2377778233552789, "grad_norm": 2.0378684997558594, "learning_rate": 0.0004994567263640675, "loss": 7.5213, "step": 17390 }, { "epoch": 0.23791455585864593, "grad_norm": 1.4081956148147583, "learning_rate": 0.0004994527881945209, "loss": 7.4145, "step": 17400 }, { "epoch": 0.23805128836201297, "grad_norm": 1.6314576864242554, "learning_rate": 0.0004994488358182952, "loss": 7.6, "step": 17410 }, { "epoch": 0.23818802086538002, "grad_norm": 1.183703899383545, "learning_rate": 0.0004994448692356158, "loss": 7.4324, "step": 17420 }, { "epoch": 0.23832475336874706, "grad_norm": 2.0515799522399902, "learning_rate": 0.0004994408884467085, "loss": 7.5592, "step": 17430 }, { "epoch": 0.23846148587211408, "grad_norm": 2.3891284465789795, "learning_rate": 0.0004994368934518, "loss": 7.8482, "step": 17440 }, { "epoch": 0.23859821837548112, "grad_norm": 1.6547729969024658, "learning_rate": 0.0004994328842511179, "loss": 7.7361, "step": 17450 }, { "epoch": 0.23873495087884816, "grad_norm": 1.6558972597122192, "learning_rate": 0.0004994288608448904, "loss": 7.5202, "step": 17460 }, { "epoch": 0.2388716833822152, "grad_norm": 1.6894476413726807, "learning_rate": 0.0004994248232333468, "loss": 7.5482, "step": 17470 }, { "epoch": 0.23900841588558225, "grad_norm": 0.9562554955482483, "learning_rate": 0.0004994207714167168, "loss": 7.5962, "step": 17480 }, { "epoch": 0.2391451483889493, "grad_norm": 1.6707277297973633, "learning_rate": 0.0004994167053952315, "loss": 7.576, "step": 17490 }, { "epoch": 0.2392818808923163, "grad_norm": 1.3159422874450684, "learning_rate": 0.0004994126251691222, "loss": 7.4792, "step": 17500 }, { "epoch": 0.23941861339568335, "grad_norm": 1.3596025705337524, "learning_rate": 0.0004994085307386214, "loss": 7.8598, "step": 17510 }, { "epoch": 0.2395553458990504, "grad_norm": 5.077762603759766, "learning_rate": 0.0004994044221039621, "loss": 7.5309, "step": 17520 }, { "epoch": 0.23969207840241744, "grad_norm": 0.9942232966423035, "learning_rate": 0.0004994002992653785, "loss": 7.5143, "step": 17530 }, { "epoch": 0.23982881090578448, "grad_norm": 1.0613266229629517, "learning_rate": 0.0004993961622231053, "loss": 7.6259, "step": 17540 }, { "epoch": 0.2399655434091515, "grad_norm": 0.9910652041435242, "learning_rate": 0.0004993920109773783, "loss": 7.4964, "step": 17550 }, { "epoch": 0.24010227591251854, "grad_norm": 1.0180689096450806, "learning_rate": 0.0004993878455284336, "loss": 7.568, "step": 17560 }, { "epoch": 0.24023900841588558, "grad_norm": 1.601413607597351, "learning_rate": 0.0004993836658765086, "loss": 7.4972, "step": 17570 }, { "epoch": 0.24037574091925262, "grad_norm": 1.156459927558899, "learning_rate": 0.0004993794720218414, "loss": 7.5186, "step": 17580 }, { "epoch": 0.24051247342261967, "grad_norm": 0.9947330951690674, "learning_rate": 0.0004993752639646708, "loss": 7.8382, "step": 17590 }, { "epoch": 0.2406492059259867, "grad_norm": 1.088765025138855, "learning_rate": 0.0004993710417052364, "loss": 7.4074, "step": 17600 }, { "epoch": 0.24078593842935372, "grad_norm": 1.4964425563812256, "learning_rate": 0.0004993668052437786, "loss": 7.5098, "step": 17610 }, { "epoch": 0.24092267093272077, "grad_norm": 1.868786334991455, "learning_rate": 0.0004993625545805388, "loss": 7.9174, "step": 17620 }, { "epoch": 0.2410594034360878, "grad_norm": 0.8424546718597412, "learning_rate": 0.0004993582897157591, "loss": 7.6975, "step": 17630 }, { "epoch": 0.24119613593945485, "grad_norm": 1.9922494888305664, "learning_rate": 0.0004993540106496822, "loss": 7.6307, "step": 17640 }, { "epoch": 0.2413328684428219, "grad_norm": 1.746551275253296, "learning_rate": 0.0004993497173825521, "loss": 7.9207, "step": 17650 }, { "epoch": 0.2414696009461889, "grad_norm": 1.8150897026062012, "learning_rate": 0.000499345409914613, "loss": 7.513, "step": 17660 }, { "epoch": 0.24160633344955595, "grad_norm": 1.308751106262207, "learning_rate": 0.0004993410882461104, "loss": 7.4026, "step": 17670 }, { "epoch": 0.241743065952923, "grad_norm": 1.3014847040176392, "learning_rate": 0.0004993367523772904, "loss": 7.6403, "step": 17680 }, { "epoch": 0.24187979845629004, "grad_norm": 1.5570166110992432, "learning_rate": 0.0004993324023083999, "loss": 7.4895, "step": 17690 }, { "epoch": 0.24201653095965708, "grad_norm": 1.445112943649292, "learning_rate": 0.0004993280380396867, "loss": 7.6968, "step": 17700 }, { "epoch": 0.24215326346302413, "grad_norm": 1.1583020687103271, "learning_rate": 0.0004993236595713992, "loss": 7.4075, "step": 17710 }, { "epoch": 0.24228999596639114, "grad_norm": 1.521379828453064, "learning_rate": 0.0004993192669037871, "loss": 7.7281, "step": 17720 }, { "epoch": 0.24242672846975818, "grad_norm": 1.8517979383468628, "learning_rate": 0.0004993148600371001, "loss": 7.7228, "step": 17730 }, { "epoch": 0.24256346097312523, "grad_norm": 1.5239951610565186, "learning_rate": 0.0004993104389715894, "loss": 7.5966, "step": 17740 }, { "epoch": 0.24270019347649227, "grad_norm": 2.019524335861206, "learning_rate": 0.000499306003707507, "loss": 7.4834, "step": 17750 }, { "epoch": 0.2428369259798593, "grad_norm": 1.2037359476089478, "learning_rate": 0.0004993015542451052, "loss": 7.8414, "step": 17760 }, { "epoch": 0.24297365848322633, "grad_norm": 1.273065447807312, "learning_rate": 0.0004992970905846374, "loss": 7.7198, "step": 17770 }, { "epoch": 0.24311039098659337, "grad_norm": 1.1162126064300537, "learning_rate": 0.000499292612726358, "loss": 7.4269, "step": 17780 }, { "epoch": 0.24324712348996042, "grad_norm": 1.2985968589782715, "learning_rate": 0.0004992881206705218, "loss": 7.5769, "step": 17790 }, { "epoch": 0.24338385599332746, "grad_norm": 0.9630876779556274, "learning_rate": 0.000499283614417385, "loss": 7.6913, "step": 17800 }, { "epoch": 0.2435205884966945, "grad_norm": 1.0058718919754028, "learning_rate": 0.0004992790939672039, "loss": 7.8062, "step": 17810 }, { "epoch": 0.24365732100006152, "grad_norm": 0.9029175043106079, "learning_rate": 0.0004992745593202359, "loss": 7.6533, "step": 17820 }, { "epoch": 0.24379405350342856, "grad_norm": 1.547226071357727, "learning_rate": 0.0004992700104767395, "loss": 7.6794, "step": 17830 }, { "epoch": 0.2439307860067956, "grad_norm": 1.0150505304336548, "learning_rate": 0.0004992654474369737, "loss": 7.5395, "step": 17840 }, { "epoch": 0.24406751851016265, "grad_norm": 1.9977400302886963, "learning_rate": 0.0004992608702011983, "loss": 7.7536, "step": 17850 }, { "epoch": 0.2442042510135297, "grad_norm": 1.2806378602981567, "learning_rate": 0.0004992562787696739, "loss": 7.7047, "step": 17860 }, { "epoch": 0.24434098351689673, "grad_norm": 1.1984838247299194, "learning_rate": 0.0004992516731426622, "loss": 7.5169, "step": 17870 }, { "epoch": 0.24447771602026375, "grad_norm": 1.6974879503250122, "learning_rate": 0.0004992470533204254, "loss": 7.6614, "step": 17880 }, { "epoch": 0.2446144485236308, "grad_norm": 0.8744622468948364, "learning_rate": 0.0004992424193032266, "loss": 7.7827, "step": 17890 }, { "epoch": 0.24475118102699783, "grad_norm": 2.391650915145874, "learning_rate": 0.0004992377710913296, "loss": 7.4182, "step": 17900 }, { "epoch": 0.24488791353036488, "grad_norm": 2.0696475505828857, "learning_rate": 0.0004992331086849993, "loss": 7.7412, "step": 17910 }, { "epoch": 0.24502464603373192, "grad_norm": 1.6297105550765991, "learning_rate": 0.0004992284320845012, "loss": 7.7722, "step": 17920 }, { "epoch": 0.24516137853709893, "grad_norm": 1.8902825117111206, "learning_rate": 0.0004992237412901016, "loss": 7.4053, "step": 17930 }, { "epoch": 0.24529811104046598, "grad_norm": 1.546518087387085, "learning_rate": 0.0004992190363020677, "loss": 7.6861, "step": 17940 }, { "epoch": 0.24543484354383302, "grad_norm": 1.6953368186950684, "learning_rate": 0.0004992143171206673, "loss": 7.6293, "step": 17950 }, { "epoch": 0.24557157604720006, "grad_norm": 1.2948001623153687, "learning_rate": 0.0004992095837461693, "loss": 7.4759, "step": 17960 }, { "epoch": 0.2457083085505671, "grad_norm": 1.053482174873352, "learning_rate": 0.0004992048361788432, "loss": 7.7023, "step": 17970 }, { "epoch": 0.24584504105393415, "grad_norm": 1.3777285814285278, "learning_rate": 0.0004992000744189595, "loss": 7.654, "step": 17980 }, { "epoch": 0.24598177355730116, "grad_norm": 0.9334141612052917, "learning_rate": 0.0004991952984667894, "loss": 7.4395, "step": 17990 }, { "epoch": 0.2461185060606682, "grad_norm": 2.861698865890503, "learning_rate": 0.0004991905083226045, "loss": 7.4961, "step": 18000 }, { "epoch": 0.24625523856403525, "grad_norm": 1.4427947998046875, "learning_rate": 0.0004991857039866783, "loss": 8.0102, "step": 18010 }, { "epoch": 0.2463919710674023, "grad_norm": 1.1613116264343262, "learning_rate": 0.0004991808854592838, "loss": 7.5297, "step": 18020 }, { "epoch": 0.24652870357076934, "grad_norm": 1.1471240520477295, "learning_rate": 0.0004991760527406956, "loss": 7.4064, "step": 18030 }, { "epoch": 0.24666543607413635, "grad_norm": 1.0103375911712646, "learning_rate": 0.0004991712058311891, "loss": 7.627, "step": 18040 }, { "epoch": 0.2468021685775034, "grad_norm": 2.04421067237854, "learning_rate": 0.0004991663447310402, "loss": 7.6435, "step": 18050 }, { "epoch": 0.24693890108087044, "grad_norm": 1.0508228540420532, "learning_rate": 0.0004991614694405258, "loss": 7.6659, "step": 18060 }, { "epoch": 0.24707563358423748, "grad_norm": 1.8803502321243286, "learning_rate": 0.0004991565799599234, "loss": 7.3963, "step": 18070 }, { "epoch": 0.24721236608760452, "grad_norm": 1.5828639268875122, "learning_rate": 0.0004991516762895117, "loss": 8.0035, "step": 18080 }, { "epoch": 0.24734909859097154, "grad_norm": 1.7288298606872559, "learning_rate": 0.0004991467584295697, "loss": 7.6161, "step": 18090 }, { "epoch": 0.24748583109433858, "grad_norm": 1.2636524438858032, "learning_rate": 0.0004991418263803778, "loss": 7.7176, "step": 18100 }, { "epoch": 0.24762256359770562, "grad_norm": 1.4811689853668213, "learning_rate": 0.0004991368801422166, "loss": 7.5216, "step": 18110 }, { "epoch": 0.24775929610107267, "grad_norm": 0.9490172863006592, "learning_rate": 0.000499131919715368, "loss": 7.4924, "step": 18120 }, { "epoch": 0.2478960286044397, "grad_norm": 1.2112376689910889, "learning_rate": 0.0004991269451001144, "loss": 7.5763, "step": 18130 }, { "epoch": 0.24803276110780675, "grad_norm": 1.3092041015625, "learning_rate": 0.000499121956296739, "loss": 7.5601, "step": 18140 }, { "epoch": 0.24816949361117377, "grad_norm": 1.1529910564422607, "learning_rate": 0.0004991169533055262, "loss": 7.4689, "step": 18150 }, { "epoch": 0.2483062261145408, "grad_norm": 0.9728964567184448, "learning_rate": 0.0004991119361267606, "loss": 7.6297, "step": 18160 }, { "epoch": 0.24844295861790786, "grad_norm": 1.4188202619552612, "learning_rate": 0.0004991069047607283, "loss": 7.6246, "step": 18170 }, { "epoch": 0.2485796911212749, "grad_norm": 1.3714021444320679, "learning_rate": 0.0004991018592077156, "loss": 7.6241, "step": 18180 }, { "epoch": 0.24871642362464194, "grad_norm": 1.2750308513641357, "learning_rate": 0.0004990967994680099, "loss": 7.4694, "step": 18190 }, { "epoch": 0.24885315612800896, "grad_norm": 1.5938801765441895, "learning_rate": 0.0004990917255418992, "loss": 7.4994, "step": 18200 }, { "epoch": 0.248989888631376, "grad_norm": 1.4693644046783447, "learning_rate": 0.0004990866374296729, "loss": 7.2834, "step": 18210 }, { "epoch": 0.24912662113474304, "grad_norm": 1.0844910144805908, "learning_rate": 0.0004990815351316203, "loss": 7.5503, "step": 18220 }, { "epoch": 0.24926335363811009, "grad_norm": 1.563923716545105, "learning_rate": 0.0004990764186480322, "loss": 7.5576, "step": 18230 }, { "epoch": 0.24940008614147713, "grad_norm": 2.6698007583618164, "learning_rate": 0.0004990712879792001, "loss": 7.4747, "step": 18240 }, { "epoch": 0.24953681864484417, "grad_norm": 1.254726529121399, "learning_rate": 0.000499066143125416, "loss": 7.4097, "step": 18250 }, { "epoch": 0.2496735511482112, "grad_norm": 1.0834953784942627, "learning_rate": 0.000499060984086973, "loss": 7.6616, "step": 18260 }, { "epoch": 0.24981028365157823, "grad_norm": 2.7741966247558594, "learning_rate": 0.0004990558108641649, "loss": 7.7878, "step": 18270 }, { "epoch": 0.24994701615494527, "grad_norm": 1.1853107213974, "learning_rate": 0.0004990506234572862, "loss": 7.6015, "step": 18280 }, { "epoch": 0.2500837486583123, "grad_norm": 2.8047690391540527, "learning_rate": 0.0004990454218666326, "loss": 7.563, "step": 18290 }, { "epoch": 0.25022048116167933, "grad_norm": 2.4032540321350098, "learning_rate": 0.0004990402060925001, "loss": 7.7209, "step": 18300 }, { "epoch": 0.2503572136650464, "grad_norm": 1.518367886543274, "learning_rate": 0.0004990349761351859, "loss": 7.4292, "step": 18310 }, { "epoch": 0.2504939461684134, "grad_norm": 1.6145540475845337, "learning_rate": 0.0004990297319949879, "loss": 7.5854, "step": 18320 }, { "epoch": 0.25063067867178046, "grad_norm": 1.4985220432281494, "learning_rate": 0.0004990244736722044, "loss": 7.8196, "step": 18330 }, { "epoch": 0.2507674111751475, "grad_norm": 1.506056785583496, "learning_rate": 0.0004990192011671352, "loss": 7.622, "step": 18340 }, { "epoch": 0.25090414367851455, "grad_norm": 0.9292852282524109, "learning_rate": 0.0004990139144800805, "loss": 7.7615, "step": 18350 }, { "epoch": 0.2510408761818816, "grad_norm": 0.8954073786735535, "learning_rate": 0.0004990086136113414, "loss": 7.5586, "step": 18360 }, { "epoch": 0.25117760868524863, "grad_norm": 1.1528100967407227, "learning_rate": 0.0004990032985612197, "loss": 7.4725, "step": 18370 }, { "epoch": 0.2513143411886157, "grad_norm": 1.0185798406600952, "learning_rate": 0.0004989979693300183, "loss": 7.5603, "step": 18380 }, { "epoch": 0.25145107369198266, "grad_norm": 2.3067831993103027, "learning_rate": 0.0004989926259180405, "loss": 7.7242, "step": 18390 }, { "epoch": 0.2515878061953497, "grad_norm": 1.786482810974121, "learning_rate": 0.0004989872683255906, "loss": 7.6488, "step": 18400 }, { "epoch": 0.25172453869871675, "grad_norm": 2.527886390686035, "learning_rate": 0.0004989818965529739, "loss": 7.7224, "step": 18410 }, { "epoch": 0.2518612712020838, "grad_norm": 1.1691285371780396, "learning_rate": 0.0004989765106004963, "loss": 8.1893, "step": 18420 }, { "epoch": 0.25199800370545083, "grad_norm": 1.325483798980713, "learning_rate": 0.0004989711104684644, "loss": 7.8334, "step": 18430 }, { "epoch": 0.2521347362088179, "grad_norm": 1.7393540143966675, "learning_rate": 0.0004989656961571858, "loss": 7.7125, "step": 18440 }, { "epoch": 0.2522714687121849, "grad_norm": 0.875489354133606, "learning_rate": 0.0004989602676669689, "loss": 7.7722, "step": 18450 }, { "epoch": 0.25240820121555196, "grad_norm": 0.889697253704071, "learning_rate": 0.0004989548249981228, "loss": 7.5542, "step": 18460 }, { "epoch": 0.252544933718919, "grad_norm": 1.8215510845184326, "learning_rate": 0.0004989493681509576, "loss": 7.5526, "step": 18470 }, { "epoch": 0.25268166622228605, "grad_norm": 1.2027640342712402, "learning_rate": 0.0004989438971257839, "loss": 7.3843, "step": 18480 }, { "epoch": 0.2528183987256531, "grad_norm": 1.0095560550689697, "learning_rate": 0.0004989384119229134, "loss": 7.4792, "step": 18490 }, { "epoch": 0.2529551312290201, "grad_norm": 1.1353298425674438, "learning_rate": 0.0004989329125426584, "loss": 7.4908, "step": 18500 }, { "epoch": 0.2530918637323871, "grad_norm": 1.3815096616744995, "learning_rate": 0.0004989273989853322, "loss": 7.424, "step": 18510 }, { "epoch": 0.25322859623575417, "grad_norm": 1.3633958101272583, "learning_rate": 0.0004989218712512488, "loss": 7.6854, "step": 18520 }, { "epoch": 0.2533653287391212, "grad_norm": 3.1032068729400635, "learning_rate": 0.0004989163293407228, "loss": 7.6902, "step": 18530 }, { "epoch": 0.25350206124248825, "grad_norm": 1.138085126876831, "learning_rate": 0.0004989107732540701, "loss": 7.4756, "step": 18540 }, { "epoch": 0.2536387937458553, "grad_norm": 1.4157466888427734, "learning_rate": 0.000498905202991607, "loss": 7.607, "step": 18550 }, { "epoch": 0.25377552624922234, "grad_norm": 0.8694984912872314, "learning_rate": 0.0004988996185536507, "loss": 7.4777, "step": 18560 }, { "epoch": 0.2539122587525894, "grad_norm": 1.9181503057479858, "learning_rate": 0.0004988940199405193, "loss": 7.637, "step": 18570 }, { "epoch": 0.2540489912559564, "grad_norm": 1.0184822082519531, "learning_rate": 0.0004988884071525317, "loss": 7.7024, "step": 18580 }, { "epoch": 0.25418572375932347, "grad_norm": 1.1445610523223877, "learning_rate": 0.0004988827801900074, "loss": 7.3784, "step": 18590 }, { "epoch": 0.2543224562626905, "grad_norm": 1.0557870864868164, "learning_rate": 0.0004988771390532671, "loss": 7.5648, "step": 18600 }, { "epoch": 0.2544591887660575, "grad_norm": 1.8417881727218628, "learning_rate": 0.0004988714837426318, "loss": 7.7678, "step": 18610 }, { "epoch": 0.25459592126942454, "grad_norm": 1.5882240533828735, "learning_rate": 0.0004988658142584236, "loss": 7.3079, "step": 18620 }, { "epoch": 0.2547326537727916, "grad_norm": 1.2581908702850342, "learning_rate": 0.0004988601306009656, "loss": 7.6291, "step": 18630 }, { "epoch": 0.2548693862761586, "grad_norm": 2.574525833129883, "learning_rate": 0.0004988544327705815, "loss": 7.7305, "step": 18640 }, { "epoch": 0.25500611877952567, "grad_norm": 1.7145907878875732, "learning_rate": 0.0004988487207675954, "loss": 7.3895, "step": 18650 }, { "epoch": 0.2551428512828927, "grad_norm": 2.7912418842315674, "learning_rate": 0.0004988429945923332, "loss": 7.6493, "step": 18660 }, { "epoch": 0.25527958378625976, "grad_norm": 1.1909515857696533, "learning_rate": 0.0004988372542451205, "loss": 7.5213, "step": 18670 }, { "epoch": 0.2554163162896268, "grad_norm": 2.81895112991333, "learning_rate": 0.0004988314997262845, "loss": 7.6657, "step": 18680 }, { "epoch": 0.25555304879299384, "grad_norm": 1.8433021306991577, "learning_rate": 0.0004988257310361528, "loss": 7.4374, "step": 18690 }, { "epoch": 0.2556897812963609, "grad_norm": 1.6108155250549316, "learning_rate": 0.0004988199481750539, "loss": 7.6122, "step": 18700 }, { "epoch": 0.25582651379972793, "grad_norm": 0.8539618253707886, "learning_rate": 0.0004988141511433174, "loss": 7.5768, "step": 18710 }, { "epoch": 0.2559632463030949, "grad_norm": 1.460782527923584, "learning_rate": 0.0004988083399412732, "loss": 7.3499, "step": 18720 }, { "epoch": 0.25609997880646196, "grad_norm": 2.672835111618042, "learning_rate": 0.0004988025145692523, "loss": 7.3533, "step": 18730 }, { "epoch": 0.256236711309829, "grad_norm": 1.1979910135269165, "learning_rate": 0.0004987966750275865, "loss": 7.508, "step": 18740 }, { "epoch": 0.25637344381319604, "grad_norm": 1.2061172723770142, "learning_rate": 0.0004987908213166084, "loss": 7.7994, "step": 18750 }, { "epoch": 0.2565101763165631, "grad_norm": 0.7515447735786438, "learning_rate": 0.0004987849534366512, "loss": 7.6909, "step": 18760 }, { "epoch": 0.25664690881993013, "grad_norm": 2.599733829498291, "learning_rate": 0.0004987790713880493, "loss": 7.3721, "step": 18770 }, { "epoch": 0.2567836413232972, "grad_norm": 2.4723501205444336, "learning_rate": 0.0004987731751711377, "loss": 7.6163, "step": 18780 }, { "epoch": 0.2569203738266642, "grad_norm": 2.013160228729248, "learning_rate": 0.000498767264786252, "loss": 7.3091, "step": 18790 }, { "epoch": 0.25705710633003126, "grad_norm": 0.8060458898544312, "learning_rate": 0.000498761340233729, "loss": 7.6019, "step": 18800 }, { "epoch": 0.2571938388333983, "grad_norm": 1.763651728630066, "learning_rate": 0.0004987554015139059, "loss": 7.5888, "step": 18810 }, { "epoch": 0.25733057133676535, "grad_norm": 1.272605061531067, "learning_rate": 0.0004987494486271212, "loss": 7.5816, "step": 18820 }, { "epoch": 0.25746730384013233, "grad_norm": 1.183836817741394, "learning_rate": 0.0004987434815737137, "loss": 7.5767, "step": 18830 }, { "epoch": 0.2576040363434994, "grad_norm": 1.0573456287384033, "learning_rate": 0.0004987375003540233, "loss": 7.5608, "step": 18840 }, { "epoch": 0.2577407688468664, "grad_norm": 1.0954632759094238, "learning_rate": 0.0004987315049683905, "loss": 7.6816, "step": 18850 }, { "epoch": 0.25787750135023346, "grad_norm": 1.009200096130371, "learning_rate": 0.0004987254954171572, "loss": 7.788, "step": 18860 }, { "epoch": 0.2580142338536005, "grad_norm": 1.085654616355896, "learning_rate": 0.0004987194717006651, "loss": 7.5547, "step": 18870 }, { "epoch": 0.25815096635696755, "grad_norm": 1.0437016487121582, "learning_rate": 0.0004987134338192576, "loss": 7.4333, "step": 18880 }, { "epoch": 0.2582876988603346, "grad_norm": 1.5986424684524536, "learning_rate": 0.0004987073817732784, "loss": 7.5467, "step": 18890 }, { "epoch": 0.25842443136370163, "grad_norm": 1.0487112998962402, "learning_rate": 0.0004987013155630721, "loss": 7.69, "step": 18900 }, { "epoch": 0.2585611638670687, "grad_norm": 2.571275234222412, "learning_rate": 0.0004986952351889846, "loss": 7.6635, "step": 18910 }, { "epoch": 0.2586978963704357, "grad_norm": 1.9795666933059692, "learning_rate": 0.0004986891406513617, "loss": 7.4428, "step": 18920 }, { "epoch": 0.25883462887380276, "grad_norm": 1.9832561016082764, "learning_rate": 0.0004986830319505508, "loss": 7.5544, "step": 18930 }, { "epoch": 0.25897136137716975, "grad_norm": 0.8950990438461304, "learning_rate": 0.0004986769090868997, "loss": 7.5495, "step": 18940 }, { "epoch": 0.2591080938805368, "grad_norm": 1.085995078086853, "learning_rate": 0.0004986707720607569, "loss": 7.5754, "step": 18950 }, { "epoch": 0.25924482638390384, "grad_norm": 1.8640674352645874, "learning_rate": 0.0004986646208724724, "loss": 7.3726, "step": 18960 }, { "epoch": 0.2593815588872709, "grad_norm": 0.9890807867050171, "learning_rate": 0.000498658455522396, "loss": 7.4576, "step": 18970 }, { "epoch": 0.2595182913906379, "grad_norm": 1.4317502975463867, "learning_rate": 0.0004986522760108791, "loss": 7.7612, "step": 18980 }, { "epoch": 0.25965502389400497, "grad_norm": 1.3388575315475464, "learning_rate": 0.0004986460823382736, "loss": 7.7161, "step": 18990 }, { "epoch": 0.259791756397372, "grad_norm": 1.1328420639038086, "learning_rate": 0.0004986398745049322, "loss": 7.7845, "step": 19000 }, { "epoch": 0.25992848890073905, "grad_norm": 1.488783836364746, "learning_rate": 0.0004986336525112085, "loss": 7.5807, "step": 19010 }, { "epoch": 0.2600652214041061, "grad_norm": 1.4142296314239502, "learning_rate": 0.0004986274163574568, "loss": 7.7371, "step": 19020 }, { "epoch": 0.26020195390747314, "grad_norm": 1.0146241188049316, "learning_rate": 0.0004986211660440323, "loss": 7.723, "step": 19030 }, { "epoch": 0.2603386864108401, "grad_norm": 1.527423620223999, "learning_rate": 0.000498614901571291, "loss": 7.7952, "step": 19040 }, { "epoch": 0.26047541891420717, "grad_norm": 0.9216548204421997, "learning_rate": 0.0004986086229395894, "loss": 7.5917, "step": 19050 }, { "epoch": 0.2606121514175742, "grad_norm": 1.4338055849075317, "learning_rate": 0.0004986023301492854, "loss": 7.5374, "step": 19060 }, { "epoch": 0.26074888392094125, "grad_norm": 1.3670905828475952, "learning_rate": 0.0004985960232007374, "loss": 7.7806, "step": 19070 }, { "epoch": 0.2608856164243083, "grad_norm": 1.3187617063522339, "learning_rate": 0.0004985897020943044, "loss": 7.5838, "step": 19080 }, { "epoch": 0.26102234892767534, "grad_norm": 1.0275932550430298, "learning_rate": 0.0004985833668303463, "loss": 7.4804, "step": 19090 }, { "epoch": 0.2611590814310424, "grad_norm": 2.0806632041931152, "learning_rate": 0.0004985770174092241, "loss": 7.5682, "step": 19100 }, { "epoch": 0.2612958139344094, "grad_norm": 3.0882813930511475, "learning_rate": 0.0004985706538312995, "loss": 7.8413, "step": 19110 }, { "epoch": 0.26143254643777647, "grad_norm": 0.852920413017273, "learning_rate": 0.0004985642760969347, "loss": 7.7353, "step": 19120 }, { "epoch": 0.2615692789411435, "grad_norm": 1.038973093032837, "learning_rate": 0.0004985578842064929, "loss": 7.6217, "step": 19130 }, { "epoch": 0.26170601144451056, "grad_norm": 1.0493212938308716, "learning_rate": 0.0004985514781603383, "loss": 7.6705, "step": 19140 }, { "epoch": 0.26184274394787754, "grad_norm": 1.1961615085601807, "learning_rate": 0.0004985450579588357, "loss": 7.6945, "step": 19150 }, { "epoch": 0.2619794764512446, "grad_norm": 0.7871989011764526, "learning_rate": 0.0004985386236023507, "loss": 7.4728, "step": 19160 }, { "epoch": 0.26211620895461163, "grad_norm": 1.3630914688110352, "learning_rate": 0.0004985321750912498, "loss": 7.8285, "step": 19170 }, { "epoch": 0.26225294145797867, "grad_norm": 1.1370784044265747, "learning_rate": 0.0004985257124259, "loss": 7.377, "step": 19180 }, { "epoch": 0.2623896739613457, "grad_norm": 1.3412201404571533, "learning_rate": 0.0004985192356066697, "loss": 7.8013, "step": 19190 }, { "epoch": 0.26252640646471276, "grad_norm": 1.1350793838500977, "learning_rate": 0.0004985127446339275, "loss": 7.5267, "step": 19200 }, { "epoch": 0.2626631389680798, "grad_norm": 1.5464365482330322, "learning_rate": 0.0004985062395080434, "loss": 7.4268, "step": 19210 }, { "epoch": 0.26279987147144684, "grad_norm": 1.1094636917114258, "learning_rate": 0.0004984997202293875, "loss": 7.4027, "step": 19220 }, { "epoch": 0.2629366039748139, "grad_norm": 1.2168954610824585, "learning_rate": 0.0004984931867983313, "loss": 7.6271, "step": 19230 }, { "epoch": 0.26307333647818093, "grad_norm": 0.9779899716377258, "learning_rate": 0.0004984866392152467, "loss": 7.6609, "step": 19240 }, { "epoch": 0.263210068981548, "grad_norm": 1.3866450786590576, "learning_rate": 0.000498480077480507, "loss": 7.4178, "step": 19250 }, { "epoch": 0.26334680148491496, "grad_norm": 1.2122762203216553, "learning_rate": 0.0004984735015944854, "loss": 7.5023, "step": 19260 }, { "epoch": 0.263483533988282, "grad_norm": 1.0081615447998047, "learning_rate": 0.0004984669115575567, "loss": 7.4282, "step": 19270 }, { "epoch": 0.26362026649164905, "grad_norm": 0.8919296264648438, "learning_rate": 0.0004984603073700961, "loss": 7.3331, "step": 19280 }, { "epoch": 0.2637569989950161, "grad_norm": 1.633852243423462, "learning_rate": 0.0004984536890324798, "loss": 7.4298, "step": 19290 }, { "epoch": 0.26389373149838313, "grad_norm": 2.549999713897705, "learning_rate": 0.0004984470565450846, "loss": 7.4851, "step": 19300 }, { "epoch": 0.2640304640017502, "grad_norm": 1.8197540044784546, "learning_rate": 0.0004984404099082885, "loss": 7.5104, "step": 19310 }, { "epoch": 0.2641671965051172, "grad_norm": 1.2609333992004395, "learning_rate": 0.0004984337491224697, "loss": 7.6528, "step": 19320 }, { "epoch": 0.26430392900848426, "grad_norm": 4.450305461883545, "learning_rate": 0.0004984270741880077, "loss": 7.5925, "step": 19330 }, { "epoch": 0.2644406615118513, "grad_norm": 0.8734875321388245, "learning_rate": 0.0004984203851052827, "loss": 7.5106, "step": 19340 }, { "epoch": 0.26457739401521835, "grad_norm": 2.737673282623291, "learning_rate": 0.0004984136818746755, "loss": 7.5874, "step": 19350 }, { "epoch": 0.2647141265185854, "grad_norm": 1.005854845046997, "learning_rate": 0.0004984069644965681, "loss": 7.6826, "step": 19360 }, { "epoch": 0.2648508590219524, "grad_norm": 1.4372435808181763, "learning_rate": 0.0004984002329713427, "loss": 7.402, "step": 19370 }, { "epoch": 0.2649875915253194, "grad_norm": 1.1452444791793823, "learning_rate": 0.0004983934872993831, "loss": 7.7139, "step": 19380 }, { "epoch": 0.26512432402868646, "grad_norm": 1.6979820728302002, "learning_rate": 0.0004983867274810731, "loss": 7.4611, "step": 19390 }, { "epoch": 0.2652610565320535, "grad_norm": 1.0635861158370972, "learning_rate": 0.0004983799535167979, "loss": 7.5728, "step": 19400 }, { "epoch": 0.26539778903542055, "grad_norm": 1.1610472202301025, "learning_rate": 0.0004983731654069431, "loss": 7.6051, "step": 19410 }, { "epoch": 0.2655345215387876, "grad_norm": 2.2420661449432373, "learning_rate": 0.0004983663631518956, "loss": 7.4945, "step": 19420 }, { "epoch": 0.26567125404215464, "grad_norm": 1.0775768756866455, "learning_rate": 0.0004983595467520425, "loss": 7.7576, "step": 19430 }, { "epoch": 0.2658079865455217, "grad_norm": 1.257126808166504, "learning_rate": 0.0004983527162077722, "loss": 7.689, "step": 19440 }, { "epoch": 0.2659447190488887, "grad_norm": 1.505805253982544, "learning_rate": 0.0004983458715194735, "loss": 7.4938, "step": 19450 }, { "epoch": 0.26608145155225577, "grad_norm": 1.3868049383163452, "learning_rate": 0.0004983390126875364, "loss": 7.4117, "step": 19460 }, { "epoch": 0.2662181840556228, "grad_norm": 0.9352881908416748, "learning_rate": 0.0004983321397123515, "loss": 7.2707, "step": 19470 }, { "epoch": 0.2663549165589898, "grad_norm": 1.0881398916244507, "learning_rate": 0.0004983252525943101, "loss": 7.6262, "step": 19480 }, { "epoch": 0.26649164906235684, "grad_norm": 0.8698849678039551, "learning_rate": 0.0004983183513338047, "loss": 7.5132, "step": 19490 }, { "epoch": 0.2666283815657239, "grad_norm": 1.3366549015045166, "learning_rate": 0.0004983114359312279, "loss": 7.7698, "step": 19500 }, { "epoch": 0.2667651140690909, "grad_norm": 1.4325461387634277, "learning_rate": 0.000498304506386974, "loss": 7.7149, "step": 19510 }, { "epoch": 0.26690184657245797, "grad_norm": 0.961284875869751, "learning_rate": 0.0004982975627014373, "loss": 7.4696, "step": 19520 }, { "epoch": 0.267038579075825, "grad_norm": 1.1256675720214844, "learning_rate": 0.0004982906048750135, "loss": 7.7533, "step": 19530 }, { "epoch": 0.26717531157919205, "grad_norm": 1.8364591598510742, "learning_rate": 0.0004982836329080987, "loss": 7.4593, "step": 19540 }, { "epoch": 0.2673120440825591, "grad_norm": 1.4638603925704956, "learning_rate": 0.00049827664680109, "loss": 7.4116, "step": 19550 }, { "epoch": 0.26744877658592614, "grad_norm": 1.0733866691589355, "learning_rate": 0.0004982696465543853, "loss": 7.8438, "step": 19560 }, { "epoch": 0.2675855090892932, "grad_norm": 1.0742638111114502, "learning_rate": 0.0004982626321683833, "loss": 7.6717, "step": 19570 }, { "epoch": 0.2677222415926602, "grad_norm": 1.4034807682037354, "learning_rate": 0.0004982556036434834, "loss": 7.3971, "step": 19580 }, { "epoch": 0.2678589740960272, "grad_norm": 1.409775733947754, "learning_rate": 0.0004982485609800859, "loss": 7.4644, "step": 19590 }, { "epoch": 0.26799570659939426, "grad_norm": 0.8849064707756042, "learning_rate": 0.000498241504178592, "loss": 7.5859, "step": 19600 }, { "epoch": 0.2681324391027613, "grad_norm": 0.9795355200767517, "learning_rate": 0.0004982344332394033, "loss": 7.4367, "step": 19610 }, { "epoch": 0.26826917160612834, "grad_norm": 1.0349668264389038, "learning_rate": 0.0004982273481629228, "loss": 7.4512, "step": 19620 }, { "epoch": 0.2684059041094954, "grad_norm": 1.088031530380249, "learning_rate": 0.000498220248949554, "loss": 7.4375, "step": 19630 }, { "epoch": 0.26854263661286243, "grad_norm": 1.1928681135177612, "learning_rate": 0.000498213135599701, "loss": 7.9873, "step": 19640 }, { "epoch": 0.26867936911622947, "grad_norm": 1.7776224613189697, "learning_rate": 0.0004982060081137692, "loss": 7.6241, "step": 19650 }, { "epoch": 0.2688161016195965, "grad_norm": 1.575730323791504, "learning_rate": 0.0004981988664921642, "loss": 7.4505, "step": 19660 }, { "epoch": 0.26895283412296356, "grad_norm": 1.433039903640747, "learning_rate": 0.0004981917107352928, "loss": 7.6071, "step": 19670 }, { "epoch": 0.2690895666263306, "grad_norm": 1.1992459297180176, "learning_rate": 0.0004981845408435628, "loss": 7.5929, "step": 19680 }, { "epoch": 0.26922629912969764, "grad_norm": 0.9770108461380005, "learning_rate": 0.0004981773568173822, "loss": 7.611, "step": 19690 }, { "epoch": 0.26936303163306463, "grad_norm": 1.2122894525527954, "learning_rate": 0.0004981701586571604, "loss": 7.5346, "step": 19700 }, { "epoch": 0.2694997641364317, "grad_norm": 0.9945498704910278, "learning_rate": 0.0004981629463633072, "loss": 7.8027, "step": 19710 }, { "epoch": 0.2696364966397987, "grad_norm": 1.0399521589279175, "learning_rate": 0.0004981557199362334, "loss": 7.479, "step": 19720 }, { "epoch": 0.26977322914316576, "grad_norm": 1.2424966096878052, "learning_rate": 0.0004981484793763504, "loss": 7.6401, "step": 19730 }, { "epoch": 0.2699099616465328, "grad_norm": 2.1341049671173096, "learning_rate": 0.0004981412246840707, "loss": 7.4754, "step": 19740 }, { "epoch": 0.27004669414989985, "grad_norm": 1.6211200952529907, "learning_rate": 0.0004981339558598075, "loss": 7.6133, "step": 19750 }, { "epoch": 0.2701834266532669, "grad_norm": 0.9914249777793884, "learning_rate": 0.0004981266729039748, "loss": 7.6855, "step": 19760 }, { "epoch": 0.27032015915663393, "grad_norm": 0.8406767249107361, "learning_rate": 0.0004981193758169873, "loss": 7.6548, "step": 19770 }, { "epoch": 0.270456891660001, "grad_norm": 1.1495450735092163, "learning_rate": 0.0004981120645992606, "loss": 7.5118, "step": 19780 }, { "epoch": 0.270593624163368, "grad_norm": 0.9575298428535461, "learning_rate": 0.000498104739251211, "loss": 7.6373, "step": 19790 }, { "epoch": 0.270730356666735, "grad_norm": 0.8655173778533936, "learning_rate": 0.0004980973997732557, "loss": 7.7616, "step": 19800 }, { "epoch": 0.27086708917010205, "grad_norm": 1.1935997009277344, "learning_rate": 0.0004980900461658129, "loss": 7.8345, "step": 19810 }, { "epoch": 0.2710038216734691, "grad_norm": 1.0169240236282349, "learning_rate": 0.0004980826784293011, "loss": 7.6897, "step": 19820 }, { "epoch": 0.27114055417683613, "grad_norm": 2.1210179328918457, "learning_rate": 0.0004980752965641401, "loss": 7.5665, "step": 19830 }, { "epoch": 0.2712772866802032, "grad_norm": 0.9713811874389648, "learning_rate": 0.0004980679005707503, "loss": 7.5976, "step": 19840 }, { "epoch": 0.2714140191835702, "grad_norm": 1.8234552145004272, "learning_rate": 0.0004980604904495529, "loss": 7.6533, "step": 19850 }, { "epoch": 0.27155075168693726, "grad_norm": 1.3019297122955322, "learning_rate": 0.0004980530662009697, "loss": 7.7706, "step": 19860 }, { "epoch": 0.2716874841903043, "grad_norm": 1.3346588611602783, "learning_rate": 0.0004980456278254238, "loss": 7.4038, "step": 19870 }, { "epoch": 0.27182421669367135, "grad_norm": 1.3418840169906616, "learning_rate": 0.0004980381753233388, "loss": 7.3368, "step": 19880 }, { "epoch": 0.2719609491970384, "grad_norm": 0.8198357820510864, "learning_rate": 0.000498030708695139, "loss": 7.7295, "step": 19890 }, { "epoch": 0.27209768170040544, "grad_norm": 0.8247920274734497, "learning_rate": 0.0004980232279412496, "loss": 7.4744, "step": 19900 }, { "epoch": 0.2722344142037724, "grad_norm": 1.005321979522705, "learning_rate": 0.0004980157330620969, "loss": 7.8276, "step": 19910 }, { "epoch": 0.27237114670713947, "grad_norm": 1.1090692281723022, "learning_rate": 0.0004980082240581076, "loss": 7.7562, "step": 19920 }, { "epoch": 0.2725078792105065, "grad_norm": 1.1789923906326294, "learning_rate": 0.0004980007009297092, "loss": 7.555, "step": 19930 }, { "epoch": 0.27264461171387355, "grad_norm": 1.2827520370483398, "learning_rate": 0.0004979931636773302, "loss": 7.7103, "step": 19940 }, { "epoch": 0.2727813442172406, "grad_norm": 1.0432400703430176, "learning_rate": 0.0004979856123014001, "loss": 7.6918, "step": 19950 }, { "epoch": 0.27291807672060764, "grad_norm": 0.9965600967407227, "learning_rate": 0.0004979780468023487, "loss": 7.4085, "step": 19960 }, { "epoch": 0.2730548092239747, "grad_norm": 2.1248323917388916, "learning_rate": 0.0004979704671806071, "loss": 7.6046, "step": 19970 }, { "epoch": 0.2731915417273417, "grad_norm": 1.7571266889572144, "learning_rate": 0.0004979628734366067, "loss": 7.687, "step": 19980 }, { "epoch": 0.27332827423070877, "grad_norm": 1.004249095916748, "learning_rate": 0.0004979552655707802, "loss": 7.8079, "step": 19990 }, { "epoch": 0.2734650067340758, "grad_norm": 1.3590046167373657, "learning_rate": 0.0004979476435835607, "loss": 7.5404, "step": 20000 }, { "epoch": 0.27360173923744285, "grad_norm": 2.4883275032043457, "learning_rate": 0.0004979400074753824, "loss": 7.4841, "step": 20010 }, { "epoch": 0.27373847174080984, "grad_norm": 0.7996664643287659, "learning_rate": 0.0004979323572466801, "loss": 7.5286, "step": 20020 }, { "epoch": 0.2738752042441769, "grad_norm": 1.7309789657592773, "learning_rate": 0.0004979246928978896, "loss": 7.4589, "step": 20030 }, { "epoch": 0.2740119367475439, "grad_norm": 1.0447498559951782, "learning_rate": 0.0004979170144294474, "loss": 7.9396, "step": 20040 }, { "epoch": 0.27414866925091097, "grad_norm": 1.6407573223114014, "learning_rate": 0.0004979093218417907, "loss": 7.3339, "step": 20050 }, { "epoch": 0.274285401754278, "grad_norm": 1.3365434408187866, "learning_rate": 0.0004979016151353576, "loss": 7.4588, "step": 20060 }, { "epoch": 0.27442213425764506, "grad_norm": 1.8197389841079712, "learning_rate": 0.0004978938943105871, "loss": 7.6544, "step": 20070 }, { "epoch": 0.2745588667610121, "grad_norm": 2.0383524894714355, "learning_rate": 0.0004978861593679188, "loss": 7.6077, "step": 20080 }, { "epoch": 0.27469559926437914, "grad_norm": 1.1416860818862915, "learning_rate": 0.0004978784103077933, "loss": 7.5389, "step": 20090 }, { "epoch": 0.2748323317677462, "grad_norm": 1.6994792222976685, "learning_rate": 0.0004978706471306519, "loss": 7.7571, "step": 20100 }, { "epoch": 0.27496906427111323, "grad_norm": 1.2149602174758911, "learning_rate": 0.0004978628698369367, "loss": 7.7571, "step": 20110 }, { "epoch": 0.27510579677448027, "grad_norm": 1.2822175025939941, "learning_rate": 0.0004978550784270907, "loss": 7.5895, "step": 20120 }, { "epoch": 0.27524252927784726, "grad_norm": 2.452815532684326, "learning_rate": 0.0004978472729015576, "loss": 7.6769, "step": 20130 }, { "epoch": 0.2753792617812143, "grad_norm": 1.0059230327606201, "learning_rate": 0.0004978394532607818, "loss": 7.7844, "step": 20140 }, { "epoch": 0.27551599428458134, "grad_norm": 1.294081211090088, "learning_rate": 0.0004978316195052089, "loss": 7.4871, "step": 20150 }, { "epoch": 0.2756527267879484, "grad_norm": 1.3385692834854126, "learning_rate": 0.0004978237716352849, "loss": 7.5956, "step": 20160 }, { "epoch": 0.27578945929131543, "grad_norm": 2.300252676010132, "learning_rate": 0.0004978159096514567, "loss": 7.6928, "step": 20170 }, { "epoch": 0.2759261917946825, "grad_norm": 0.9943827390670776, "learning_rate": 0.000497808033554172, "loss": 7.6687, "step": 20180 }, { "epoch": 0.2760629242980495, "grad_norm": 1.247268795967102, "learning_rate": 0.0004978001433438795, "loss": 7.5391, "step": 20190 }, { "epoch": 0.27619965680141656, "grad_norm": 0.7971326112747192, "learning_rate": 0.0004977922390210285, "loss": 7.6313, "step": 20200 }, { "epoch": 0.2763363893047836, "grad_norm": 1.3838236331939697, "learning_rate": 0.0004977843205860692, "loss": 7.4892, "step": 20210 }, { "epoch": 0.27647312180815065, "grad_norm": 1.0520755052566528, "learning_rate": 0.0004977763880394526, "loss": 7.7471, "step": 20220 }, { "epoch": 0.2766098543115177, "grad_norm": 1.3102856874465942, "learning_rate": 0.0004977684413816304, "loss": 7.7712, "step": 20230 }, { "epoch": 0.2767465868148847, "grad_norm": 2.286975622177124, "learning_rate": 0.000497760480613055, "loss": 7.7502, "step": 20240 }, { "epoch": 0.2768833193182517, "grad_norm": 1.3343194723129272, "learning_rate": 0.0004977525057341801, "loss": 7.4653, "step": 20250 }, { "epoch": 0.27702005182161876, "grad_norm": 1.3526962995529175, "learning_rate": 0.0004977445167454597, "loss": 7.6746, "step": 20260 }, { "epoch": 0.2771567843249858, "grad_norm": 1.6299359798431396, "learning_rate": 0.0004977365136473489, "loss": 7.3793, "step": 20270 }, { "epoch": 0.27729351682835285, "grad_norm": 1.3243741989135742, "learning_rate": 0.0004977284964403033, "loss": 7.7158, "step": 20280 }, { "epoch": 0.2774302493317199, "grad_norm": 2.0590124130249023, "learning_rate": 0.0004977204651247796, "loss": 7.6332, "step": 20290 }, { "epoch": 0.27756698183508693, "grad_norm": 1.166724681854248, "learning_rate": 0.0004977124197012352, "loss": 7.5327, "step": 20300 }, { "epoch": 0.277703714338454, "grad_norm": 1.5874590873718262, "learning_rate": 0.0004977043601701284, "loss": 7.5519, "step": 20310 }, { "epoch": 0.277840446841821, "grad_norm": 1.7997357845306396, "learning_rate": 0.000497696286531918, "loss": 7.8602, "step": 20320 }, { "epoch": 0.27797717934518806, "grad_norm": 1.7734184265136719, "learning_rate": 0.0004976881987870639, "loss": 7.4926, "step": 20330 }, { "epoch": 0.2781139118485551, "grad_norm": 1.4426478147506714, "learning_rate": 0.0004976800969360267, "loss": 7.9464, "step": 20340 }, { "epoch": 0.2782506443519221, "grad_norm": 1.6332911252975464, "learning_rate": 0.0004976719809792679, "loss": 7.7419, "step": 20350 }, { "epoch": 0.27838737685528914, "grad_norm": 1.4556647539138794, "learning_rate": 0.0004976638509172495, "loss": 7.8302, "step": 20360 }, { "epoch": 0.2785241093586562, "grad_norm": 1.501417636871338, "learning_rate": 0.0004976557067504347, "loss": 7.4249, "step": 20370 }, { "epoch": 0.2786608418620232, "grad_norm": 2.909414529800415, "learning_rate": 0.0004976475484792873, "loss": 7.3654, "step": 20380 }, { "epoch": 0.27879757436539027, "grad_norm": 1.5272256135940552, "learning_rate": 0.0004976393761042719, "loss": 7.5208, "step": 20390 }, { "epoch": 0.2789343068687573, "grad_norm": 0.8874494433403015, "learning_rate": 0.0004976311896258539, "loss": 7.6445, "step": 20400 }, { "epoch": 0.27907103937212435, "grad_norm": 0.9266309142112732, "learning_rate": 0.0004976229890444996, "loss": 7.6613, "step": 20410 }, { "epoch": 0.2792077718754914, "grad_norm": 1.1000813245773315, "learning_rate": 0.0004976147743606759, "loss": 7.367, "step": 20420 }, { "epoch": 0.27934450437885844, "grad_norm": 1.2573210000991821, "learning_rate": 0.0004976065455748508, "loss": 7.5322, "step": 20430 }, { "epoch": 0.2794812368822255, "grad_norm": 1.2112901210784912, "learning_rate": 0.0004975983026874928, "loss": 7.6456, "step": 20440 }, { "epoch": 0.27961796938559247, "grad_norm": 1.485504150390625, "learning_rate": 0.0004975900456990714, "loss": 7.4593, "step": 20450 }, { "epoch": 0.2797547018889595, "grad_norm": 0.91875821352005, "learning_rate": 0.000497581774610057, "loss": 7.5477, "step": 20460 }, { "epoch": 0.27989143439232655, "grad_norm": 0.9273548126220703, "learning_rate": 0.0004975734894209204, "loss": 7.619, "step": 20470 }, { "epoch": 0.2800281668956936, "grad_norm": 1.7320644855499268, "learning_rate": 0.0004975651901321335, "loss": 7.4483, "step": 20480 }, { "epoch": 0.28016489939906064, "grad_norm": 1.010109782218933, "learning_rate": 0.0004975568767441691, "loss": 7.5588, "step": 20490 }, { "epoch": 0.2803016319024277, "grad_norm": 1.741013526916504, "learning_rate": 0.0004975485492575006, "loss": 7.6027, "step": 20500 }, { "epoch": 0.2804383644057947, "grad_norm": 1.0165437459945679, "learning_rate": 0.0004975402076726022, "loss": 7.4613, "step": 20510 }, { "epoch": 0.28057509690916177, "grad_norm": 1.6405638456344604, "learning_rate": 0.000497531851989949, "loss": 7.2853, "step": 20520 }, { "epoch": 0.2807118294125288, "grad_norm": 1.2808644771575928, "learning_rate": 0.000497523482210017, "loss": 7.6954, "step": 20530 }, { "epoch": 0.28084856191589586, "grad_norm": 1.1908671855926514, "learning_rate": 0.0004975150983332826, "loss": 7.5689, "step": 20540 }, { "epoch": 0.2809852944192629, "grad_norm": 0.8995406031608582, "learning_rate": 0.0004975067003602234, "loss": 7.627, "step": 20550 }, { "epoch": 0.2811220269226299, "grad_norm": 1.0684261322021484, "learning_rate": 0.0004974982882913178, "loss": 7.5281, "step": 20560 }, { "epoch": 0.28125875942599693, "grad_norm": 1.6807153224945068, "learning_rate": 0.0004974898621270446, "loss": 7.4729, "step": 20570 }, { "epoch": 0.281395491929364, "grad_norm": 2.0755155086517334, "learning_rate": 0.000497481421867884, "loss": 7.6712, "step": 20580 }, { "epoch": 0.281532224432731, "grad_norm": 0.9151678085327148, "learning_rate": 0.0004974729675143164, "loss": 7.5725, "step": 20590 }, { "epoch": 0.28166895693609806, "grad_norm": 1.4273039102554321, "learning_rate": 0.0004974644990668234, "loss": 7.6342, "step": 20600 }, { "epoch": 0.2818056894394651, "grad_norm": 1.84743332862854, "learning_rate": 0.0004974560165258875, "loss": 7.5015, "step": 20610 }, { "epoch": 0.28194242194283214, "grad_norm": 1.4666941165924072, "learning_rate": 0.0004974475198919915, "loss": 7.4302, "step": 20620 }, { "epoch": 0.2820791544461992, "grad_norm": 1.5212656259536743, "learning_rate": 0.0004974390091656195, "loss": 7.4887, "step": 20630 }, { "epoch": 0.28221588694956623, "grad_norm": 1.0102906227111816, "learning_rate": 0.0004974304843472559, "loss": 7.546, "step": 20640 }, { "epoch": 0.2823526194529333, "grad_norm": 1.0500200986862183, "learning_rate": 0.0004974219454373865, "loss": 7.5581, "step": 20650 }, { "epoch": 0.2824893519563003, "grad_norm": 1.6918556690216064, "learning_rate": 0.0004974133924364975, "loss": 7.9263, "step": 20660 }, { "epoch": 0.2826260844596673, "grad_norm": 0.9494574666023254, "learning_rate": 0.000497404825345076, "loss": 7.8934, "step": 20670 }, { "epoch": 0.28276281696303435, "grad_norm": 1.2979458570480347, "learning_rate": 0.00049739624416361, "loss": 7.6, "step": 20680 }, { "epoch": 0.2828995494664014, "grad_norm": 1.4372116327285767, "learning_rate": 0.000497387648892588, "loss": 7.6873, "step": 20690 }, { "epoch": 0.28303628196976843, "grad_norm": 1.2325751781463623, "learning_rate": 0.0004973790395324996, "loss": 7.654, "step": 20700 }, { "epoch": 0.2831730144731355, "grad_norm": 1.155476450920105, "learning_rate": 0.0004973704160838353, "loss": 7.6268, "step": 20710 }, { "epoch": 0.2833097469765025, "grad_norm": 2.799118757247925, "learning_rate": 0.0004973617785470859, "loss": 7.5478, "step": 20720 }, { "epoch": 0.28344647947986956, "grad_norm": 0.92950439453125, "learning_rate": 0.0004973531269227435, "loss": 7.7843, "step": 20730 }, { "epoch": 0.2835832119832366, "grad_norm": 2.4676458835601807, "learning_rate": 0.000497344461211301, "loss": 7.8175, "step": 20740 }, { "epoch": 0.28371994448660365, "grad_norm": 1.6255677938461304, "learning_rate": 0.0004973357814132516, "loss": 7.6611, "step": 20750 }, { "epoch": 0.2838566769899707, "grad_norm": 1.4175864458084106, "learning_rate": 0.0004973270875290898, "loss": 7.3763, "step": 20760 }, { "epoch": 0.28399340949333773, "grad_norm": 1.1816003322601318, "learning_rate": 0.0004973183795593106, "loss": 7.5768, "step": 20770 }, { "epoch": 0.2841301419967047, "grad_norm": 1.2825109958648682, "learning_rate": 0.0004973096575044101, "loss": 7.6592, "step": 20780 }, { "epoch": 0.28426687450007176, "grad_norm": 0.8258045315742493, "learning_rate": 0.0004973009213648849, "loss": 7.4307, "step": 20790 }, { "epoch": 0.2844036070034388, "grad_norm": 1.243392825126648, "learning_rate": 0.0004972921711412327, "loss": 7.4711, "step": 20800 }, { "epoch": 0.28454033950680585, "grad_norm": 1.465818166732788, "learning_rate": 0.0004972834068339516, "loss": 7.6562, "step": 20810 }, { "epoch": 0.2846770720101729, "grad_norm": 1.9042543172836304, "learning_rate": 0.0004972746284435408, "loss": 7.7319, "step": 20820 }, { "epoch": 0.28481380451353994, "grad_norm": 1.500483512878418, "learning_rate": 0.0004972658359705005, "loss": 7.6941, "step": 20830 }, { "epoch": 0.284950537016907, "grad_norm": 1.4203672409057617, "learning_rate": 0.0004972570294153311, "loss": 7.3376, "step": 20840 }, { "epoch": 0.285087269520274, "grad_norm": 1.0232772827148438, "learning_rate": 0.0004972482087785343, "loss": 7.5406, "step": 20850 }, { "epoch": 0.28522400202364107, "grad_norm": 1.3998348712921143, "learning_rate": 0.0004972393740606126, "loss": 7.4506, "step": 20860 }, { "epoch": 0.2853607345270081, "grad_norm": 1.41843843460083, "learning_rate": 0.0004972305252620688, "loss": 7.6403, "step": 20870 }, { "epoch": 0.28549746703037515, "grad_norm": 1.3154149055480957, "learning_rate": 0.0004972216623834072, "loss": 7.7467, "step": 20880 }, { "epoch": 0.28563419953374214, "grad_norm": 2.0276525020599365, "learning_rate": 0.0004972127854251323, "loss": 7.6178, "step": 20890 }, { "epoch": 0.2857709320371092, "grad_norm": 1.5476179122924805, "learning_rate": 0.0004972038943877497, "loss": 7.5514, "step": 20900 }, { "epoch": 0.2859076645404762, "grad_norm": 1.0229291915893555, "learning_rate": 0.0004971949892717659, "loss": 7.6103, "step": 20910 }, { "epoch": 0.28604439704384327, "grad_norm": 1.2844929695129395, "learning_rate": 0.000497186070077688, "loss": 7.6228, "step": 20920 }, { "epoch": 0.2861811295472103, "grad_norm": 1.1317518949508667, "learning_rate": 0.0004971771368060238, "loss": 7.6276, "step": 20930 }, { "epoch": 0.28631786205057735, "grad_norm": 1.2644041776657104, "learning_rate": 0.0004971681894572822, "loss": 7.6429, "step": 20940 }, { "epoch": 0.2864545945539444, "grad_norm": 1.5923590660095215, "learning_rate": 0.0004971592280319728, "loss": 7.8271, "step": 20950 }, { "epoch": 0.28659132705731144, "grad_norm": 1.2820199728012085, "learning_rate": 0.0004971502525306058, "loss": 7.609, "step": 20960 }, { "epoch": 0.2867280595606785, "grad_norm": 1.5135822296142578, "learning_rate": 0.0004971412629536927, "loss": 7.5199, "step": 20970 }, { "epoch": 0.2868647920640455, "grad_norm": 1.1106150150299072, "learning_rate": 0.0004971322593017451, "loss": 7.355, "step": 20980 }, { "epoch": 0.28700152456741257, "grad_norm": 0.9072845578193665, "learning_rate": 0.000497123241575276, "loss": 7.547, "step": 20990 }, { "epoch": 0.28713825707077956, "grad_norm": 1.260132074356079, "learning_rate": 0.0004971142097747988, "loss": 7.5685, "step": 21000 }, { "epoch": 0.2872749895741466, "grad_norm": 1.4929306507110596, "learning_rate": 0.0004971051639008281, "loss": 7.4572, "step": 21010 }, { "epoch": 0.28741172207751364, "grad_norm": 1.4889700412750244, "learning_rate": 0.0004970961039538789, "loss": 7.8794, "step": 21020 }, { "epoch": 0.2875484545808807, "grad_norm": 1.2362583875656128, "learning_rate": 0.0004970870299344671, "loss": 7.663, "step": 21030 }, { "epoch": 0.28768518708424773, "grad_norm": 1.0872116088867188, "learning_rate": 0.0004970779418431096, "loss": 7.4514, "step": 21040 }, { "epoch": 0.28782191958761477, "grad_norm": 1.0790685415267944, "learning_rate": 0.0004970688396803241, "loss": 7.3774, "step": 21050 }, { "epoch": 0.2879586520909818, "grad_norm": 1.1298840045928955, "learning_rate": 0.0004970597234466288, "loss": 7.4769, "step": 21060 }, { "epoch": 0.28809538459434886, "grad_norm": 1.600275993347168, "learning_rate": 0.000497050593142543, "loss": 7.4679, "step": 21070 }, { "epoch": 0.2882321170977159, "grad_norm": 1.725088119506836, "learning_rate": 0.0004970414487685866, "loss": 7.7805, "step": 21080 }, { "epoch": 0.28836884960108294, "grad_norm": 1.0276761054992676, "learning_rate": 0.0004970322903252803, "loss": 7.6288, "step": 21090 }, { "epoch": 0.28850558210445, "grad_norm": 1.230541706085205, "learning_rate": 0.0004970231178131459, "loss": 7.5259, "step": 21100 }, { "epoch": 0.288642314607817, "grad_norm": 1.2832725048065186, "learning_rate": 0.0004970139312327057, "loss": 7.5624, "step": 21110 }, { "epoch": 0.288779047111184, "grad_norm": 1.2616713047027588, "learning_rate": 0.0004970047305844828, "loss": 7.8494, "step": 21120 }, { "epoch": 0.28891577961455106, "grad_norm": 1.287491798400879, "learning_rate": 0.0004969955158690012, "loss": 7.7271, "step": 21130 }, { "epoch": 0.2890525121179181, "grad_norm": 1.1623451709747314, "learning_rate": 0.0004969862870867858, "loss": 7.583, "step": 21140 }, { "epoch": 0.28918924462128515, "grad_norm": 0.8549175262451172, "learning_rate": 0.0004969770442383622, "loss": 7.5524, "step": 21150 }, { "epoch": 0.2893259771246522, "grad_norm": 0.9322597980499268, "learning_rate": 0.0004969677873242567, "loss": 7.6051, "step": 21160 }, { "epoch": 0.28946270962801923, "grad_norm": 1.9617844820022583, "learning_rate": 0.0004969585163449965, "loss": 7.4069, "step": 21170 }, { "epoch": 0.2895994421313863, "grad_norm": 1.2075450420379639, "learning_rate": 0.0004969492313011097, "loss": 7.8355, "step": 21180 }, { "epoch": 0.2897361746347533, "grad_norm": 1.5133360624313354, "learning_rate": 0.0004969399321931249, "loss": 7.7394, "step": 21190 }, { "epoch": 0.28987290713812036, "grad_norm": 1.1054067611694336, "learning_rate": 0.0004969306190215719, "loss": 7.6824, "step": 21200 }, { "epoch": 0.29000963964148735, "grad_norm": 1.0213055610656738, "learning_rate": 0.000496921291786981, "loss": 7.5766, "step": 21210 }, { "epoch": 0.2901463721448544, "grad_norm": 1.8038002252578735, "learning_rate": 0.0004969119504898834, "loss": 7.6298, "step": 21220 }, { "epoch": 0.29028310464822143, "grad_norm": 0.8905671238899231, "learning_rate": 0.0004969025951308112, "loss": 7.6946, "step": 21230 }, { "epoch": 0.2904198371515885, "grad_norm": 0.8280547857284546, "learning_rate": 0.0004968932257102969, "loss": 7.3814, "step": 21240 }, { "epoch": 0.2905565696549555, "grad_norm": 1.004645824432373, "learning_rate": 0.0004968838422288745, "loss": 7.4204, "step": 21250 }, { "epoch": 0.29069330215832256, "grad_norm": 2.137692928314209, "learning_rate": 0.0004968744446870782, "loss": 7.6156, "step": 21260 }, { "epoch": 0.2908300346616896, "grad_norm": 1.231776237487793, "learning_rate": 0.0004968650330854431, "loss": 7.5239, "step": 21270 }, { "epoch": 0.29096676716505665, "grad_norm": 1.2408658266067505, "learning_rate": 0.0004968556074245054, "loss": 7.6778, "step": 21280 }, { "epoch": 0.2911034996684237, "grad_norm": 2.151196002960205, "learning_rate": 0.0004968461677048019, "loss": 7.5871, "step": 21290 }, { "epoch": 0.29124023217179074, "grad_norm": 1.287174940109253, "learning_rate": 0.00049683671392687, "loss": 7.646, "step": 21300 }, { "epoch": 0.2913769646751578, "grad_norm": 2.3477256298065186, "learning_rate": 0.0004968272460912483, "loss": 7.6328, "step": 21310 }, { "epoch": 0.29151369717852477, "grad_norm": 1.7617255449295044, "learning_rate": 0.0004968177641984759, "loss": 7.9931, "step": 21320 }, { "epoch": 0.2916504296818918, "grad_norm": 1.5670039653778076, "learning_rate": 0.0004968082682490929, "loss": 7.5497, "step": 21330 }, { "epoch": 0.29178716218525885, "grad_norm": 1.1893914937973022, "learning_rate": 0.0004967987582436401, "loss": 7.3884, "step": 21340 }, { "epoch": 0.2919238946886259, "grad_norm": 1.0963246822357178, "learning_rate": 0.000496789234182659, "loss": 7.8492, "step": 21350 }, { "epoch": 0.29206062719199294, "grad_norm": 0.9825773239135742, "learning_rate": 0.0004967796960666921, "loss": 7.7086, "step": 21360 }, { "epoch": 0.29219735969536, "grad_norm": 1.2223416566848755, "learning_rate": 0.0004967701438962825, "loss": 7.6217, "step": 21370 }, { "epoch": 0.292334092198727, "grad_norm": 1.1826258897781372, "learning_rate": 0.0004967605776719743, "loss": 7.5679, "step": 21380 }, { "epoch": 0.29247082470209407, "grad_norm": 0.9588093161582947, "learning_rate": 0.0004967509973943124, "loss": 7.6948, "step": 21390 }, { "epoch": 0.2926075572054611, "grad_norm": 1.3670471906661987, "learning_rate": 0.0004967414030638423, "loss": 7.7538, "step": 21400 }, { "epoch": 0.29274428970882815, "grad_norm": 1.3896490335464478, "learning_rate": 0.0004967317946811105, "loss": 7.5638, "step": 21410 }, { "epoch": 0.2928810222121952, "grad_norm": 1.3514158725738525, "learning_rate": 0.000496722172246664, "loss": 7.6154, "step": 21420 }, { "epoch": 0.2930177547155622, "grad_norm": 1.0378562211990356, "learning_rate": 0.0004967125357610509, "loss": 7.5417, "step": 21430 }, { "epoch": 0.2931544872189292, "grad_norm": 1.1021080017089844, "learning_rate": 0.0004967028852248201, "loss": 7.4605, "step": 21440 }, { "epoch": 0.29329121972229627, "grad_norm": 1.0730279684066772, "learning_rate": 0.0004966932206385212, "loss": 7.5788, "step": 21450 }, { "epoch": 0.2934279522256633, "grad_norm": 2.125345468521118, "learning_rate": 0.0004966835420027046, "loss": 7.4683, "step": 21460 }, { "epoch": 0.29356468472903036, "grad_norm": 1.4634140729904175, "learning_rate": 0.0004966738493179213, "loss": 7.3127, "step": 21470 }, { "epoch": 0.2937014172323974, "grad_norm": 0.8421869277954102, "learning_rate": 0.0004966641425847237, "loss": 7.5909, "step": 21480 }, { "epoch": 0.29383814973576444, "grad_norm": 1.5282779932022095, "learning_rate": 0.0004966544218036644, "loss": 7.5171, "step": 21490 }, { "epoch": 0.2939748822391315, "grad_norm": 1.9688568115234375, "learning_rate": 0.0004966446869752969, "loss": 7.7753, "step": 21500 }, { "epoch": 0.29411161474249853, "grad_norm": 1.3762260675430298, "learning_rate": 0.0004966349381001759, "loss": 7.6515, "step": 21510 }, { "epoch": 0.29424834724586557, "grad_norm": 0.8300673365592957, "learning_rate": 0.0004966251751788564, "loss": 7.4791, "step": 21520 }, { "epoch": 0.2943850797492326, "grad_norm": 0.8949795365333557, "learning_rate": 0.0004966153982118944, "loss": 7.3503, "step": 21530 }, { "epoch": 0.2945218122525996, "grad_norm": 1.065881371498108, "learning_rate": 0.0004966056071998468, "loss": 7.5667, "step": 21540 }, { "epoch": 0.29465854475596664, "grad_norm": 0.9811255931854248, "learning_rate": 0.0004965958021432711, "loss": 7.267, "step": 21550 }, { "epoch": 0.2947952772593337, "grad_norm": 1.5365058183670044, "learning_rate": 0.0004965859830427259, "loss": 7.6759, "step": 21560 }, { "epoch": 0.29493200976270073, "grad_norm": 0.8463452458381653, "learning_rate": 0.0004965761498987704, "loss": 7.3243, "step": 21570 }, { "epoch": 0.2950687422660678, "grad_norm": 1.1197779178619385, "learning_rate": 0.0004965663027119644, "loss": 7.6133, "step": 21580 }, { "epoch": 0.2952054747694348, "grad_norm": 0.9679453372955322, "learning_rate": 0.0004965564414828688, "loss": 7.5909, "step": 21590 }, { "epoch": 0.29534220727280186, "grad_norm": 2.858574151992798, "learning_rate": 0.0004965465662120453, "loss": 7.718, "step": 21600 }, { "epoch": 0.2954789397761689, "grad_norm": 1.1184970140457153, "learning_rate": 0.0004965366769000562, "loss": 7.537, "step": 21610 }, { "epoch": 0.29561567227953595, "grad_norm": 1.0173935890197754, "learning_rate": 0.0004965267735474648, "loss": 7.6048, "step": 21620 }, { "epoch": 0.295752404782903, "grad_norm": 1.0611188411712646, "learning_rate": 0.000496516856154835, "loss": 7.5459, "step": 21630 }, { "epoch": 0.29588913728627003, "grad_norm": 1.71181321144104, "learning_rate": 0.0004965069247227317, "loss": 7.626, "step": 21640 }, { "epoch": 0.296025869789637, "grad_norm": 1.0521358251571655, "learning_rate": 0.0004964969792517204, "loss": 7.7396, "step": 21650 }, { "epoch": 0.29616260229300406, "grad_norm": 2.6172139644622803, "learning_rate": 0.0004964870197423678, "loss": 7.3092, "step": 21660 }, { "epoch": 0.2962993347963711, "grad_norm": 1.3285287618637085, "learning_rate": 0.0004964770461952407, "loss": 7.565, "step": 21670 }, { "epoch": 0.29643606729973815, "grad_norm": 1.4774571657180786, "learning_rate": 0.0004964670586109075, "loss": 7.7143, "step": 21680 }, { "epoch": 0.2965727998031052, "grad_norm": 3.7425875663757324, "learning_rate": 0.0004964570569899367, "loss": 7.3335, "step": 21690 }, { "epoch": 0.29670953230647223, "grad_norm": 0.9004661440849304, "learning_rate": 0.0004964470413328979, "loss": 7.5967, "step": 21700 }, { "epoch": 0.2968462648098393, "grad_norm": 1.4648523330688477, "learning_rate": 0.0004964370116403618, "loss": 7.6157, "step": 21710 }, { "epoch": 0.2969829973132063, "grad_norm": 1.2757813930511475, "learning_rate": 0.0004964269679128995, "loss": 7.6421, "step": 21720 }, { "epoch": 0.29711972981657336, "grad_norm": 2.0282530784606934, "learning_rate": 0.0004964169101510828, "loss": 7.7283, "step": 21730 }, { "epoch": 0.2972564623199404, "grad_norm": 1.8177502155303955, "learning_rate": 0.0004964068383554847, "loss": 7.546, "step": 21740 }, { "epoch": 0.29739319482330745, "grad_norm": 1.678907871246338, "learning_rate": 0.0004963967525266787, "loss": 7.5386, "step": 21750 }, { "epoch": 0.29752992732667444, "grad_norm": 0.9534834623336792, "learning_rate": 0.0004963866526652392, "loss": 7.8093, "step": 21760 }, { "epoch": 0.2976666598300415, "grad_norm": 1.3022220134735107, "learning_rate": 0.0004963765387717415, "loss": 7.8176, "step": 21770 }, { "epoch": 0.2978033923334085, "grad_norm": 0.8647000193595886, "learning_rate": 0.0004963664108467616, "loss": 7.8624, "step": 21780 }, { "epoch": 0.29794012483677557, "grad_norm": 1.4787452220916748, "learning_rate": 0.0004963562688908761, "loss": 7.5475, "step": 21790 }, { "epoch": 0.2980768573401426, "grad_norm": 1.8761168718338013, "learning_rate": 0.0004963461129046629, "loss": 7.7855, "step": 21800 }, { "epoch": 0.29821358984350965, "grad_norm": 2.099518299102783, "learning_rate": 0.0004963359428887002, "loss": 7.6297, "step": 21810 }, { "epoch": 0.2983503223468767, "grad_norm": 1.3141016960144043, "learning_rate": 0.0004963257588435671, "loss": 7.8019, "step": 21820 }, { "epoch": 0.29848705485024374, "grad_norm": 1.7803168296813965, "learning_rate": 0.0004963155607698439, "loss": 7.4475, "step": 21830 }, { "epoch": 0.2986237873536108, "grad_norm": 2.68129563331604, "learning_rate": 0.0004963053486681111, "loss": 7.4701, "step": 21840 }, { "epoch": 0.2987605198569778, "grad_norm": 1.615450382232666, "learning_rate": 0.0004962951225389505, "loss": 7.7067, "step": 21850 }, { "epoch": 0.2988972523603448, "grad_norm": 1.5532591342926025, "learning_rate": 0.0004962848823829444, "loss": 7.6458, "step": 21860 }, { "epoch": 0.29903398486371185, "grad_norm": 1.295749306678772, "learning_rate": 0.0004962746282006759, "loss": 7.7909, "step": 21870 }, { "epoch": 0.2991707173670789, "grad_norm": 6.601253032684326, "learning_rate": 0.0004962643599927293, "loss": 18.9345, "step": 21880 }, { "epoch": 0.29930744987044594, "grad_norm": 6.740636348724365, "learning_rate": 0.000496254077759689, "loss": 15.8633, "step": 21890 }, { "epoch": 0.299444182373813, "grad_norm": 3.5881645679473877, "learning_rate": 0.0004962437815021408, "loss": 12.5653, "step": 21900 }, { "epoch": 0.29958091487718, "grad_norm": 1.8371007442474365, "learning_rate": 0.0004962334712206711, "loss": 11.2716, "step": 21910 }, { "epoch": 0.29971764738054707, "grad_norm": 2.1315572261810303, "learning_rate": 0.000496223146915867, "loss": 9.5452, "step": 21920 }, { "epoch": 0.2998543798839141, "grad_norm": 1.038441777229309, "learning_rate": 0.0004962128085883165, "loss": 8.9695, "step": 21930 }, { "epoch": 0.29999111238728116, "grad_norm": 3.2511916160583496, "learning_rate": 0.0004962024562386085, "loss": 8.4956, "step": 21940 }, { "epoch": 0.3001278448906482, "grad_norm": 15.531521797180176, "learning_rate": 0.0004961920898673324, "loss": 8.3104, "step": 21950 }, { "epoch": 0.30026457739401524, "grad_norm": 0.8910256624221802, "learning_rate": 0.0004961817094750787, "loss": 8.2401, "step": 21960 }, { "epoch": 0.30040130989738223, "grad_norm": 0.7143228650093079, "learning_rate": 0.0004961713150624385, "loss": 7.9429, "step": 21970 }, { "epoch": 0.3005380424007493, "grad_norm": 133.02801513671875, "learning_rate": 0.0004961609066300039, "loss": 7.8712, "step": 21980 }, { "epoch": 0.3006747749041163, "grad_norm": 0.7196051478385925, "learning_rate": 0.0004961504841783675, "loss": 7.5074, "step": 21990 }, { "epoch": 0.30081150740748336, "grad_norm": 2.485644817352295, "learning_rate": 0.000496140047708123, "loss": 7.9809, "step": 22000 }, { "epoch": 0.3009482399108504, "grad_norm": 0.9477162957191467, "learning_rate": 0.0004961295972198647, "loss": 7.5806, "step": 22010 }, { "epoch": 0.30108497241421744, "grad_norm": 1.145803689956665, "learning_rate": 0.0004961191327141879, "loss": 7.5845, "step": 22020 }, { "epoch": 0.3012217049175845, "grad_norm": 1.0574067831039429, "learning_rate": 0.0004961086541916883, "loss": 7.6488, "step": 22030 }, { "epoch": 0.30135843742095153, "grad_norm": 0.8460545539855957, "learning_rate": 0.000496098161652963, "loss": 7.5907, "step": 22040 }, { "epoch": 0.3014951699243186, "grad_norm": 1.0098189115524292, "learning_rate": 0.0004960876550986093, "loss": 7.8058, "step": 22050 }, { "epoch": 0.3016319024276856, "grad_norm": 7.491360664367676, "learning_rate": 0.0004960771345292258, "loss": 7.854, "step": 22060 }, { "epoch": 0.30176863493105266, "grad_norm": 46.177406311035156, "learning_rate": 0.0004960665999454115, "loss": 7.8065, "step": 22070 }, { "epoch": 0.30190536743441965, "grad_norm": 1.0033138990402222, "learning_rate": 0.0004960560513477662, "loss": 7.3718, "step": 22080 }, { "epoch": 0.3020420999377867, "grad_norm": 7.36407995223999, "learning_rate": 0.000496045488736891, "loss": 7.5807, "step": 22090 }, { "epoch": 0.30217883244115373, "grad_norm": 1.2977375984191895, "learning_rate": 0.0004960349121133873, "loss": 7.5375, "step": 22100 }, { "epoch": 0.3023155649445208, "grad_norm": 1.3324681520462036, "learning_rate": 0.0004960243214778574, "loss": 7.5386, "step": 22110 }, { "epoch": 0.3024522974478878, "grad_norm": 0.944195032119751, "learning_rate": 0.0004960137168309045, "loss": 7.3938, "step": 22120 }, { "epoch": 0.30258902995125486, "grad_norm": 0.7994658946990967, "learning_rate": 0.0004960030981731326, "loss": 7.6251, "step": 22130 }, { "epoch": 0.3027257624546219, "grad_norm": 1.263100266456604, "learning_rate": 0.0004959924655051465, "loss": 7.6924, "step": 22140 }, { "epoch": 0.30286249495798895, "grad_norm": 1.007727861404419, "learning_rate": 0.0004959818188275515, "loss": 7.0558, "step": 22150 }, { "epoch": 0.302999227461356, "grad_norm": 1.648787021636963, "learning_rate": 0.0004959711581409541, "loss": 7.7305, "step": 22160 }, { "epoch": 0.30313595996472303, "grad_norm": 0.8217088580131531, "learning_rate": 0.0004959604834459614, "loss": 7.628, "step": 22170 }, { "epoch": 0.3032726924680901, "grad_norm": 0.657628059387207, "learning_rate": 0.0004959497947431814, "loss": 7.3583, "step": 22180 }, { "epoch": 0.30340942497145706, "grad_norm": 3.681713581085205, "learning_rate": 0.0004959390920332228, "loss": 7.4684, "step": 22190 }, { "epoch": 0.3035461574748241, "grad_norm": 1.1022849082946777, "learning_rate": 0.0004959283753166951, "loss": 7.144, "step": 22200 }, { "epoch": 0.30368288997819115, "grad_norm": 0.7933871150016785, "learning_rate": 0.0004959176445942088, "loss": 7.3543, "step": 22210 }, { "epoch": 0.3038196224815582, "grad_norm": 1.4769515991210938, "learning_rate": 0.0004959068998663747, "loss": 7.4791, "step": 22220 }, { "epoch": 0.30395635498492524, "grad_norm": 1.8218293190002441, "learning_rate": 0.0004958961411338051, "loss": 7.3615, "step": 22230 }, { "epoch": 0.3040930874882923, "grad_norm": 12.15921688079834, "learning_rate": 0.0004958853683971125, "loss": 7.4153, "step": 22240 }, { "epoch": 0.3042298199916593, "grad_norm": 1.3606412410736084, "learning_rate": 0.0004958745816569105, "loss": 7.1605, "step": 22250 }, { "epoch": 0.30436655249502637, "grad_norm": 9.15967082977295, "learning_rate": 0.0004958637809138132, "loss": 7.5385, "step": 22260 }, { "epoch": 0.3045032849983934, "grad_norm": 0.8956049680709839, "learning_rate": 0.0004958529661684361, "loss": 7.2263, "step": 22270 }, { "epoch": 0.30464001750176045, "grad_norm": 2.366168975830078, "learning_rate": 0.0004958421374213948, "loss": 7.2662, "step": 22280 }, { "epoch": 0.3047767500051275, "grad_norm": 1.0101776123046875, "learning_rate": 0.0004958312946733063, "loss": 7.3858, "step": 22290 }, { "epoch": 0.3049134825084945, "grad_norm": 0.9402345418930054, "learning_rate": 0.0004958204379247878, "loss": 7.1464, "step": 22300 }, { "epoch": 0.3050502150118615, "grad_norm": 0.8818811774253845, "learning_rate": 0.0004958095671764579, "loss": 7.076, "step": 22310 }, { "epoch": 0.30518694751522857, "grad_norm": 1.1012849807739258, "learning_rate": 0.0004957986824289354, "loss": 7.5061, "step": 22320 }, { "epoch": 0.3053236800185956, "grad_norm": 0.8010635375976562, "learning_rate": 0.0004957877836828404, "loss": 7.5797, "step": 22330 }, { "epoch": 0.30546041252196265, "grad_norm": 2.0261106491088867, "learning_rate": 0.0004957768709387936, "loss": 7.2541, "step": 22340 }, { "epoch": 0.3055971450253297, "grad_norm": 1.4661730527877808, "learning_rate": 0.0004957659441974163, "loss": 7.4538, "step": 22350 }, { "epoch": 0.30573387752869674, "grad_norm": 6.2621846199035645, "learning_rate": 0.0004957550034593312, "loss": 7.3922, "step": 22360 }, { "epoch": 0.3058706100320638, "grad_norm": 4.0746636390686035, "learning_rate": 0.000495744048725161, "loss": 6.8482, "step": 22370 }, { "epoch": 0.3060073425354308, "grad_norm": 1.377954125404358, "learning_rate": 0.0004957330799955298, "loss": 7.9746, "step": 22380 }, { "epoch": 0.30614407503879787, "grad_norm": 0.9292669892311096, "learning_rate": 0.0004957220972710621, "loss": 7.4144, "step": 22390 }, { "epoch": 0.3062808075421649, "grad_norm": 0.7743853330612183, "learning_rate": 0.0004957111005523834, "loss": 7.3329, "step": 22400 }, { "epoch": 0.3064175400455319, "grad_norm": 0.9709311723709106, "learning_rate": 0.0004957000898401202, "loss": 7.098, "step": 22410 }, { "epoch": 0.30655427254889894, "grad_norm": 1.9182208776474, "learning_rate": 0.0004956890651348994, "loss": 7.1539, "step": 22420 }, { "epoch": 0.306691005052266, "grad_norm": 0.7224949598312378, "learning_rate": 0.0004956780264373489, "loss": 7.3439, "step": 22430 }, { "epoch": 0.30682773755563303, "grad_norm": 1.9363861083984375, "learning_rate": 0.0004956669737480975, "loss": 7.296, "step": 22440 }, { "epoch": 0.30696447005900007, "grad_norm": 1.1828608512878418, "learning_rate": 0.0004956559070677744, "loss": 7.164, "step": 22450 }, { "epoch": 0.3071012025623671, "grad_norm": 6.078996658325195, "learning_rate": 0.0004956448263970101, "loss": 7.3016, "step": 22460 }, { "epoch": 0.30723793506573416, "grad_norm": 1.248881459236145, "learning_rate": 0.0004956337317364355, "loss": 7.0449, "step": 22470 }, { "epoch": 0.3073746675691012, "grad_norm": 1.265293836593628, "learning_rate": 0.0004956226230866827, "loss": 7.3457, "step": 22480 }, { "epoch": 0.30751140007246824, "grad_norm": 1.1208103895187378, "learning_rate": 0.000495611500448384, "loss": 7.3496, "step": 22490 }, { "epoch": 0.3076481325758353, "grad_norm": 1.3777629137039185, "learning_rate": 0.000495600363822173, "loss": 6.9754, "step": 22500 }, { "epoch": 0.30778486507920233, "grad_norm": 1.4913195371627808, "learning_rate": 0.000495589213208684, "loss": 6.9779, "step": 22510 }, { "epoch": 0.3079215975825693, "grad_norm": 1.7328215837478638, "learning_rate": 0.0004955780486085522, "loss": 7.5889, "step": 22520 }, { "epoch": 0.30805833008593636, "grad_norm": 0.9502978920936584, "learning_rate": 0.0004955668700224131, "loss": 7.1691, "step": 22530 }, { "epoch": 0.3081950625893034, "grad_norm": 1.8323484659194946, "learning_rate": 0.0004955556774509035, "loss": 7.5316, "step": 22540 }, { "epoch": 0.30833179509267045, "grad_norm": 1.9783068895339966, "learning_rate": 0.0004955444708946607, "loss": 6.9228, "step": 22550 }, { "epoch": 0.3084685275960375, "grad_norm": 1.2133979797363281, "learning_rate": 0.0004955332503543233, "loss": 6.9797, "step": 22560 }, { "epoch": 0.30860526009940453, "grad_norm": 1.5311623811721802, "learning_rate": 0.0004955220158305299, "loss": 7.2303, "step": 22570 }, { "epoch": 0.3087419926027716, "grad_norm": 3.7309796810150146, "learning_rate": 0.0004955107673239206, "loss": 7.4214, "step": 22580 }, { "epoch": 0.3088787251061386, "grad_norm": 65.77469635009766, "learning_rate": 0.0004954995048351359, "loss": 7.2505, "step": 22590 }, { "epoch": 0.30901545760950566, "grad_norm": 4.507416725158691, "learning_rate": 0.0004954882283648171, "loss": 7.0395, "step": 22600 }, { "epoch": 0.3091521901128727, "grad_norm": 1.601737380027771, "learning_rate": 0.0004954769379136067, "loss": 7.2147, "step": 22610 }, { "epoch": 0.3092889226162397, "grad_norm": 5.945789337158203, "learning_rate": 0.0004954656334821474, "loss": 7.1229, "step": 22620 }, { "epoch": 0.30942565511960674, "grad_norm": 4.9604926109313965, "learning_rate": 0.0004954543150710833, "loss": 6.6119, "step": 22630 }, { "epoch": 0.3095623876229738, "grad_norm": 8.274415016174316, "learning_rate": 0.0004954429826810588, "loss": 7.4582, "step": 22640 }, { "epoch": 0.3096991201263408, "grad_norm": 27.140918731689453, "learning_rate": 0.0004954316363127194, "loss": 7.1886, "step": 22650 }, { "epoch": 0.30983585262970786, "grad_norm": 1.5120258331298828, "learning_rate": 0.0004954202759667112, "loss": 7.1801, "step": 22660 }, { "epoch": 0.3099725851330749, "grad_norm": 1.2484490871429443, "learning_rate": 0.0004954089016436812, "loss": 7.1805, "step": 22670 }, { "epoch": 0.31010931763644195, "grad_norm": 33.18446731567383, "learning_rate": 0.0004953975133442772, "loss": 6.9486, "step": 22680 }, { "epoch": 0.310246050139809, "grad_norm": 1.069797158241272, "learning_rate": 0.0004953861110691478, "loss": 7.2143, "step": 22690 }, { "epoch": 0.31038278264317604, "grad_norm": 1.4453977346420288, "learning_rate": 0.0004953746948189423, "loss": 7.1528, "step": 22700 }, { "epoch": 0.3105195151465431, "grad_norm": 1.244874119758606, "learning_rate": 0.0004953632645943111, "loss": 6.9834, "step": 22710 }, { "epoch": 0.3106562476499101, "grad_norm": 1.0179945230484009, "learning_rate": 0.0004953518203959049, "loss": 7.3642, "step": 22720 }, { "epoch": 0.3107929801532771, "grad_norm": 1.5933221578598022, "learning_rate": 0.0004953403622243754, "loss": 7.3414, "step": 22730 }, { "epoch": 0.31092971265664415, "grad_norm": 1.5052528381347656, "learning_rate": 0.0004953288900803755, "loss": 7.1509, "step": 22740 }, { "epoch": 0.3110664451600112, "grad_norm": 1.9668631553649902, "learning_rate": 0.0004953174039645584, "loss": 6.5492, "step": 22750 }, { "epoch": 0.31120317766337824, "grad_norm": 0.8722209334373474, "learning_rate": 0.0004953059038775782, "loss": 7.1269, "step": 22760 }, { "epoch": 0.3113399101667453, "grad_norm": 1.6821621656417847, "learning_rate": 0.0004952943898200898, "loss": 7.2468, "step": 22770 }, { "epoch": 0.3114766426701123, "grad_norm": 2.015514373779297, "learning_rate": 0.000495282861792749, "loss": 7.0428, "step": 22780 }, { "epoch": 0.31161337517347937, "grad_norm": 1.5154774188995361, "learning_rate": 0.0004952713197962126, "loss": 6.839, "step": 22790 }, { "epoch": 0.3117501076768464, "grad_norm": 1.872422456741333, "learning_rate": 0.0004952597638311374, "loss": 7.6409, "step": 22800 }, { "epoch": 0.31188684018021345, "grad_norm": 1.399365782737732, "learning_rate": 0.0004952481938981819, "loss": 7.0278, "step": 22810 }, { "epoch": 0.3120235726835805, "grad_norm": 1.3899379968643188, "learning_rate": 0.0004952366099980049, "loss": 7.0061, "step": 22820 }, { "epoch": 0.31216030518694754, "grad_norm": 1.4765256643295288, "learning_rate": 0.0004952250121312662, "loss": 7.4722, "step": 22830 }, { "epoch": 0.3122970376903145, "grad_norm": 1.484244704246521, "learning_rate": 0.0004952134002986264, "loss": 7.2616, "step": 22840 }, { "epoch": 0.31243377019368157, "grad_norm": 1.395766019821167, "learning_rate": 0.0004952017745007464, "loss": 7.2436, "step": 22850 }, { "epoch": 0.3125705026970486, "grad_norm": 1.011981725692749, "learning_rate": 0.0004951901347382887, "loss": 6.8466, "step": 22860 }, { "epoch": 0.31270723520041566, "grad_norm": 2.1152660846710205, "learning_rate": 0.0004951784810119162, "loss": 7.0538, "step": 22870 }, { "epoch": 0.3128439677037827, "grad_norm": 1.2034504413604736, "learning_rate": 0.0004951668133222924, "loss": 6.8961, "step": 22880 }, { "epoch": 0.31298070020714974, "grad_norm": 37.016563415527344, "learning_rate": 0.0004951551316700819, "loss": 7.1276, "step": 22890 }, { "epoch": 0.3131174327105168, "grad_norm": 48.28156280517578, "learning_rate": 0.0004951434360559499, "loss": 6.8887, "step": 22900 }, { "epoch": 0.31325416521388383, "grad_norm": 2.4688615798950195, "learning_rate": 0.0004951317264805624, "loss": 7.0312, "step": 22910 }, { "epoch": 0.31339089771725087, "grad_norm": 17.312650680541992, "learning_rate": 0.0004951200029445867, "loss": 7.1617, "step": 22920 }, { "epoch": 0.3135276302206179, "grad_norm": 2.07910418510437, "learning_rate": 0.00049510826544869, "loss": 7.3182, "step": 22930 }, { "epoch": 0.31366436272398496, "grad_norm": 2.3362700939178467, "learning_rate": 0.0004950965139935411, "loss": 7.171, "step": 22940 }, { "epoch": 0.31380109522735194, "grad_norm": 1.2108018398284912, "learning_rate": 0.0004950847485798089, "loss": 7.331, "step": 22950 }, { "epoch": 0.313937827730719, "grad_norm": 1.4394996166229248, "learning_rate": 0.0004950729692081638, "loss": 6.8477, "step": 22960 }, { "epoch": 0.31407456023408603, "grad_norm": 1.2740774154663086, "learning_rate": 0.0004950611758792765, "loss": 7.2159, "step": 22970 }, { "epoch": 0.3142112927374531, "grad_norm": 1.1493074893951416, "learning_rate": 0.0004950493685938187, "loss": 6.85, "step": 22980 }, { "epoch": 0.3143480252408201, "grad_norm": 1.1161880493164062, "learning_rate": 0.0004950375473524628, "loss": 6.8832, "step": 22990 }, { "epoch": 0.31448475774418716, "grad_norm": 1.3534159660339355, "learning_rate": 0.000495025712155882, "loss": 6.882, "step": 23000 }, { "epoch": 0.3146214902475542, "grad_norm": 1.7824268341064453, "learning_rate": 0.0004950138630047503, "loss": 7.0263, "step": 23010 }, { "epoch": 0.31475822275092125, "grad_norm": 4.544478893280029, "learning_rate": 0.0004950019998997428, "loss": 6.9519, "step": 23020 }, { "epoch": 0.3148949552542883, "grad_norm": 1.2385023832321167, "learning_rate": 0.0004949901228415349, "loss": 7.082, "step": 23030 }, { "epoch": 0.31503168775765533, "grad_norm": 2.527815341949463, "learning_rate": 0.0004949782318308029, "loss": 6.727, "step": 23040 }, { "epoch": 0.3151684202610224, "grad_norm": 27.54463768005371, "learning_rate": 0.0004949663268682242, "loss": 6.6648, "step": 23050 }, { "epoch": 0.31530515276438936, "grad_norm": 3.0868282318115234, "learning_rate": 0.0004949544079544768, "loss": 7.3618, "step": 23060 }, { "epoch": 0.3154418852677564, "grad_norm": 1.6306406259536743, "learning_rate": 0.0004949424750902393, "loss": 6.9252, "step": 23070 }, { "epoch": 0.31557861777112345, "grad_norm": 8.399206161499023, "learning_rate": 0.0004949305282761915, "loss": 6.9456, "step": 23080 }, { "epoch": 0.3157153502744905, "grad_norm": 17.3095645904541, "learning_rate": 0.0004949185675130138, "loss": 7.3567, "step": 23090 }, { "epoch": 0.31585208277785753, "grad_norm": 4.459664821624756, "learning_rate": 0.0004949065928013873, "loss": 6.9153, "step": 23100 }, { "epoch": 0.3159888152812246, "grad_norm": 11.021233558654785, "learning_rate": 0.000494894604141994, "loss": 7.0613, "step": 23110 }, { "epoch": 0.3161255477845916, "grad_norm": 3.7347640991210938, "learning_rate": 0.0004948826015355166, "loss": 7.0264, "step": 23120 }, { "epoch": 0.31626228028795866, "grad_norm": 4.688857078552246, "learning_rate": 0.0004948705849826387, "loss": 7.0196, "step": 23130 }, { "epoch": 0.3163990127913257, "grad_norm": 8.019412994384766, "learning_rate": 0.0004948585544840448, "loss": 6.8688, "step": 23140 }, { "epoch": 0.31653574529469275, "grad_norm": 4.314286708831787, "learning_rate": 0.0004948465100404198, "loss": 7.0324, "step": 23150 }, { "epoch": 0.3166724777980598, "grad_norm": 2.9685018062591553, "learning_rate": 0.0004948344516524499, "loss": 7.3022, "step": 23160 }, { "epoch": 0.3168092103014268, "grad_norm": 1.5052560567855835, "learning_rate": 0.0004948223793208216, "loss": 6.8624, "step": 23170 }, { "epoch": 0.3169459428047938, "grad_norm": 0.8938080072402954, "learning_rate": 0.0004948102930462226, "loss": 7.5405, "step": 23180 }, { "epoch": 0.31708267530816087, "grad_norm": 1.78281569480896, "learning_rate": 0.0004947981928293412, "loss": 6.6272, "step": 23190 }, { "epoch": 0.3172194078115279, "grad_norm": 28.193023681640625, "learning_rate": 0.0004947860786708666, "loss": 6.9473, "step": 23200 }, { "epoch": 0.31735614031489495, "grad_norm": 3.9133408069610596, "learning_rate": 0.0004947739505714886, "loss": 6.9382, "step": 23210 }, { "epoch": 0.317492872818262, "grad_norm": 15.308144569396973, "learning_rate": 0.0004947618085318978, "loss": 6.6754, "step": 23220 }, { "epoch": 0.31762960532162904, "grad_norm": 14.98060417175293, "learning_rate": 0.000494749652552786, "loss": 7.5061, "step": 23230 }, { "epoch": 0.3177663378249961, "grad_norm": 25.82063102722168, "learning_rate": 0.0004947374826348453, "loss": 6.8693, "step": 23240 }, { "epoch": 0.3179030703283631, "grad_norm": 2.167405843734741, "learning_rate": 0.0004947252987787688, "loss": 6.7338, "step": 23250 }, { "epoch": 0.31803980283173017, "grad_norm": 1.5504534244537354, "learning_rate": 0.0004947131009852505, "loss": 6.8803, "step": 23260 }, { "epoch": 0.3181765353350972, "grad_norm": 3.9957692623138428, "learning_rate": 0.000494700889254985, "loss": 6.6929, "step": 23270 }, { "epoch": 0.3183132678384642, "grad_norm": 2.238245725631714, "learning_rate": 0.0004946886635886677, "loss": 7.1314, "step": 23280 }, { "epoch": 0.31845000034183124, "grad_norm": 2.4566147327423096, "learning_rate": 0.000494676423986995, "loss": 6.8332, "step": 23290 }, { "epoch": 0.3185867328451983, "grad_norm": 24.615787506103516, "learning_rate": 0.000494664170450664, "loss": 6.8309, "step": 23300 }, { "epoch": 0.3187234653485653, "grad_norm": 208.50733947753906, "learning_rate": 0.0004946519029803724, "loss": 6.879, "step": 23310 }, { "epoch": 0.31886019785193237, "grad_norm": 19.733463287353516, "learning_rate": 0.0004946396215768189, "loss": 7.3526, "step": 23320 }, { "epoch": 0.3189969303552994, "grad_norm": 16.7036190032959, "learning_rate": 0.0004946273262407029, "loss": 6.6322, "step": 23330 }, { "epoch": 0.31913366285866646, "grad_norm": 1.4884653091430664, "learning_rate": 0.0004946150169727249, "loss": 6.8753, "step": 23340 }, { "epoch": 0.3192703953620335, "grad_norm": 2.9968414306640625, "learning_rate": 0.0004946026937735856, "loss": 6.7453, "step": 23350 }, { "epoch": 0.31940712786540054, "grad_norm": 1.5990519523620605, "learning_rate": 0.000494590356643987, "loss": 6.7493, "step": 23360 }, { "epoch": 0.3195438603687676, "grad_norm": 1.1890608072280884, "learning_rate": 0.0004945780055846316, "loss": 7.0007, "step": 23370 }, { "epoch": 0.3196805928721346, "grad_norm": 25.88797950744629, "learning_rate": 0.000494565640596223, "loss": 6.8433, "step": 23380 }, { "epoch": 0.3198173253755016, "grad_norm": 2.1502089500427246, "learning_rate": 0.0004945532616794651, "loss": 6.8841, "step": 23390 }, { "epoch": 0.31995405787886866, "grad_norm": 2.63694429397583, "learning_rate": 0.0004945408688350633, "loss": 6.877, "step": 23400 }, { "epoch": 0.3200907903822357, "grad_norm": 1.649916410446167, "learning_rate": 0.0004945284620637229, "loss": 7.1267, "step": 23410 }, { "epoch": 0.32022752288560274, "grad_norm": 1.8968292474746704, "learning_rate": 0.0004945160413661509, "loss": 6.8546, "step": 23420 }, { "epoch": 0.3203642553889698, "grad_norm": 3.589125633239746, "learning_rate": 0.0004945036067430546, "loss": 6.7388, "step": 23430 }, { "epoch": 0.32050098789233683, "grad_norm": 3.231032133102417, "learning_rate": 0.000494491158195142, "loss": 6.885, "step": 23440 }, { "epoch": 0.3206377203957039, "grad_norm": 3.011662721633911, "learning_rate": 0.0004944786957231223, "loss": 6.9968, "step": 23450 }, { "epoch": 0.3207744528990709, "grad_norm": 14.525076866149902, "learning_rate": 0.0004944662193277048, "loss": 6.6238, "step": 23460 }, { "epoch": 0.32091118540243796, "grad_norm": 4.787230014801025, "learning_rate": 0.0004944537290096006, "loss": 6.5341, "step": 23470 }, { "epoch": 0.321047917905805, "grad_norm": 2.8689801692962646, "learning_rate": 0.0004944412247695208, "loss": 6.7314, "step": 23480 }, { "epoch": 0.321184650409172, "grad_norm": 4.403210639953613, "learning_rate": 0.0004944287066081776, "loss": 6.6609, "step": 23490 }, { "epoch": 0.32132138291253903, "grad_norm": 11.011029243469238, "learning_rate": 0.0004944161745262836, "loss": 6.8035, "step": 23500 }, { "epoch": 0.3214581154159061, "grad_norm": 3.197772979736328, "learning_rate": 0.000494403628524553, "loss": 6.7792, "step": 23510 }, { "epoch": 0.3215948479192731, "grad_norm": 284.23260498046875, "learning_rate": 0.0004943910686036999, "loss": 6.6992, "step": 23520 }, { "epoch": 0.32173158042264016, "grad_norm": 3.8929636478424072, "learning_rate": 0.00049437849476444, "loss": 6.7319, "step": 23530 }, { "epoch": 0.3218683129260072, "grad_norm": 9.524616241455078, "learning_rate": 0.0004943659070074892, "loss": 6.8039, "step": 23540 }, { "epoch": 0.32200504542937425, "grad_norm": 4.476316928863525, "learning_rate": 0.0004943533053335643, "loss": 6.8851, "step": 23550 }, { "epoch": 0.3221417779327413, "grad_norm": 136.1106414794922, "learning_rate": 0.000494340689743383, "loss": 6.7872, "step": 23560 }, { "epoch": 0.32227851043610833, "grad_norm": 82.45074462890625, "learning_rate": 0.0004943280602376639, "loss": 6.8662, "step": 23570 }, { "epoch": 0.3224152429394754, "grad_norm": 27.593557357788086, "learning_rate": 0.0004943154168171262, "loss": 7.0914, "step": 23580 }, { "epoch": 0.3225519754428424, "grad_norm": 938.7073974609375, "learning_rate": 0.00049430275948249, "loss": 6.9563, "step": 23590 }, { "epoch": 0.3226887079462094, "grad_norm": 35.06803512573242, "learning_rate": 0.000494290088234476, "loss": 6.7769, "step": 23600 }, { "epoch": 0.32282544044957645, "grad_norm": 31.996599197387695, "learning_rate": 0.0004942774030738062, "loss": 7.0465, "step": 23610 }, { "epoch": 0.3229621729529435, "grad_norm": 11.305870056152344, "learning_rate": 0.0004942647040012027, "loss": 6.7806, "step": 23620 }, { "epoch": 0.32309890545631054, "grad_norm": 67.46775817871094, "learning_rate": 0.0004942519910173888, "loss": 6.8624, "step": 23630 }, { "epoch": 0.3232356379596776, "grad_norm": 11.021565437316895, "learning_rate": 0.0004942392641230886, "loss": 6.8117, "step": 23640 }, { "epoch": 0.3233723704630446, "grad_norm": 50.06534194946289, "learning_rate": 0.0004942265233190268, "loss": 6.8748, "step": 23650 }, { "epoch": 0.32350910296641167, "grad_norm": 291.52484130859375, "learning_rate": 0.0004942137686059292, "loss": 7.0871, "step": 23660 }, { "epoch": 0.3236458354697787, "grad_norm": 9.072708129882812, "learning_rate": 0.0004942009999845219, "loss": 6.5129, "step": 23670 }, { "epoch": 0.32378256797314575, "grad_norm": 2.422231674194336, "learning_rate": 0.0004941882174555324, "loss": 6.8764, "step": 23680 }, { "epoch": 0.3239193004765128, "grad_norm": 2.6873199939727783, "learning_rate": 0.0004941754210196885, "loss": 7.0024, "step": 23690 }, { "epoch": 0.32405603297987984, "grad_norm": 5.665765285491943, "learning_rate": 0.000494162610677719, "loss": 7.015, "step": 23700 }, { "epoch": 0.3241927654832468, "grad_norm": 4.539939880371094, "learning_rate": 0.0004941497864303536, "loss": 6.9664, "step": 23710 }, { "epoch": 0.32432949798661387, "grad_norm": 7.996610164642334, "learning_rate": 0.0004941369482783225, "loss": 7.179, "step": 23720 }, { "epoch": 0.3244662304899809, "grad_norm": 5.902650833129883, "learning_rate": 0.0004941240962223568, "loss": 6.6825, "step": 23730 }, { "epoch": 0.32460296299334795, "grad_norm": 12.391536712646484, "learning_rate": 0.0004941112302631886, "loss": 6.9046, "step": 23740 }, { "epoch": 0.324739695496715, "grad_norm": 3.084033966064453, "learning_rate": 0.0004940983504015505, "loss": 6.596, "step": 23750 }, { "epoch": 0.32487642800008204, "grad_norm": 6.630660057067871, "learning_rate": 0.0004940854566381763, "loss": 6.6506, "step": 23760 }, { "epoch": 0.3250131605034491, "grad_norm": 6.361959934234619, "learning_rate": 0.0004940725489738, "loss": 6.7803, "step": 23770 }, { "epoch": 0.3251498930068161, "grad_norm": 5.298465251922607, "learning_rate": 0.0004940596274091569, "loss": 6.766, "step": 23780 }, { "epoch": 0.32528662551018317, "grad_norm": 31.991275787353516, "learning_rate": 0.0004940466919449828, "loss": 6.6589, "step": 23790 }, { "epoch": 0.3254233580135502, "grad_norm": 8.213006973266602, "learning_rate": 0.0004940337425820143, "loss": 6.7144, "step": 23800 }, { "epoch": 0.32556009051691726, "grad_norm": 3.851470470428467, "learning_rate": 0.0004940207793209891, "loss": 6.3903, "step": 23810 }, { "epoch": 0.32569682302028424, "grad_norm": 2.65437912940979, "learning_rate": 0.0004940078021626453, "loss": 6.8502, "step": 23820 }, { "epoch": 0.3258335555236513, "grad_norm": 2.7381842136383057, "learning_rate": 0.0004939948111077222, "loss": 6.7893, "step": 23830 }, { "epoch": 0.32597028802701833, "grad_norm": 4.212514877319336, "learning_rate": 0.0004939818061569594, "loss": 6.6455, "step": 23840 }, { "epoch": 0.32610702053038537, "grad_norm": 1.6343246698379517, "learning_rate": 0.0004939687873110977, "loss": 7.0415, "step": 23850 }, { "epoch": 0.3262437530337524, "grad_norm": 27.95764923095703, "learning_rate": 0.0004939557545708785, "loss": 6.7281, "step": 23860 }, { "epoch": 0.32638048553711946, "grad_norm": 5.540347576141357, "learning_rate": 0.000493942707937044, "loss": 6.7921, "step": 23870 }, { "epoch": 0.3265172180404865, "grad_norm": 12.72043514251709, "learning_rate": 0.0004939296474103372, "loss": 6.8399, "step": 23880 }, { "epoch": 0.32665395054385354, "grad_norm": 15.356837272644043, "learning_rate": 0.0004939165729915021, "loss": 6.767, "step": 23890 }, { "epoch": 0.3267906830472206, "grad_norm": 1.9295425415039062, "learning_rate": 0.000493903484681283, "loss": 6.6473, "step": 23900 }, { "epoch": 0.32692741555058763, "grad_norm": 2.362717866897583, "learning_rate": 0.0004938903824804256, "loss": 6.6965, "step": 23910 }, { "epoch": 0.3270641480539547, "grad_norm": 16.399024963378906, "learning_rate": 0.0004938772663896759, "loss": 7.1716, "step": 23920 }, { "epoch": 0.32720088055732166, "grad_norm": 4.594872951507568, "learning_rate": 0.000493864136409781, "loss": 6.5401, "step": 23930 }, { "epoch": 0.3273376130606887, "grad_norm": 11.332867622375488, "learning_rate": 0.0004938509925414886, "loss": 6.9349, "step": 23940 }, { "epoch": 0.32747434556405575, "grad_norm": 3.8750417232513428, "learning_rate": 0.0004938378347855472, "loss": 6.756, "step": 23950 }, { "epoch": 0.3276110780674228, "grad_norm": 2.260838747024536, "learning_rate": 0.0004938246631427062, "loss": 7.0049, "step": 23960 }, { "epoch": 0.32774781057078983, "grad_norm": 7.854814052581787, "learning_rate": 0.000493811477613716, "loss": 6.3984, "step": 23970 }, { "epoch": 0.3278845430741569, "grad_norm": 2.6155331134796143, "learning_rate": 0.000493798278199327, "loss": 7.0545, "step": 23980 }, { "epoch": 0.3280212755775239, "grad_norm": 43.29301834106445, "learning_rate": 0.0004937850649002914, "loss": 6.6621, "step": 23990 }, { "epoch": 0.32815800808089096, "grad_norm": 1.9457545280456543, "learning_rate": 0.0004937718377173615, "loss": 7.2887, "step": 24000 }, { "epoch": 0.328294740584258, "grad_norm": 581.3088989257812, "learning_rate": 0.0004937585966512906, "loss": 6.5677, "step": 24010 }, { "epoch": 0.32843147308762505, "grad_norm": 1.715675950050354, "learning_rate": 0.0004937453417028329, "loss": 6.6409, "step": 24020 }, { "epoch": 0.32856820559099204, "grad_norm": 3.725377321243286, "learning_rate": 0.0004937320728727432, "loss": 6.612, "step": 24030 }, { "epoch": 0.3287049380943591, "grad_norm": 1.9185165166854858, "learning_rate": 0.0004937187901617772, "loss": 6.629, "step": 24040 }, { "epoch": 0.3288416705977261, "grad_norm": 1.294115424156189, "learning_rate": 0.0004937054935706915, "loss": 6.8274, "step": 24050 }, { "epoch": 0.32897840310109316, "grad_norm": 18.192106246948242, "learning_rate": 0.0004936921831002431, "loss": 6.7996, "step": 24060 }, { "epoch": 0.3291151356044602, "grad_norm": 15.600470542907715, "learning_rate": 0.0004936788587511903, "loss": 6.7369, "step": 24070 }, { "epoch": 0.32925186810782725, "grad_norm": 1.9823623895645142, "learning_rate": 0.0004936655205242916, "loss": 6.6948, "step": 24080 }, { "epoch": 0.3293886006111943, "grad_norm": 2.5741140842437744, "learning_rate": 0.0004936521684203071, "loss": 6.426, "step": 24090 }, { "epoch": 0.32952533311456134, "grad_norm": 7.049862861633301, "learning_rate": 0.000493638802439997, "loss": 6.7921, "step": 24100 }, { "epoch": 0.3296620656179284, "grad_norm": 2.582935094833374, "learning_rate": 0.0004936254225841223, "loss": 6.7322, "step": 24110 }, { "epoch": 0.3297987981212954, "grad_norm": 1.997197151184082, "learning_rate": 0.0004936120288534453, "loss": 6.4776, "step": 24120 }, { "epoch": 0.32993553062466247, "grad_norm": 53.924442291259766, "learning_rate": 0.0004935986212487286, "loss": 6.982, "step": 24130 }, { "epoch": 0.33007226312802945, "grad_norm": 2.549048900604248, "learning_rate": 0.0004935851997707359, "loss": 6.5621, "step": 24140 }, { "epoch": 0.3302089956313965, "grad_norm": 44.37416458129883, "learning_rate": 0.0004935717644202316, "loss": 6.9094, "step": 24150 }, { "epoch": 0.33034572813476354, "grad_norm": 22.86566734313965, "learning_rate": 0.0004935583151979807, "loss": 6.5616, "step": 24160 }, { "epoch": 0.3304824606381306, "grad_norm": 27.095157623291016, "learning_rate": 0.0004935448521047493, "loss": 6.8647, "step": 24170 }, { "epoch": 0.3306191931414976, "grad_norm": 15.606440544128418, "learning_rate": 0.000493531375141304, "loss": 6.9912, "step": 24180 }, { "epoch": 0.33075592564486467, "grad_norm": 4.23590612411499, "learning_rate": 0.0004935178843084126, "loss": 6.7725, "step": 24190 }, { "epoch": 0.3308926581482317, "grad_norm": 2.151994228363037, "learning_rate": 0.0004935043796068431, "loss": 6.602, "step": 24200 }, { "epoch": 0.33102939065159875, "grad_norm": 1.8805431127548218, "learning_rate": 0.0004934908610373649, "loss": 6.9024, "step": 24210 }, { "epoch": 0.3311661231549658, "grad_norm": 4.220793724060059, "learning_rate": 0.0004934773286007476, "loss": 6.6481, "step": 24220 }, { "epoch": 0.33130285565833284, "grad_norm": 3.7366364002227783, "learning_rate": 0.000493463782297762, "loss": 6.9527, "step": 24230 }, { "epoch": 0.3314395881616999, "grad_norm": 9.74271011352539, "learning_rate": 0.0004934502221291797, "loss": 7.0219, "step": 24240 }, { "epoch": 0.33157632066506687, "grad_norm": 83.47611236572266, "learning_rate": 0.000493436648095773, "loss": 6.8897, "step": 24250 }, { "epoch": 0.3317130531684339, "grad_norm": 23.145736694335938, "learning_rate": 0.0004934230601983148, "loss": 6.8766, "step": 24260 }, { "epoch": 0.33184978567180096, "grad_norm": 4.696556568145752, "learning_rate": 0.0004934094584375789, "loss": 7.0784, "step": 24270 }, { "epoch": 0.331986518175168, "grad_norm": 3.285250186920166, "learning_rate": 0.0004933958428143401, "loss": 6.6971, "step": 24280 }, { "epoch": 0.33212325067853504, "grad_norm": 2.9140377044677734, "learning_rate": 0.0004933822133293738, "loss": 6.592, "step": 24290 }, { "epoch": 0.3322599831819021, "grad_norm": 49.99687576293945, "learning_rate": 0.0004933685699834563, "loss": 6.4101, "step": 24300 }, { "epoch": 0.33239671568526913, "grad_norm": 65.05448913574219, "learning_rate": 0.0004933549127773643, "loss": 6.6315, "step": 24310 }, { "epoch": 0.33253344818863617, "grad_norm": 2.3580732345581055, "learning_rate": 0.0004933412417118759, "loss": 6.2166, "step": 24320 }, { "epoch": 0.3326701806920032, "grad_norm": 63.02171325683594, "learning_rate": 0.0004933275567877695, "loss": 6.9504, "step": 24330 }, { "epoch": 0.33280691319537026, "grad_norm": 1.5191973447799683, "learning_rate": 0.0004933138580058246, "loss": 6.8329, "step": 24340 }, { "epoch": 0.3329436456987373, "grad_norm": 13865.9365234375, "learning_rate": 0.0004933001453668213, "loss": 6.8287, "step": 24350 }, { "epoch": 0.3330803782021043, "grad_norm": 69.26751708984375, "learning_rate": 0.0004932864188715406, "loss": 6.8423, "step": 24360 }, { "epoch": 0.33321711070547133, "grad_norm": 115.58488464355469, "learning_rate": 0.0004932726785207643, "loss": 7.4026, "step": 24370 }, { "epoch": 0.3333538432088384, "grad_norm": 55446.04296875, "learning_rate": 0.0004932589243152747, "loss": 6.7707, "step": 24380 }, { "epoch": 0.3334905757122054, "grad_norm": 71578.6015625, "learning_rate": 0.0004932451562558554, "loss": 6.8896, "step": 24390 }, { "epoch": 0.33362730821557246, "grad_norm": 1003536448.0, "learning_rate": 0.0004932313743432903, "loss": 7.5117, "step": 24400 }, { "epoch": 0.3337640407189395, "grad_norm": 17.804513931274414, "learning_rate": 0.0004932175785783644, "loss": 8.7728, "step": 24410 }, { "epoch": 0.33390077322230655, "grad_norm": 7.13243293762207, "learning_rate": 0.0004932037689618634, "loss": 9.4893, "step": 24420 }, { "epoch": 0.3340375057256736, "grad_norm": 11.112024307250977, "learning_rate": 0.0004931899454945736, "loss": 8.1549, "step": 24430 }, { "epoch": 0.33417423822904063, "grad_norm": 6.284102916717529, "learning_rate": 0.0004931761081772826, "loss": 7.3329, "step": 24440 }, { "epoch": 0.3343109707324077, "grad_norm": 1.9539909362792969, "learning_rate": 0.0004931622570107781, "loss": 6.701, "step": 24450 }, { "epoch": 0.3344477032357747, "grad_norm": 13.38683795928955, "learning_rate": 0.0004931483919958493, "loss": 6.8655, "step": 24460 }, { "epoch": 0.3345844357391417, "grad_norm": 267.98193359375, "learning_rate": 0.0004931345131332854, "loss": 6.6135, "step": 24470 }, { "epoch": 0.33472116824250875, "grad_norm": 9.788619041442871, "learning_rate": 0.0004931206204238772, "loss": 6.8152, "step": 24480 }, { "epoch": 0.3348579007458758, "grad_norm": 2.0903515815734863, "learning_rate": 0.0004931067138684158, "loss": 6.8664, "step": 24490 }, { "epoch": 0.33499463324924283, "grad_norm": 12.205268859863281, "learning_rate": 0.000493092793467693, "loss": 6.5611, "step": 24500 }, { "epoch": 0.3351313657526099, "grad_norm": 108.79271697998047, "learning_rate": 0.0004930788592225019, "loss": 6.7835, "step": 24510 }, { "epoch": 0.3352680982559769, "grad_norm": 2.491588830947876, "learning_rate": 0.0004930649111336359, "loss": 6.7552, "step": 24520 }, { "epoch": 0.33540483075934396, "grad_norm": 11.61126708984375, "learning_rate": 0.0004930509492018893, "loss": 6.7149, "step": 24530 }, { "epoch": 0.335541563262711, "grad_norm": 40.9195556640625, "learning_rate": 0.0004930369734280574, "loss": 6.8192, "step": 24540 }, { "epoch": 0.33567829576607805, "grad_norm": 2.6176512241363525, "learning_rate": 0.000493022983812936, "loss": 6.6279, "step": 24550 }, { "epoch": 0.3358150282694451, "grad_norm": 5.022276878356934, "learning_rate": 0.0004930089803573221, "loss": 7.3552, "step": 24560 }, { "epoch": 0.33595176077281214, "grad_norm": 23.799177169799805, "learning_rate": 0.0004929949630620128, "loss": 6.6491, "step": 24570 }, { "epoch": 0.3360884932761791, "grad_norm": 2.6715359687805176, "learning_rate": 0.0004929809319278068, "loss": 6.6338, "step": 24580 }, { "epoch": 0.33622522577954617, "grad_norm": 6.40479850769043, "learning_rate": 0.0004929668869555029, "loss": 6.9772, "step": 24590 }, { "epoch": 0.3363619582829132, "grad_norm": 2.5778491497039795, "learning_rate": 0.0004929528281459011, "loss": 6.6678, "step": 24600 }, { "epoch": 0.33649869078628025, "grad_norm": 307.1153259277344, "learning_rate": 0.0004929387554998022, "loss": 6.6133, "step": 24610 }, { "epoch": 0.3366354232896473, "grad_norm": 3.858125686645508, "learning_rate": 0.0004929246690180074, "loss": 6.7666, "step": 24620 }, { "epoch": 0.33677215579301434, "grad_norm": 7.4304914474487305, "learning_rate": 0.0004929105687013192, "loss": 6.8873, "step": 24630 }, { "epoch": 0.3369088882963814, "grad_norm": 1.7971301078796387, "learning_rate": 0.0004928964545505404, "loss": 6.7687, "step": 24640 }, { "epoch": 0.3370456207997484, "grad_norm": 180.00546264648438, "learning_rate": 0.0004928823265664748, "loss": 6.6682, "step": 24650 }, { "epoch": 0.33718235330311547, "grad_norm": 7.436458587646484, "learning_rate": 0.0004928681847499272, "loss": 6.9362, "step": 24660 }, { "epoch": 0.3373190858064825, "grad_norm": 175.88121032714844, "learning_rate": 0.0004928540291017031, "loss": 6.5637, "step": 24670 }, { "epoch": 0.33745581830984955, "grad_norm": 2.508345365524292, "learning_rate": 0.0004928398596226082, "loss": 6.6049, "step": 24680 }, { "epoch": 0.33759255081321654, "grad_norm": 9.702527046203613, "learning_rate": 0.00049282567631345, "loss": 6.5521, "step": 24690 }, { "epoch": 0.3377292833165836, "grad_norm": 5.579897403717041, "learning_rate": 0.000492811479175036, "loss": 6.8388, "step": 24700 }, { "epoch": 0.3378660158199506, "grad_norm": 5.539103984832764, "learning_rate": 0.0004927972682081749, "loss": 6.9236, "step": 24710 }, { "epoch": 0.33800274832331767, "grad_norm": 4.557441711425781, "learning_rate": 0.0004927830434136758, "loss": 6.7776, "step": 24720 }, { "epoch": 0.3381394808266847, "grad_norm": 5.6645402908325195, "learning_rate": 0.0004927688047923488, "loss": 6.8216, "step": 24730 }, { "epoch": 0.33827621333005176, "grad_norm": 5.687450885772705, "learning_rate": 0.0004927545523450051, "loss": 6.9564, "step": 24740 }, { "epoch": 0.3384129458334188, "grad_norm": 23.855409622192383, "learning_rate": 0.0004927402860724563, "loss": 6.7736, "step": 24750 }, { "epoch": 0.33854967833678584, "grad_norm": 6.166195392608643, "learning_rate": 0.0004927260059755148, "loss": 6.7985, "step": 24760 }, { "epoch": 0.3386864108401529, "grad_norm": 13.10020637512207, "learning_rate": 0.0004927117120549938, "loss": 7.0453, "step": 24770 }, { "epoch": 0.33882314334351993, "grad_norm": 8.85280990600586, "learning_rate": 0.0004926974043117075, "loss": 7.2866, "step": 24780 }, { "epoch": 0.3389598758468869, "grad_norm": 17.485361099243164, "learning_rate": 0.0004926830827464708, "loss": 6.4864, "step": 24790 }, { "epoch": 0.33909660835025396, "grad_norm": 2.3145060539245605, "learning_rate": 0.0004926687473600991, "loss": 7.1563, "step": 24800 }, { "epoch": 0.339233340853621, "grad_norm": 72.44425964355469, "learning_rate": 0.000492654398153409, "loss": 6.7632, "step": 24810 }, { "epoch": 0.33937007335698804, "grad_norm": 18.24273109436035, "learning_rate": 0.0004926400351272177, "loss": 6.8451, "step": 24820 }, { "epoch": 0.3395068058603551, "grad_norm": 5.598021507263184, "learning_rate": 0.0004926256582823431, "loss": 7.1622, "step": 24830 }, { "epoch": 0.33964353836372213, "grad_norm": 8.504746437072754, "learning_rate": 0.0004926112676196041, "loss": 6.8863, "step": 24840 }, { "epoch": 0.3397802708670892, "grad_norm": 15.161881446838379, "learning_rate": 0.0004925968631398202, "loss": 6.9316, "step": 24850 }, { "epoch": 0.3399170033704562, "grad_norm": 1.803688883781433, "learning_rate": 0.0004925824448438117, "loss": 6.8146, "step": 24860 }, { "epoch": 0.34005373587382326, "grad_norm": 3.0246198177337646, "learning_rate": 0.0004925680127324, "loss": 6.7058, "step": 24870 }, { "epoch": 0.3401904683771903, "grad_norm": 9.06529426574707, "learning_rate": 0.0004925535668064066, "loss": 7.0865, "step": 24880 }, { "epoch": 0.34032720088055735, "grad_norm": 3.7763278484344482, "learning_rate": 0.0004925391070666545, "loss": 6.5467, "step": 24890 }, { "epoch": 0.34046393338392433, "grad_norm": 5.317086219787598, "learning_rate": 0.0004925246335139672, "loss": 6.5873, "step": 24900 }, { "epoch": 0.3406006658872914, "grad_norm": 6.222352027893066, "learning_rate": 0.0004925101461491689, "loss": 6.8057, "step": 24910 }, { "epoch": 0.3407373983906584, "grad_norm": 23.633413314819336, "learning_rate": 0.0004924956449730847, "loss": 6.3087, "step": 24920 }, { "epoch": 0.34087413089402546, "grad_norm": 5.613703727722168, "learning_rate": 0.0004924811299865406, "loss": 6.6846, "step": 24930 }, { "epoch": 0.3410108633973925, "grad_norm": 2.394354820251465, "learning_rate": 0.000492466601190363, "loss": 6.8175, "step": 24940 }, { "epoch": 0.34114759590075955, "grad_norm": 32.0588264465332, "learning_rate": 0.0004924520585853796, "loss": 6.7606, "step": 24950 }, { "epoch": 0.3412843284041266, "grad_norm": 61.74367141723633, "learning_rate": 0.0004924375021724183, "loss": 6.9459, "step": 24960 }, { "epoch": 0.34142106090749363, "grad_norm": 10.485276222229004, "learning_rate": 0.0004924229319523084, "loss": 6.5807, "step": 24970 }, { "epoch": 0.3415577934108607, "grad_norm": 2.8167757987976074, "learning_rate": 0.0004924083479258795, "loss": 7.1311, "step": 24980 }, { "epoch": 0.3416945259142277, "grad_norm": 32.559776306152344, "learning_rate": 0.0004923937500939624, "loss": 6.7977, "step": 24990 }, { "epoch": 0.34183125841759476, "grad_norm": 971.3616943359375, "learning_rate": 0.0004923791384573882, "loss": 6.6754, "step": 25000 }, { "epoch": 0.34196799092096175, "grad_norm": 15.69559383392334, "learning_rate": 0.0004923645130169892, "loss": 6.6966, "step": 25010 }, { "epoch": 0.3421047234243288, "grad_norm": 2.9009668827056885, "learning_rate": 0.0004923498737735983, "loss": 6.5224, "step": 25020 }, { "epoch": 0.34224145592769584, "grad_norm": 12.266546249389648, "learning_rate": 0.0004923352207280493, "loss": 6.77, "step": 25030 }, { "epoch": 0.3423781884310629, "grad_norm": 123.31204986572266, "learning_rate": 0.0004923205538811767, "loss": 6.727, "step": 25040 }, { "epoch": 0.3425149209344299, "grad_norm": 12.348726272583008, "learning_rate": 0.0004923058732338157, "loss": 7.1028, "step": 25050 }, { "epoch": 0.34265165343779697, "grad_norm": 1237.454345703125, "learning_rate": 0.0004922911787868024, "loss": 6.9466, "step": 25060 }, { "epoch": 0.342788385941164, "grad_norm": 25.899394989013672, "learning_rate": 0.0004922764705409738, "loss": 6.9974, "step": 25070 }, { "epoch": 0.34292511844453105, "grad_norm": 26.984073638916016, "learning_rate": 0.0004922617484971674, "loss": 6.8828, "step": 25080 }, { "epoch": 0.3430618509478981, "grad_norm": 43.39320755004883, "learning_rate": 0.0004922470126562217, "loss": 6.5729, "step": 25090 }, { "epoch": 0.34319858345126514, "grad_norm": 5.010589599609375, "learning_rate": 0.000492232263018976, "loss": 6.5056, "step": 25100 }, { "epoch": 0.3433353159546322, "grad_norm": 5.044241905212402, "learning_rate": 0.00049221749958627, "loss": 6.6214, "step": 25110 }, { "epoch": 0.34347204845799917, "grad_norm": 17.207717895507812, "learning_rate": 0.0004922027223589451, "loss": 6.9719, "step": 25120 }, { "epoch": 0.3436087809613662, "grad_norm": 30.364912033081055, "learning_rate": 0.0004921879313378422, "loss": 6.9901, "step": 25130 }, { "epoch": 0.34374551346473325, "grad_norm": 17.732812881469727, "learning_rate": 0.0004921731265238042, "loss": 6.8888, "step": 25140 }, { "epoch": 0.3438822459681003, "grad_norm": 9.42012882232666, "learning_rate": 0.0004921583079176739, "loss": 6.7585, "step": 25150 }, { "epoch": 0.34401897847146734, "grad_norm": 21.828174591064453, "learning_rate": 0.0004921434755202955, "loss": 6.6738, "step": 25160 }, { "epoch": 0.3441557109748344, "grad_norm": 520.0186767578125, "learning_rate": 0.0004921286293325136, "loss": 6.6816, "step": 25170 }, { "epoch": 0.3442924434782014, "grad_norm": 42279.8515625, "learning_rate": 0.0004921137693551736, "loss": 6.594, "step": 25180 }, { "epoch": 0.34442917598156847, "grad_norm": 929555776.0, "learning_rate": 0.000492098895589122, "loss": 7.215, "step": 25190 }, { "epoch": 0.3445659084849355, "grad_norm": 252131.0625, "learning_rate": 0.0004920840080352058, "loss": 8.6935, "step": 25200 }, { "epoch": 0.34470264098830256, "grad_norm": 233.14266967773438, "learning_rate": 0.0004920691066942728, "loss": 11.587, "step": 25210 }, { "epoch": 0.3448393734916696, "grad_norm": 1501.1309814453125, "learning_rate": 0.0004920541915671719, "loss": 14.1028, "step": 25220 }, { "epoch": 0.3449761059950366, "grad_norm": 894.546142578125, "learning_rate": 0.0004920392626547522, "loss": 13.3492, "step": 25230 }, { "epoch": 0.34511283849840363, "grad_norm": 621.9090576171875, "learning_rate": 0.0004920243199578641, "loss": 13.3351, "step": 25240 }, { "epoch": 0.34524957100177067, "grad_norm": 81845712.0, "learning_rate": 0.0004920093634773586, "loss": 11.4958, "step": 25250 }, { "epoch": 0.3453863035051377, "grad_norm": 3005.732177734375, "learning_rate": 0.0004919943932140874, "loss": 11.5935, "step": 25260 }, { "epoch": 0.34552303600850476, "grad_norm": 59887.44921875, "learning_rate": 0.0004919794091689033, "loss": 11.8426, "step": 25270 }, { "epoch": 0.3456597685118718, "grad_norm": 7201.54052734375, "learning_rate": 0.0004919644113426595, "loss": 13.5544, "step": 25280 }, { "epoch": 0.34579650101523884, "grad_norm": 12162.7392578125, "learning_rate": 0.00049194939973621, "loss": 15.1056, "step": 25290 }, { "epoch": 0.3459332335186059, "grad_norm": 45475.91015625, "learning_rate": 0.00049193437435041, "loss": 13.0384, "step": 25300 }, { "epoch": 0.34606996602197293, "grad_norm": 32349018.0, "learning_rate": 0.0004919193351861151, "loss": 12.1646, "step": 25310 }, { "epoch": 0.34620669852534, "grad_norm": 1776.4970703125, "learning_rate": 0.0004919042822441818, "loss": 13.6058, "step": 25320 }, { "epoch": 0.346343431028707, "grad_norm": 123.46666717529297, "learning_rate": 0.0004918892155254674, "loss": 14.5425, "step": 25330 }, { "epoch": 0.346480163532074, "grad_norm": 2696672.25, "learning_rate": 0.00049187413503083, "loss": 16.0468, "step": 25340 }, { "epoch": 0.34661689603544105, "grad_norm": 15088.724609375, "learning_rate": 0.0004918590407611284, "loss": 18.9862, "step": 25350 }, { "epoch": 0.3467536285388081, "grad_norm": 74.69071197509766, "learning_rate": 0.0004918439327172222, "loss": 18.8327, "step": 25360 }, { "epoch": 0.34689036104217513, "grad_norm": 136.3649139404297, "learning_rate": 0.000491828810899972, "loss": 13.8938, "step": 25370 }, { "epoch": 0.3470270935455422, "grad_norm": 329.7280578613281, "learning_rate": 0.0004918136753102388, "loss": 11.456, "step": 25380 }, { "epoch": 0.3471638260489092, "grad_norm": 3169.94873046875, "learning_rate": 0.0004917985259488847, "loss": 10.5848, "step": 25390 }, { "epoch": 0.34730055855227626, "grad_norm": 6.134954452514648, "learning_rate": 0.0004917833628167725, "loss": 8.6625, "step": 25400 }, { "epoch": 0.3474372910556433, "grad_norm": 14.874661445617676, "learning_rate": 0.0004917681859147656, "loss": 8.1076, "step": 25410 }, { "epoch": 0.34757402355901035, "grad_norm": 27.49446678161621, "learning_rate": 0.0004917529952437286, "loss": 7.4739, "step": 25420 }, { "epoch": 0.3477107560623774, "grad_norm": 10.095922470092773, "learning_rate": 0.0004917377908045265, "loss": 7.5334, "step": 25430 }, { "epoch": 0.3478474885657444, "grad_norm": 138.01043701171875, "learning_rate": 0.0004917225725980252, "loss": 8.074, "step": 25440 }, { "epoch": 0.3479842210691114, "grad_norm": 41.79126739501953, "learning_rate": 0.0004917073406250914, "loss": 8.6648, "step": 25450 }, { "epoch": 0.34812095357247846, "grad_norm": 2.6584413051605225, "learning_rate": 0.0004916920948865925, "loss": 7.1512, "step": 25460 }, { "epoch": 0.3482576860758455, "grad_norm": 5.821140289306641, "learning_rate": 0.0004916768353833969, "loss": 6.9908, "step": 25470 }, { "epoch": 0.34839441857921255, "grad_norm": 13.430356979370117, "learning_rate": 0.0004916615621163737, "loss": 7.3914, "step": 25480 }, { "epoch": 0.3485311510825796, "grad_norm": 8.057049751281738, "learning_rate": 0.0004916462750863926, "loss": 7.4778, "step": 25490 }, { "epoch": 0.34866788358594664, "grad_norm": 2400.87109375, "learning_rate": 0.0004916309742943242, "loss": 8.4959, "step": 25500 }, { "epoch": 0.3488046160893137, "grad_norm": 347.0770568847656, "learning_rate": 0.00049161565974104, "loss": 7.615, "step": 25510 }, { "epoch": 0.3489413485926807, "grad_norm": 49.47767639160156, "learning_rate": 0.0004916003314274121, "loss": 8.1816, "step": 25520 }, { "epoch": 0.34907808109604777, "grad_norm": 36.319400787353516, "learning_rate": 0.0004915849893543136, "loss": 7.4877, "step": 25530 }, { "epoch": 0.3492148135994148, "grad_norm": 228.33583068847656, "learning_rate": 0.0004915696335226181, "loss": 7.3813, "step": 25540 }, { "epoch": 0.3493515461027818, "grad_norm": 37.164146423339844, "learning_rate": 0.0004915542639332002, "loss": 7.293, "step": 25550 }, { "epoch": 0.34948827860614884, "grad_norm": 24703.6640625, "learning_rate": 0.0004915388805869352, "loss": 7.7794, "step": 25560 }, { "epoch": 0.3496250111095159, "grad_norm": 2388.670654296875, "learning_rate": 0.0004915234834846993, "loss": 7.3232, "step": 25570 }, { "epoch": 0.3497617436128829, "grad_norm": 174.62464904785156, "learning_rate": 0.0004915080726273693, "loss": 9.6366, "step": 25580 }, { "epoch": 0.34989847611624997, "grad_norm": 580.487548828125, "learning_rate": 0.0004914926480158228, "loss": 11.4712, "step": 25590 }, { "epoch": 0.350035208619617, "grad_norm": 5107.759765625, "learning_rate": 0.0004914772096509384, "loss": 10.6872, "step": 25600 }, { "epoch": 0.35017194112298405, "grad_norm": 3700.8037109375, "learning_rate": 0.0004914617575335952, "loss": 10.3411, "step": 25610 }, { "epoch": 0.3503086736263511, "grad_norm": 1286.5262451171875, "learning_rate": 0.0004914462916646734, "loss": 11.0036, "step": 25620 }, { "epoch": 0.35044540612971814, "grad_norm": 51.23128890991211, "learning_rate": 0.0004914308120450535, "loss": 9.6572, "step": 25630 }, { "epoch": 0.3505821386330852, "grad_norm": 1473.57080078125, "learning_rate": 0.0004914153186756174, "loss": 9.2449, "step": 25640 }, { "epoch": 0.3507188711364522, "grad_norm": 30527.939453125, "learning_rate": 0.0004913998115572473, "loss": 8.9237, "step": 25650 }, { "epoch": 0.3508556036398192, "grad_norm": 30.658109664916992, "learning_rate": 0.0004913842906908264, "loss": 8.1718, "step": 25660 }, { "epoch": 0.35099233614318626, "grad_norm": 47.46453094482422, "learning_rate": 0.0004913687560772387, "loss": 8.6747, "step": 25670 }, { "epoch": 0.3511290686465533, "grad_norm": 49.056209564208984, "learning_rate": 0.0004913532077173688, "loss": 7.7315, "step": 25680 }, { "epoch": 0.35126580114992034, "grad_norm": 14.553942680358887, "learning_rate": 0.0004913376456121023, "loss": 7.9917, "step": 25690 }, { "epoch": 0.3514025336532874, "grad_norm": 1.675055742263794, "learning_rate": 0.0004913220697623253, "loss": 7.438, "step": 25700 }, { "epoch": 0.35153926615665443, "grad_norm": 49.50114440917969, "learning_rate": 0.0004913064801689251, "loss": 7.7103, "step": 25710 }, { "epoch": 0.35167599866002147, "grad_norm": 88.58651733398438, "learning_rate": 0.0004912908768327893, "loss": 7.9371, "step": 25720 }, { "epoch": 0.3518127311633885, "grad_norm": 254.4914093017578, "learning_rate": 0.000491275259754807, "loss": 8.1477, "step": 25730 }, { "epoch": 0.35194946366675556, "grad_norm": 16.660327911376953, "learning_rate": 0.000491259628935867, "loss": 8.316, "step": 25740 }, { "epoch": 0.3520861961701226, "grad_norm": 122.08984375, "learning_rate": 0.0004912439843768599, "loss": 7.7976, "step": 25750 }, { "epoch": 0.35222292867348964, "grad_norm": 185.72695922851562, "learning_rate": 0.0004912283260786766, "loss": 8.0858, "step": 25760 }, { "epoch": 0.35235966117685663, "grad_norm": 458.0107727050781, "learning_rate": 0.0004912126540422086, "loss": 7.8331, "step": 25770 }, { "epoch": 0.3524963936802237, "grad_norm": 180.60781860351562, "learning_rate": 0.0004911969682683489, "loss": 7.9668, "step": 25780 }, { "epoch": 0.3526331261835907, "grad_norm": 8195.1435546875, "learning_rate": 0.0004911812687579905, "loss": 8.0357, "step": 25790 }, { "epoch": 0.35276985868695776, "grad_norm": 35464.29296875, "learning_rate": 0.0004911655555120277, "loss": 7.8385, "step": 25800 }, { "epoch": 0.3529065911903248, "grad_norm": 11838.9267578125, "learning_rate": 0.0004911498285313551, "loss": 7.2016, "step": 25810 }, { "epoch": 0.35304332369369185, "grad_norm": 38.24301528930664, "learning_rate": 0.0004911340878168687, "loss": 7.5742, "step": 25820 }, { "epoch": 0.3531800561970589, "grad_norm": 8.578469276428223, "learning_rate": 0.0004911183333694649, "loss": 7.3027, "step": 25830 }, { "epoch": 0.35331678870042593, "grad_norm": 10.092991828918457, "learning_rate": 0.0004911025651900408, "loss": 7.3749, "step": 25840 }, { "epoch": 0.353453521203793, "grad_norm": 3672.542236328125, "learning_rate": 0.0004910867832794945, "loss": 7.1835, "step": 25850 }, { "epoch": 0.35359025370716, "grad_norm": 48.43090057373047, "learning_rate": 0.0004910709876387247, "loss": 7.5682, "step": 25860 }, { "epoch": 0.35372698621052706, "grad_norm": 230.58514404296875, "learning_rate": 0.0004910551782686312, "loss": 7.5339, "step": 25870 }, { "epoch": 0.35386371871389405, "grad_norm": 244.30946350097656, "learning_rate": 0.0004910393551701141, "loss": 7.3633, "step": 25880 }, { "epoch": 0.3540004512172611, "grad_norm": 775.7923583984375, "learning_rate": 0.0004910235183440747, "loss": 7.4869, "step": 25890 }, { "epoch": 0.35413718372062813, "grad_norm": 268.34857177734375, "learning_rate": 0.0004910076677914151, "loss": 7.2417, "step": 25900 }, { "epoch": 0.3542739162239952, "grad_norm": 104.6951904296875, "learning_rate": 0.0004909918035130377, "loss": 7.6437, "step": 25910 }, { "epoch": 0.3544106487273622, "grad_norm": 33.828575134277344, "learning_rate": 0.0004909759255098459, "loss": 7.4955, "step": 25920 }, { "epoch": 0.35454738123072926, "grad_norm": 113.46275329589844, "learning_rate": 0.0004909600337827444, "loss": 7.0507, "step": 25930 }, { "epoch": 0.3546841137340963, "grad_norm": 16545.95703125, "learning_rate": 0.000490944128332638, "loss": 7.5698, "step": 25940 }, { "epoch": 0.35482084623746335, "grad_norm": 5.182306289672852, "learning_rate": 0.0004909282091604326, "loss": 7.3159, "step": 25950 }, { "epoch": 0.3549575787408304, "grad_norm": 920.7386474609375, "learning_rate": 0.0004909122762670348, "loss": 7.2192, "step": 25960 }, { "epoch": 0.35509431124419744, "grad_norm": 14.556685447692871, "learning_rate": 0.000490896329653352, "loss": 7.5657, "step": 25970 }, { "epoch": 0.3552310437475645, "grad_norm": 7155.22216796875, "learning_rate": 0.0004908803693202924, "loss": 7.6742, "step": 25980 }, { "epoch": 0.35536777625093147, "grad_norm": 2.7739038467407227, "learning_rate": 0.0004908643952687649, "loss": 7.5526, "step": 25990 }, { "epoch": 0.3555045087542985, "grad_norm": 18.61375617980957, "learning_rate": 0.0004908484074996793, "loss": 7.9284, "step": 26000 }, { "epoch": 0.35564124125766555, "grad_norm": 67.6688003540039, "learning_rate": 0.0004908324060139462, "loss": 7.5908, "step": 26010 }, { "epoch": 0.3557779737610326, "grad_norm": 23.829336166381836, "learning_rate": 0.0004908163908124766, "loss": 7.6703, "step": 26020 }, { "epoch": 0.35591470626439964, "grad_norm": 29.886947631835938, "learning_rate": 0.000490800361896183, "loss": 7.6366, "step": 26030 }, { "epoch": 0.3560514387677667, "grad_norm": 1060.5623779296875, "learning_rate": 0.000490784319265978, "loss": 7.5661, "step": 26040 }, { "epoch": 0.3561881712711337, "grad_norm": 33.62939453125, "learning_rate": 0.0004907682629227753, "loss": 7.3782, "step": 26050 }, { "epoch": 0.35632490377450077, "grad_norm": 297.3657531738281, "learning_rate": 0.0004907521928674893, "loss": 7.6487, "step": 26060 }, { "epoch": 0.3564616362778678, "grad_norm": 440.7185363769531, "learning_rate": 0.0004907361091010355, "loss": 7.4058, "step": 26070 }, { "epoch": 0.35659836878123485, "grad_norm": 42.31317901611328, "learning_rate": 0.0004907200116243295, "loss": 7.4867, "step": 26080 }, { "epoch": 0.3567351012846019, "grad_norm": 443.23944091796875, "learning_rate": 0.0004907039004382883, "loss": 7.389, "step": 26090 }, { "epoch": 0.3568718337879689, "grad_norm": 1614.5224609375, "learning_rate": 0.0004906877755438293, "loss": 7.1857, "step": 26100 }, { "epoch": 0.3570085662913359, "grad_norm": 5787.11376953125, "learning_rate": 0.000490671636941871, "loss": 7.4514, "step": 26110 }, { "epoch": 0.35714529879470297, "grad_norm": 1077.98828125, "learning_rate": 0.0004906554846333324, "loss": 7.6689, "step": 26120 }, { "epoch": 0.35728203129807, "grad_norm": 84.92243957519531, "learning_rate": 0.0004906393186191334, "loss": 7.5784, "step": 26130 }, { "epoch": 0.35741876380143706, "grad_norm": 229.51907348632812, "learning_rate": 0.0004906231389001948, "loss": 7.1099, "step": 26140 }, { "epoch": 0.3575554963048041, "grad_norm": 65.69464874267578, "learning_rate": 0.0004906069454774379, "loss": 6.9105, "step": 26150 }, { "epoch": 0.35769222880817114, "grad_norm": 74.0092544555664, "learning_rate": 0.000490590738351785, "loss": 7.285, "step": 26160 }, { "epoch": 0.3578289613115382, "grad_norm": 131.9727783203125, "learning_rate": 0.0004905745175241592, "loss": 7.4485, "step": 26170 }, { "epoch": 0.35796569381490523, "grad_norm": 387.2841796875, "learning_rate": 0.0004905582829954841, "loss": 7.362, "step": 26180 }, { "epoch": 0.35810242631827227, "grad_norm": 20.650203704833984, "learning_rate": 0.0004905420347666845, "loss": 7.3622, "step": 26190 }, { "epoch": 0.35823915882163926, "grad_norm": 72.32485961914062, "learning_rate": 0.0004905257728386856, "loss": 7.4205, "step": 26200 }, { "epoch": 0.3583758913250063, "grad_norm": 57.07978820800781, "learning_rate": 0.0004905094972124137, "loss": 7.3812, "step": 26210 }, { "epoch": 0.35851262382837334, "grad_norm": 158.3357391357422, "learning_rate": 0.0004904932078887955, "loss": 7.1993, "step": 26220 }, { "epoch": 0.3586493563317404, "grad_norm": 144.7058868408203, "learning_rate": 0.0004904769048687589, "loss": 7.3491, "step": 26230 }, { "epoch": 0.35878608883510743, "grad_norm": 768.4950561523438, "learning_rate": 0.0004904605881532322, "loss": 6.9829, "step": 26240 }, { "epoch": 0.3589228213384745, "grad_norm": 56.86876678466797, "learning_rate": 0.0004904442577431449, "loss": 7.1756, "step": 26250 }, { "epoch": 0.3590595538418415, "grad_norm": 33817.5546875, "learning_rate": 0.0004904279136394268, "loss": 7.3949, "step": 26260 }, { "epoch": 0.35919628634520856, "grad_norm": 164.82455444335938, "learning_rate": 0.0004904115558430089, "loss": 7.0022, "step": 26270 }, { "epoch": 0.3593330188485756, "grad_norm": 45.71158218383789, "learning_rate": 0.0004903951843548227, "loss": 7.13, "step": 26280 }, { "epoch": 0.35946975135194265, "grad_norm": 1023.5829467773438, "learning_rate": 0.0004903787991758005, "loss": 6.9978, "step": 26290 }, { "epoch": 0.3596064838553097, "grad_norm": 87.10894012451172, "learning_rate": 0.0004903624003068756, "loss": 7.3833, "step": 26300 }, { "epoch": 0.3597432163586767, "grad_norm": 43.368446350097656, "learning_rate": 0.0004903459877489819, "loss": 7.2632, "step": 26310 }, { "epoch": 0.3598799488620437, "grad_norm": 32.59264373779297, "learning_rate": 0.000490329561503054, "loss": 7.1651, "step": 26320 }, { "epoch": 0.36001668136541076, "grad_norm": 66.45183563232422, "learning_rate": 0.0004903131215700276, "loss": 7.8788, "step": 26330 }, { "epoch": 0.3601534138687778, "grad_norm": 135.9302978515625, "learning_rate": 0.0004902966679508389, "loss": 8.3245, "step": 26340 }, { "epoch": 0.36029014637214485, "grad_norm": 525.1259765625, "learning_rate": 0.0004902802006464249, "loss": 8.1749, "step": 26350 }, { "epoch": 0.3604268788755119, "grad_norm": 2634.504638671875, "learning_rate": 0.0004902637196577235, "loss": 7.9839, "step": 26360 }, { "epoch": 0.36056361137887893, "grad_norm": 47.2863655090332, "learning_rate": 0.0004902472249856733, "loss": 8.0846, "step": 26370 }, { "epoch": 0.360700343882246, "grad_norm": 407.07769775390625, "learning_rate": 0.0004902307166312137, "loss": 7.8584, "step": 26380 }, { "epoch": 0.360837076385613, "grad_norm": 369.9409484863281, "learning_rate": 0.0004902141945952848, "loss": 7.7329, "step": 26390 }, { "epoch": 0.36097380888898006, "grad_norm": 188.23849487304688, "learning_rate": 0.0004901976588788276, "loss": 7.3611, "step": 26400 }, { "epoch": 0.3611105413923471, "grad_norm": 620354.125, "learning_rate": 0.0004901811094827839, "loss": 7.0631, "step": 26410 }, { "epoch": 0.3612472738957141, "grad_norm": 36801.6015625, "learning_rate": 0.000490164546408096, "loss": 7.7714, "step": 26420 }, { "epoch": 0.36138400639908114, "grad_norm": 50.724605560302734, "learning_rate": 0.0004901479696557075, "loss": 7.5188, "step": 26430 }, { "epoch": 0.3615207389024482, "grad_norm": 20.183839797973633, "learning_rate": 0.0004901313792265622, "loss": 7.4282, "step": 26440 }, { "epoch": 0.3616574714058152, "grad_norm": 16.314834594726562, "learning_rate": 0.0004901147751216052, "loss": 7.036, "step": 26450 }, { "epoch": 0.36179420390918227, "grad_norm": 4.474398612976074, "learning_rate": 0.0004900981573417819, "loss": 7.2916, "step": 26460 }, { "epoch": 0.3619309364125493, "grad_norm": 29.249704360961914, "learning_rate": 0.0004900815258880388, "loss": 6.844, "step": 26470 }, { "epoch": 0.36206766891591635, "grad_norm": 61.98725128173828, "learning_rate": 0.000490064880761323, "loss": 7.3624, "step": 26480 }, { "epoch": 0.3622044014192834, "grad_norm": 9.659233093261719, "learning_rate": 0.0004900482219625827, "loss": 6.9081, "step": 26490 }, { "epoch": 0.36234113392265044, "grad_norm": 117.28099822998047, "learning_rate": 0.0004900315494927663, "loss": 7.4991, "step": 26500 }, { "epoch": 0.3624778664260175, "grad_norm": 9.188996315002441, "learning_rate": 0.0004900148633528237, "loss": 7.0064, "step": 26510 }, { "epoch": 0.3626145989293845, "grad_norm": 5.370859622955322, "learning_rate": 0.0004899981635437048, "loss": 7.4541, "step": 26520 }, { "epoch": 0.3627513314327515, "grad_norm": 1.9971107244491577, "learning_rate": 0.0004899814500663609, "loss": 7.2865, "step": 26530 }, { "epoch": 0.36288806393611855, "grad_norm": 18.66511344909668, "learning_rate": 0.0004899647229217439, "loss": 7.1152, "step": 26540 }, { "epoch": 0.3630247964394856, "grad_norm": 327.0421447753906, "learning_rate": 0.0004899479821108063, "loss": 6.937, "step": 26550 }, { "epoch": 0.36316152894285264, "grad_norm": 1420.69873046875, "learning_rate": 0.0004899312276345017, "loss": 7.01, "step": 26560 }, { "epoch": 0.3632982614462197, "grad_norm": 53.45705795288086, "learning_rate": 0.0004899144594937841, "loss": 7.0683, "step": 26570 }, { "epoch": 0.3634349939495867, "grad_norm": 28.167194366455078, "learning_rate": 0.0004898976776896085, "loss": 6.8597, "step": 26580 }, { "epoch": 0.36357172645295377, "grad_norm": 24.412538528442383, "learning_rate": 0.0004898808822229307, "loss": 7.0275, "step": 26590 }, { "epoch": 0.3637084589563208, "grad_norm": 41.84207534790039, "learning_rate": 0.0004898640730947072, "loss": 7.0959, "step": 26600 }, { "epoch": 0.36384519145968786, "grad_norm": 233.5434112548828, "learning_rate": 0.0004898472503058953, "loss": 7.0982, "step": 26610 }, { "epoch": 0.3639819239630549, "grad_norm": 51755.46875, "learning_rate": 0.0004898304138574531, "loss": 7.3083, "step": 26620 }, { "epoch": 0.36411865646642194, "grad_norm": 566.0274047851562, "learning_rate": 0.0004898135637503395, "loss": 7.1313, "step": 26630 }, { "epoch": 0.36425538896978893, "grad_norm": 417.6748046875, "learning_rate": 0.000489796699985514, "loss": 6.8766, "step": 26640 }, { "epoch": 0.36439212147315597, "grad_norm": 316.9483642578125, "learning_rate": 0.0004897798225639372, "loss": 7.3432, "step": 26650 }, { "epoch": 0.364528853976523, "grad_norm": 1611.781494140625, "learning_rate": 0.00048976293148657, "loss": 7.1561, "step": 26660 }, { "epoch": 0.36466558647989006, "grad_norm": 152.07192993164062, "learning_rate": 0.0004897460267543748, "loss": 7.1757, "step": 26670 }, { "epoch": 0.3648023189832571, "grad_norm": 570.7141723632812, "learning_rate": 0.0004897291083683139, "loss": 7.2116, "step": 26680 }, { "epoch": 0.36493905148662414, "grad_norm": 652.3390502929688, "learning_rate": 0.0004897121763293512, "loss": 7.2584, "step": 26690 }, { "epoch": 0.3650757839899912, "grad_norm": 1012439.125, "learning_rate": 0.0004896952306384508, "loss": 8.5283, "step": 26700 }, { "epoch": 0.36521251649335823, "grad_norm": 1159717715968.0, "learning_rate": 0.0004896782712965778, "loss": 7.6674, "step": 26710 }, { "epoch": 0.3653492489967253, "grad_norm": 1293686293921792.0, "learning_rate": 0.000489661298304698, "loss": 7.8805, "step": 26720 }, { "epoch": 0.3654859815000923, "grad_norm": NaN, "learning_rate": 0.0004896443116637782, "loss": 16.4176, "step": 26730 }, { "epoch": 0.36562271400345936, "grad_norm": NaN, "learning_rate": 0.0004896273113747856, "loss": 0.0, "step": 26740 }, { "epoch": 0.36575944650682635, "grad_norm": NaN, "learning_rate": 0.0004896102974386887, "loss": 0.0, "step": 26750 }, { "epoch": 0.3658961790101934, "grad_norm": NaN, "learning_rate": 0.0004895932698564562, "loss": 0.0, "step": 26760 }, { "epoch": 0.36603291151356043, "grad_norm": NaN, "learning_rate": 0.0004895762286290579, "loss": 0.0, "step": 26770 }, { "epoch": 0.3661696440169275, "grad_norm": NaN, "learning_rate": 0.0004895591737574643, "loss": 0.0, "step": 26780 }, { "epoch": 0.3663063765202945, "grad_norm": NaN, "learning_rate": 0.0004895421052426469, "loss": 0.0, "step": 26790 }, { "epoch": 0.36644310902366156, "grad_norm": NaN, "learning_rate": 0.0004895250230855776, "loss": 0.0, "step": 26800 }, { "epoch": 0.3665798415270286, "grad_norm": NaN, "learning_rate": 0.0004895079272872291, "loss": 0.0, "step": 26810 }, { "epoch": 0.36671657403039565, "grad_norm": NaN, "learning_rate": 0.0004894908178485753, "loss": 0.0, "step": 26820 }, { "epoch": 0.3668533065337627, "grad_norm": NaN, "learning_rate": 0.0004894736947705906, "loss": 0.0, "step": 26830 }, { "epoch": 0.36699003903712973, "grad_norm": NaN, "learning_rate": 0.0004894565580542499, "loss": 0.0, "step": 26840 }, { "epoch": 0.3671267715404968, "grad_norm": NaN, "learning_rate": 0.0004894394077005295, "loss": 0.0, "step": 26850 }, { "epoch": 0.36726350404386376, "grad_norm": NaN, "learning_rate": 0.0004894222437104059, "loss": 0.0, "step": 26860 }, { "epoch": 0.3674002365472308, "grad_norm": NaN, "learning_rate": 0.0004894050660848568, "loss": 0.0, "step": 26870 }, { "epoch": 0.36753696905059785, "grad_norm": NaN, "learning_rate": 0.0004893878748248603, "loss": 0.0, "step": 26880 }, { "epoch": 0.3676737015539649, "grad_norm": NaN, "learning_rate": 0.0004893706699313956, "loss": 0.0, "step": 26890 }, { "epoch": 0.36781043405733194, "grad_norm": NaN, "learning_rate": 0.0004893534514054424, "loss": 0.0, "step": 26900 }, { "epoch": 0.367947166560699, "grad_norm": NaN, "learning_rate": 0.0004893362192479816, "loss": 0.0, "step": 26910 }, { "epoch": 0.368083899064066, "grad_norm": NaN, "learning_rate": 0.0004893189734599942, "loss": 0.0, "step": 26920 }, { "epoch": 0.36822063156743307, "grad_norm": NaN, "learning_rate": 0.0004893017140424628, "loss": 0.0, "step": 26930 }, { "epoch": 0.3683573640708001, "grad_norm": NaN, "learning_rate": 0.0004892844409963699, "loss": 0.0, "step": 26940 }, { "epoch": 0.36849409657416715, "grad_norm": NaN, "learning_rate": 0.0004892671543226996, "loss": 0.0, "step": 26950 }, { "epoch": 0.36863082907753414, "grad_norm": NaN, "learning_rate": 0.0004892498540224363, "loss": 0.0, "step": 26960 }, { "epoch": 0.3687675615809012, "grad_norm": NaN, "learning_rate": 0.000489232540096565, "loss": 0.0, "step": 26970 }, { "epoch": 0.3689042940842682, "grad_norm": NaN, "learning_rate": 0.0004892152125460723, "loss": 0.0, "step": 26980 }, { "epoch": 0.36904102658763527, "grad_norm": NaN, "learning_rate": 0.0004891978713719445, "loss": 0.0, "step": 26990 }, { "epoch": 0.3691777590910023, "grad_norm": NaN, "learning_rate": 0.0004891805165751693, "loss": 0.0, "step": 27000 }, { "epoch": 0.36931449159436935, "grad_norm": NaN, "learning_rate": 0.0004891631481567355, "loss": 0.0, "step": 27010 }, { "epoch": 0.3694512240977364, "grad_norm": NaN, "learning_rate": 0.0004891457661176318, "loss": 0.0, "step": 27020 }, { "epoch": 0.36958795660110344, "grad_norm": NaN, "learning_rate": 0.0004891283704588482, "loss": 0.0, "step": 27030 }, { "epoch": 0.3697246891044705, "grad_norm": NaN, "learning_rate": 0.0004891109611813756, "loss": 0.0, "step": 27040 }, { "epoch": 0.3698614216078375, "grad_norm": NaN, "learning_rate": 0.0004890935382862053, "loss": 0.0, "step": 27050 }, { "epoch": 0.36999815411120457, "grad_norm": NaN, "learning_rate": 0.0004890761017743296, "loss": 0.0, "step": 27060 }, { "epoch": 0.37013488661457156, "grad_norm": NaN, "learning_rate": 0.0004890586516467416, "loss": 0.0, "step": 27070 }, { "epoch": 0.3702716191179386, "grad_norm": NaN, "learning_rate": 0.0004890411879044351, "loss": 0.0, "step": 27080 }, { "epoch": 0.37040835162130564, "grad_norm": NaN, "learning_rate": 0.0004890237105484045, "loss": 0.0, "step": 27090 }, { "epoch": 0.3705450841246727, "grad_norm": NaN, "learning_rate": 0.0004890062195796454, "loss": 0.0, "step": 27100 }, { "epoch": 0.37068181662803973, "grad_norm": NaN, "learning_rate": 0.000488988714999154, "loss": 0.0, "step": 27110 }, { "epoch": 0.37081854913140677, "grad_norm": NaN, "learning_rate": 0.0004889711968079269, "loss": 0.0, "step": 27120 }, { "epoch": 0.3709552816347738, "grad_norm": NaN, "learning_rate": 0.0004889536650069619, "loss": 0.0, "step": 27130 }, { "epoch": 0.37109201413814086, "grad_norm": NaN, "learning_rate": 0.0004889361195972576, "loss": 0.0, "step": 27140 }, { "epoch": 0.3712287466415079, "grad_norm": NaN, "learning_rate": 0.0004889185605798131, "loss": 0.0, "step": 27150 }, { "epoch": 0.37136547914487494, "grad_norm": NaN, "learning_rate": 0.0004889009879556285, "loss": 0.0, "step": 27160 }, { "epoch": 0.371502211648242, "grad_norm": NaN, "learning_rate": 0.0004888834017257044, "loss": 0.0, "step": 27170 }, { "epoch": 0.371638944151609, "grad_norm": NaN, "learning_rate": 0.0004888658018910426, "loss": 0.0, "step": 27180 }, { "epoch": 0.371775676654976, "grad_norm": NaN, "learning_rate": 0.0004888481884526453, "loss": 0.0, "step": 27190 }, { "epoch": 0.37191240915834306, "grad_norm": NaN, "learning_rate": 0.0004888305614115156, "loss": 0.0, "step": 27200 }, { "epoch": 0.3720491416617101, "grad_norm": NaN, "learning_rate": 0.0004888129207686573, "loss": 0.0, "step": 27210 }, { "epoch": 0.37218587416507715, "grad_norm": NaN, "learning_rate": 0.0004887952665250754, "loss": 0.0, "step": 27220 }, { "epoch": 0.3723226066684442, "grad_norm": NaN, "learning_rate": 0.000488777598681775, "loss": 0.0, "step": 27230 }, { "epoch": 0.37245933917181123, "grad_norm": NaN, "learning_rate": 0.0004887599172397624, "loss": 0.0, "step": 27240 }, { "epoch": 0.3725960716751783, "grad_norm": NaN, "learning_rate": 0.0004887422222000447, "loss": 0.0, "step": 27250 }, { "epoch": 0.3727328041785453, "grad_norm": NaN, "learning_rate": 0.0004887245135636295, "loss": 0.0, "step": 27260 }, { "epoch": 0.37286953668191236, "grad_norm": NaN, "learning_rate": 0.0004887067913315254, "loss": 0.0, "step": 27270 }, { "epoch": 0.3730062691852794, "grad_norm": NaN, "learning_rate": 0.0004886890555047417, "loss": 0.0, "step": 27280 }, { "epoch": 0.3731430016886464, "grad_norm": NaN, "learning_rate": 0.0004886713060842884, "loss": 0.0, "step": 27290 }, { "epoch": 0.37327973419201343, "grad_norm": NaN, "learning_rate": 0.0004886535430711766, "loss": 0.0, "step": 27300 }, { "epoch": 0.3734164666953805, "grad_norm": NaN, "learning_rate": 0.0004886357664664176, "loss": 0.0, "step": 27310 }, { "epoch": 0.3735531991987475, "grad_norm": NaN, "learning_rate": 0.000488617976271024, "loss": 0.0, "step": 27320 }, { "epoch": 0.37368993170211456, "grad_norm": NaN, "learning_rate": 0.0004886001724860089, "loss": 0.0, "step": 27330 }, { "epoch": 0.3738266642054816, "grad_norm": NaN, "learning_rate": 0.0004885823551123863, "loss": 0.0, "step": 27340 }, { "epoch": 0.37396339670884865, "grad_norm": NaN, "learning_rate": 0.000488564524151171, "loss": 0.0, "step": 27350 }, { "epoch": 0.3741001292122157, "grad_norm": NaN, "learning_rate": 0.0004885466796033783, "loss": 0.0, "step": 27360 }, { "epoch": 0.37423686171558274, "grad_norm": NaN, "learning_rate": 0.0004885288214700246, "loss": 0.0, "step": 27370 }, { "epoch": 0.3743735942189498, "grad_norm": NaN, "learning_rate": 0.0004885109497521269, "loss": 0.0, "step": 27380 }, { "epoch": 0.3745103267223168, "grad_norm": NaN, "learning_rate": 0.0004884930644507031, "loss": 0.0, "step": 27390 }, { "epoch": 0.3746470592256838, "grad_norm": NaN, "learning_rate": 0.0004884751655667717, "loss": 0.0, "step": 27400 }, { "epoch": 0.37478379172905085, "grad_norm": NaN, "learning_rate": 0.0004884572531013521, "loss": 0.0, "step": 27410 }, { "epoch": 0.3749205242324179, "grad_norm": NaN, "learning_rate": 0.0004884393270554644, "loss": 0.0, "step": 27420 }, { "epoch": 0.37505725673578494, "grad_norm": NaN, "learning_rate": 0.0004884213874301296, "loss": 0.0, "step": 27430 }, { "epoch": 0.375193989239152, "grad_norm": NaN, "learning_rate": 0.0004884034342263695, "loss": 0.0, "step": 27440 }, { "epoch": 0.375330721742519, "grad_norm": NaN, "learning_rate": 0.0004883854674452062, "loss": 0.0, "step": 27450 }, { "epoch": 0.37546745424588607, "grad_norm": NaN, "learning_rate": 0.0004883674870876631, "loss": 0.0, "step": 27460 }, { "epoch": 0.3756041867492531, "grad_norm": NaN, "learning_rate": 0.0004883494931547643, "loss": 0.0, "step": 27470 }, { "epoch": 0.37574091925262015, "grad_norm": NaN, "learning_rate": 0.0004883314856475346, "loss": 0.0, "step": 27480 }, { "epoch": 0.3758776517559872, "grad_norm": NaN, "learning_rate": 0.0004883134645669993, "loss": 0.0, "step": 27490 }, { "epoch": 0.37601438425935424, "grad_norm": NaN, "learning_rate": 0.0004882954299141851, "loss": 0.0, "step": 27500 }, { "epoch": 0.3761511167627212, "grad_norm": NaN, "learning_rate": 0.0004882773816901187, "loss": 0.0, "step": 27510 }, { "epoch": 0.37628784926608827, "grad_norm": NaN, "learning_rate": 0.00048825931989582825, "loss": 0.0, "step": 27520 }, { "epoch": 0.3764245817694553, "grad_norm": NaN, "learning_rate": 0.00048824124453234234, "loss": 0.0, "step": 27530 }, { "epoch": 0.37656131427282236, "grad_norm": NaN, "learning_rate": 0.0004882231556006903, "loss": 0.0, "step": 27540 }, { "epoch": 0.3766980467761894, "grad_norm": NaN, "learning_rate": 0.00048820505310190234, "loss": 0.0, "step": 27550 }, { "epoch": 0.37683477927955644, "grad_norm": NaN, "learning_rate": 0.00048818693703700955, "loss": 0.0, "step": 27560 }, { "epoch": 0.3769715117829235, "grad_norm": NaN, "learning_rate": 0.0004881688074070435, "loss": 0.0, "step": 27570 }, { "epoch": 0.37710824428629053, "grad_norm": NaN, "learning_rate": 0.00048815066421303675, "loss": 0.0, "step": 27580 }, { "epoch": 0.37724497678965757, "grad_norm": NaN, "learning_rate": 0.0004881325074560227, "loss": 0.0, "step": 27590 }, { "epoch": 0.3773817092930246, "grad_norm": NaN, "learning_rate": 0.00048811433713703524, "loss": 0.0, "step": 27600 }, { "epoch": 0.3775184417963916, "grad_norm": NaN, "learning_rate": 0.0004880961532571092, "loss": 0.0, "step": 27610 }, { "epoch": 0.37765517429975864, "grad_norm": NaN, "learning_rate": 0.0004880779558172803, "loss": 0.0, "step": 27620 }, { "epoch": 0.3777919068031257, "grad_norm": NaN, "learning_rate": 0.00048805974481858484, "loss": 0.0, "step": 27630 }, { "epoch": 0.37792863930649273, "grad_norm": NaN, "learning_rate": 0.0004880415202620599, "loss": 0.0, "step": 27640 }, { "epoch": 0.3780653718098598, "grad_norm": NaN, "learning_rate": 0.0004880232821487435, "loss": 0.0, "step": 27650 }, { "epoch": 0.3782021043132268, "grad_norm": NaN, "learning_rate": 0.0004880050304796743, "loss": 0.0, "step": 27660 }, { "epoch": 0.37833883681659386, "grad_norm": NaN, "learning_rate": 0.00048798676525589174, "loss": 0.0, "step": 27670 }, { "epoch": 0.3784755693199609, "grad_norm": NaN, "learning_rate": 0.00048796848647843607, "loss": 0.0, "step": 27680 }, { "epoch": 0.37861230182332795, "grad_norm": NaN, "learning_rate": 0.0004879501941483482, "loss": 0.0, "step": 27690 }, { "epoch": 0.378749034326695, "grad_norm": NaN, "learning_rate": 0.00048793188826667, "loss": 0.0, "step": 27700 }, { "epoch": 0.37888576683006203, "grad_norm": NaN, "learning_rate": 0.000487913568834444, "loss": 0.0, "step": 27710 }, { "epoch": 0.379022499333429, "grad_norm": NaN, "learning_rate": 0.00048789523585271346, "loss": 0.0, "step": 27720 }, { "epoch": 0.37915923183679606, "grad_norm": NaN, "learning_rate": 0.00048787688932252257, "loss": 0.0, "step": 27730 }, { "epoch": 0.3792959643401631, "grad_norm": NaN, "learning_rate": 0.00048785852924491615, "loss": 0.0, "step": 27740 }, { "epoch": 0.37943269684353015, "grad_norm": NaN, "learning_rate": 0.00048784015562093976, "loss": 0.0, "step": 27750 }, { "epoch": 0.3795694293468972, "grad_norm": NaN, "learning_rate": 0.0004878217684516398, "loss": 0.0, "step": 27760 }, { "epoch": 0.37970616185026423, "grad_norm": NaN, "learning_rate": 0.00048780336773806367, "loss": 0.0, "step": 27770 }, { "epoch": 0.3798428943536313, "grad_norm": NaN, "learning_rate": 0.00048778495348125907, "loss": 0.0, "step": 27780 }, { "epoch": 0.3799796268569983, "grad_norm": NaN, "learning_rate": 0.0004877665256822748, "loss": 0.0, "step": 27790 }, { "epoch": 0.38011635936036536, "grad_norm": NaN, "learning_rate": 0.00048774808434216024, "loss": 0.0, "step": 27800 }, { "epoch": 0.3802530918637324, "grad_norm": NaN, "learning_rate": 0.00048772962946196585, "loss": 0.0, "step": 27810 }, { "epoch": 0.38038982436709945, "grad_norm": NaN, "learning_rate": 0.0004877111610427426, "loss": 0.0, "step": 27820 }, { "epoch": 0.38052655687046644, "grad_norm": NaN, "learning_rate": 0.00048769267908554216, "loss": 0.0, "step": 27830 }, { "epoch": 0.3806632893738335, "grad_norm": NaN, "learning_rate": 0.0004876741835914173, "loss": 0.0, "step": 27840 }, { "epoch": 0.3808000218772005, "grad_norm": NaN, "learning_rate": 0.00048765567456142124, "loss": 0.0, "step": 27850 }, { "epoch": 0.38093675438056757, "grad_norm": NaN, "learning_rate": 0.0004876371519966081, "loss": 0.0, "step": 27860 }, { "epoch": 0.3810734868839346, "grad_norm": NaN, "learning_rate": 0.00048761861589803276, "loss": 0.0, "step": 27870 }, { "epoch": 0.38121021938730165, "grad_norm": NaN, "learning_rate": 0.0004876000662667509, "loss": 0.0, "step": 27880 }, { "epoch": 0.3813469518906687, "grad_norm": NaN, "learning_rate": 0.000487581503103819, "loss": 0.0, "step": 27890 }, { "epoch": 0.38148368439403574, "grad_norm": NaN, "learning_rate": 0.00048756292641029416, "loss": 0.0, "step": 27900 }, { "epoch": 0.3816204168974028, "grad_norm": NaN, "learning_rate": 0.0004875443361872344, "loss": 0.0, "step": 27910 }, { "epoch": 0.3817571494007698, "grad_norm": NaN, "learning_rate": 0.0004875257324356984, "loss": 0.0, "step": 27920 }, { "epoch": 0.38189388190413687, "grad_norm": NaN, "learning_rate": 0.0004875071151567458, "loss": 0.0, "step": 27930 }, { "epoch": 0.38203061440750385, "grad_norm": NaN, "learning_rate": 0.0004874884843514368, "loss": 0.0, "step": 27940 }, { "epoch": 0.3821673469108709, "grad_norm": NaN, "learning_rate": 0.0004874698400208324, "loss": 0.0, "step": 27950 }, { "epoch": 0.38230407941423794, "grad_norm": NaN, "learning_rate": 0.0004874511821659945, "loss": 0.0, "step": 27960 }, { "epoch": 0.382440811917605, "grad_norm": NaN, "learning_rate": 0.0004874325107879857, "loss": 0.0, "step": 27970 }, { "epoch": 0.382577544420972, "grad_norm": NaN, "learning_rate": 0.00048741382588786923, "loss": 0.0, "step": 27980 }, { "epoch": 0.38271427692433907, "grad_norm": NaN, "learning_rate": 0.0004873951274667094, "loss": 0.0, "step": 27990 }, { "epoch": 0.3828510094277061, "grad_norm": NaN, "learning_rate": 0.000487376415525571, "loss": 0.0, "step": 28000 }, { "epoch": 0.38298774193107316, "grad_norm": NaN, "learning_rate": 0.00048735769006551965, "loss": 0.0, "step": 28010 }, { "epoch": 0.3831244744344402, "grad_norm": NaN, "learning_rate": 0.00048733895108762194, "loss": 0.0, "step": 28020 }, { "epoch": 0.38326120693780724, "grad_norm": NaN, "learning_rate": 0.000487320198592945, "loss": 0.0, "step": 28030 }, { "epoch": 0.3833979394411743, "grad_norm": NaN, "learning_rate": 0.0004873014325825568, "loss": 0.0, "step": 28040 }, { "epoch": 0.38353467194454127, "grad_norm": NaN, "learning_rate": 0.0004872826530575261, "loss": 0.0, "step": 28050 }, { "epoch": 0.3836714044479083, "grad_norm": NaN, "learning_rate": 0.00048726386001892244, "loss": 0.0, "step": 28060 }, { "epoch": 0.38380813695127536, "grad_norm": NaN, "learning_rate": 0.0004872450534678161, "loss": 0.0, "step": 28070 }, { "epoch": 0.3839448694546424, "grad_norm": NaN, "learning_rate": 0.0004872262334052782, "loss": 0.0, "step": 28080 }, { "epoch": 0.38408160195800944, "grad_norm": NaN, "learning_rate": 0.0004872073998323804, "loss": 0.0, "step": 28090 }, { "epoch": 0.3842183344613765, "grad_norm": NaN, "learning_rate": 0.00048718855275019533, "loss": 0.0, "step": 28100 }, { "epoch": 0.38435506696474353, "grad_norm": NaN, "learning_rate": 0.00048716969215979657, "loss": 0.0, "step": 28110 }, { "epoch": 0.3844917994681106, "grad_norm": NaN, "learning_rate": 0.000487150818062258, "loss": 0.0, "step": 28120 }, { "epoch": 0.3846285319714776, "grad_norm": NaN, "learning_rate": 0.0004871319304586548, "loss": 0.0, "step": 28130 }, { "epoch": 0.38476526447484466, "grad_norm": NaN, "learning_rate": 0.0004871130293500624, "loss": 0.0, "step": 28140 }, { "epoch": 0.3849019969782117, "grad_norm": NaN, "learning_rate": 0.0004870941147375573, "loss": 0.0, "step": 28150 }, { "epoch": 0.3850387294815787, "grad_norm": NaN, "learning_rate": 0.0004870751866222167, "loss": 0.0, "step": 28160 }, { "epoch": 0.38517546198494573, "grad_norm": NaN, "learning_rate": 0.0004870562450051187, "loss": 0.0, "step": 28170 }, { "epoch": 0.3853121944883128, "grad_norm": NaN, "learning_rate": 0.00048703728988734195, "loss": 0.0, "step": 28180 }, { "epoch": 0.3854489269916798, "grad_norm": NaN, "learning_rate": 0.000487018321269966, "loss": 0.0, "step": 28190 }, { "epoch": 0.38558565949504686, "grad_norm": NaN, "learning_rate": 0.00048699933915407115, "loss": 0.0, "step": 28200 }, { "epoch": 0.3857223919984139, "grad_norm": NaN, "learning_rate": 0.00048698034354073837, "loss": 0.0, "step": 28210 }, { "epoch": 0.38585912450178095, "grad_norm": NaN, "learning_rate": 0.00048696133443104964, "loss": 0.0, "step": 28220 }, { "epoch": 0.385995857005148, "grad_norm": NaN, "learning_rate": 0.00048694231182608744, "loss": 0.0, "step": 28230 }, { "epoch": 0.38613258950851503, "grad_norm": NaN, "learning_rate": 0.00048692327572693515, "loss": 0.0, "step": 28240 }, { "epoch": 0.3862693220118821, "grad_norm": NaN, "learning_rate": 0.000486904226134677, "loss": 0.0, "step": 28250 }, { "epoch": 0.3864060545152491, "grad_norm": NaN, "learning_rate": 0.0004868851630503977, "loss": 0.0, "step": 28260 }, { "epoch": 0.3865427870186161, "grad_norm": NaN, "learning_rate": 0.00048686608647518305, "loss": 0.0, "step": 28270 }, { "epoch": 0.38667951952198315, "grad_norm": NaN, "learning_rate": 0.00048684699641011954, "loss": 0.0, "step": 28280 }, { "epoch": 0.3868162520253502, "grad_norm": NaN, "learning_rate": 0.0004868278928562943, "loss": 0.0, "step": 28290 }, { "epoch": 0.38695298452871724, "grad_norm": NaN, "learning_rate": 0.00048680877581479514, "loss": 0.0, "step": 28300 }, { "epoch": 0.3870897170320843, "grad_norm": NaN, "learning_rate": 0.0004867896452867111, "loss": 0.0, "step": 28310 }, { "epoch": 0.3872264495354513, "grad_norm": NaN, "learning_rate": 0.0004867705012731315, "loss": 0.0, "step": 28320 }, { "epoch": 0.38736318203881837, "grad_norm": NaN, "learning_rate": 0.00048675134377514675, "loss": 0.0, "step": 28330 }, { "epoch": 0.3874999145421854, "grad_norm": NaN, "learning_rate": 0.0004867321727938477, "loss": 0.0, "step": 28340 }, { "epoch": 0.38763664704555245, "grad_norm": NaN, "learning_rate": 0.0004867129883303264, "loss": 0.0, "step": 28350 }, { "epoch": 0.3877733795489195, "grad_norm": NaN, "learning_rate": 0.00048669379038567527, "loss": 0.0, "step": 28360 }, { "epoch": 0.3879101120522865, "grad_norm": NaN, "learning_rate": 0.00048667457896098764, "loss": 0.0, "step": 28370 }, { "epoch": 0.3880468445556535, "grad_norm": NaN, "learning_rate": 0.0004866553540573578, "loss": 0.0, "step": 28380 }, { "epoch": 0.38818357705902057, "grad_norm": NaN, "learning_rate": 0.0004866361156758805, "loss": 0.0, "step": 28390 }, { "epoch": 0.3883203095623876, "grad_norm": NaN, "learning_rate": 0.00048661686381765135, "loss": 0.0, "step": 28400 }, { "epoch": 0.38845704206575465, "grad_norm": NaN, "learning_rate": 0.0004865975984837669, "loss": 0.0, "step": 28410 }, { "epoch": 0.3885937745691217, "grad_norm": NaN, "learning_rate": 0.00048657831967532423, "loss": 0.0, "step": 28420 }, { "epoch": 0.38873050707248874, "grad_norm": NaN, "learning_rate": 0.00048655902739342137, "loss": 0.0, "step": 28430 }, { "epoch": 0.3888672395758558, "grad_norm": NaN, "learning_rate": 0.000486539721639157, "loss": 0.0, "step": 28440 }, { "epoch": 0.3890039720792228, "grad_norm": NaN, "learning_rate": 0.0004865204024136306, "loss": 0.0, "step": 28450 }, { "epoch": 0.38914070458258987, "grad_norm": NaN, "learning_rate": 0.0004865010697179425, "loss": 0.0, "step": 28460 }, { "epoch": 0.3892774370859569, "grad_norm": NaN, "learning_rate": 0.00048648172355319363, "loss": 0.0, "step": 28470 }, { "epoch": 0.3894141695893239, "grad_norm": NaN, "learning_rate": 0.0004864623639204858, "loss": 0.0, "step": 28480 }, { "epoch": 0.38955090209269094, "grad_norm": NaN, "learning_rate": 0.0004864429908209216, "loss": 0.0, "step": 28490 }, { "epoch": 0.389687634596058, "grad_norm": NaN, "learning_rate": 0.0004864236042556044, "loss": 0.0, "step": 28500 }, { "epoch": 0.38982436709942503, "grad_norm": NaN, "learning_rate": 0.00048640420422563815, "loss": 0.0, "step": 28510 }, { "epoch": 0.38996109960279207, "grad_norm": NaN, "learning_rate": 0.0004863847907321278, "loss": 0.0, "step": 28520 }, { "epoch": 0.3900978321061591, "grad_norm": NaN, "learning_rate": 0.000486365363776179, "loss": 0.0, "step": 28530 }, { "epoch": 0.39023456460952616, "grad_norm": NaN, "learning_rate": 0.00048634592335889814, "loss": 0.0, "step": 28540 }, { "epoch": 0.3903712971128932, "grad_norm": NaN, "learning_rate": 0.0004863264694813924, "loss": 0.0, "step": 28550 }, { "epoch": 0.39050802961626024, "grad_norm": NaN, "learning_rate": 0.0004863070021447695, "loss": 0.0, "step": 28560 }, { "epoch": 0.3906447621196273, "grad_norm": NaN, "learning_rate": 0.0004862875213501383, "loss": 0.0, "step": 28570 }, { "epoch": 0.39078149462299433, "grad_norm": NaN, "learning_rate": 0.00048626802709860834, "loss": 0.0, "step": 28580 }, { "epoch": 0.3909182271263613, "grad_norm": NaN, "learning_rate": 0.0004862485193912897, "loss": 0.0, "step": 28590 }, { "epoch": 0.39105495962972836, "grad_norm": NaN, "learning_rate": 0.00048622899822929345, "loss": 0.0, "step": 28600 }, { "epoch": 0.3911916921330954, "grad_norm": NaN, "learning_rate": 0.0004862094636137313, "loss": 0.0, "step": 28610 }, { "epoch": 0.39132842463646245, "grad_norm": NaN, "learning_rate": 0.00048618991554571573, "loss": 0.0, "step": 28620 }, { "epoch": 0.3914651571398295, "grad_norm": NaN, "learning_rate": 0.0004861703540263601, "loss": 0.0, "step": 28630 }, { "epoch": 0.39160188964319653, "grad_norm": NaN, "learning_rate": 0.00048615077905677853, "loss": 0.0, "step": 28640 }, { "epoch": 0.3917386221465636, "grad_norm": NaN, "learning_rate": 0.0004861311906380858, "loss": 0.0, "step": 28650 }, { "epoch": 0.3918753546499306, "grad_norm": NaN, "learning_rate": 0.00048611158877139726, "loss": 0.0, "step": 28660 }, { "epoch": 0.39201208715329766, "grad_norm": NaN, "learning_rate": 0.0004860919734578296, "loss": 0.0, "step": 28670 }, { "epoch": 0.3921488196566647, "grad_norm": NaN, "learning_rate": 0.0004860723446984998, "loss": 0.0, "step": 28680 }, { "epoch": 0.39228555216003175, "grad_norm": NaN, "learning_rate": 0.0004860527024945257, "loss": 0.0, "step": 28690 }, { "epoch": 0.39242228466339873, "grad_norm": NaN, "learning_rate": 0.00048603304684702607, "loss": 0.0, "step": 28700 }, { "epoch": 0.3925590171667658, "grad_norm": NaN, "learning_rate": 0.0004860133777571202, "loss": 0.0, "step": 28710 }, { "epoch": 0.3926957496701328, "grad_norm": NaN, "learning_rate": 0.0004859936952259284, "loss": 0.0, "step": 28720 }, { "epoch": 0.39283248217349986, "grad_norm": NaN, "learning_rate": 0.0004859739992545714, "loss": 0.0, "step": 28730 }, { "epoch": 0.3929692146768669, "grad_norm": NaN, "learning_rate": 0.0004859542898441712, "loss": 0.0, "step": 28740 }, { "epoch": 0.39310594718023395, "grad_norm": NaN, "learning_rate": 0.0004859345669958501, "loss": 0.0, "step": 28750 }, { "epoch": 0.393242679683601, "grad_norm": NaN, "learning_rate": 0.0004859148307107313, "loss": 0.0, "step": 28760 }, { "epoch": 0.39337941218696804, "grad_norm": NaN, "learning_rate": 0.00048589508098993895, "loss": 0.0, "step": 28770 }, { "epoch": 0.3935161446903351, "grad_norm": NaN, "learning_rate": 0.0004858753178345978, "loss": 0.0, "step": 28780 }, { "epoch": 0.3936528771937021, "grad_norm": NaN, "learning_rate": 0.0004858555412458333, "loss": 0.0, "step": 28790 }, { "epoch": 0.39378960969706917, "grad_norm": NaN, "learning_rate": 0.0004858357512247718, "loss": 0.0, "step": 28800 }, { "epoch": 0.39392634220043615, "grad_norm": NaN, "learning_rate": 0.00048581594777254036, "loss": 0.0, "step": 28810 }, { "epoch": 0.3940630747038032, "grad_norm": NaN, "learning_rate": 0.00048579613089026687, "loss": 0.0, "step": 28820 }, { "epoch": 0.39419980720717024, "grad_norm": NaN, "learning_rate": 0.00048577630057907985, "loss": 0.0, "step": 28830 }, { "epoch": 0.3943365397105373, "grad_norm": NaN, "learning_rate": 0.0004857564568401087, "loss": 0.0, "step": 28840 }, { "epoch": 0.3944732722139043, "grad_norm": NaN, "learning_rate": 0.0004857365996744836, "loss": 0.0, "step": 28850 }, { "epoch": 0.39461000471727137, "grad_norm": NaN, "learning_rate": 0.0004857167290833353, "loss": 0.0, "step": 28860 }, { "epoch": 0.3947467372206384, "grad_norm": NaN, "learning_rate": 0.00048569684506779556, "loss": 0.0, "step": 28870 }, { "epoch": 0.39488346972400545, "grad_norm": NaN, "learning_rate": 0.00048567694762899685, "loss": 0.0, "step": 28880 }, { "epoch": 0.3950202022273725, "grad_norm": NaN, "learning_rate": 0.00048565703676807227, "loss": 0.0, "step": 28890 }, { "epoch": 0.39515693473073954, "grad_norm": NaN, "learning_rate": 0.0004856371124861557, "loss": 0.0, "step": 28900 }, { "epoch": 0.3952936672341066, "grad_norm": NaN, "learning_rate": 0.00048561717478438205, "loss": 0.0, "step": 28910 }, { "epoch": 0.39543039973747357, "grad_norm": NaN, "learning_rate": 0.0004855972236638867, "loss": 0.0, "step": 28920 }, { "epoch": 0.3955671322408406, "grad_norm": NaN, "learning_rate": 0.00048557725912580594, "loss": 0.0, "step": 28930 }, { "epoch": 0.39570386474420766, "grad_norm": NaN, "learning_rate": 0.0004855572811712766, "loss": 0.0, "step": 28940 }, { "epoch": 0.3958405972475747, "grad_norm": NaN, "learning_rate": 0.00048553728980143667, "loss": 0.0, "step": 28950 }, { "epoch": 0.39597732975094174, "grad_norm": NaN, "learning_rate": 0.00048551728501742457, "loss": 0.0, "step": 28960 }, { "epoch": 0.3961140622543088, "grad_norm": NaN, "learning_rate": 0.00048549726682037964, "loss": 0.0, "step": 28970 }, { "epoch": 0.39625079475767583, "grad_norm": NaN, "learning_rate": 0.00048547723521144196, "loss": 0.0, "step": 28980 }, { "epoch": 0.39638752726104287, "grad_norm": NaN, "learning_rate": 0.00048545719019175233, "loss": 0.0, "step": 28990 }, { "epoch": 0.3965242597644099, "grad_norm": NaN, "learning_rate": 0.0004854371317624523, "loss": 0.0, "step": 29000 }, { "epoch": 0.39666099226777696, "grad_norm": NaN, "learning_rate": 0.00048541705992468433, "loss": 0.0, "step": 29010 }, { "epoch": 0.39679772477114394, "grad_norm": NaN, "learning_rate": 0.0004853969746795914, "loss": 0.0, "step": 29020 }, { "epoch": 0.396934457274511, "grad_norm": NaN, "learning_rate": 0.00048537687602831747, "loss": 0.0, "step": 29030 }, { "epoch": 0.39707118977787803, "grad_norm": NaN, "learning_rate": 0.00048535676397200723, "loss": 0.0, "step": 29040 }, { "epoch": 0.3972079222812451, "grad_norm": NaN, "learning_rate": 0.00048533663851180604, "loss": 0.0, "step": 29050 }, { "epoch": 0.3973446547846121, "grad_norm": NaN, "learning_rate": 0.00048531649964886006, "loss": 0.0, "step": 29060 }, { "epoch": 0.39748138728797916, "grad_norm": NaN, "learning_rate": 0.00048529634738431627, "loss": 0.0, "step": 29070 }, { "epoch": 0.3976181197913462, "grad_norm": NaN, "learning_rate": 0.0004852761817193223, "loss": 0.0, "step": 29080 }, { "epoch": 0.39775485229471325, "grad_norm": NaN, "learning_rate": 0.0004852560026550267, "loss": 0.0, "step": 29090 }, { "epoch": 0.3978915847980803, "grad_norm": NaN, "learning_rate": 0.00048523581019257854, "loss": 0.0, "step": 29100 }, { "epoch": 0.39802831730144733, "grad_norm": NaN, "learning_rate": 0.0004852156043331281, "loss": 0.0, "step": 29110 }, { "epoch": 0.3981650498048144, "grad_norm": NaN, "learning_rate": 0.0004851953850778258, "loss": 0.0, "step": 29120 }, { "epoch": 0.39830178230818136, "grad_norm": NaN, "learning_rate": 0.00048517515242782336, "loss": 0.0, "step": 29130 }, { "epoch": 0.3984385148115484, "grad_norm": NaN, "learning_rate": 0.00048515490638427293, "loss": 0.0, "step": 29140 }, { "epoch": 0.39857524731491545, "grad_norm": NaN, "learning_rate": 0.0004851346469483278, "loss": 0.0, "step": 29150 }, { "epoch": 0.3987119798182825, "grad_norm": NaN, "learning_rate": 0.0004851143741211414, "loss": 0.0, "step": 29160 }, { "epoch": 0.39884871232164953, "grad_norm": NaN, "learning_rate": 0.0004850940879038686, "loss": 0.0, "step": 29170 }, { "epoch": 0.3989854448250166, "grad_norm": NaN, "learning_rate": 0.0004850737882976646, "loss": 0.0, "step": 29180 }, { "epoch": 0.3991221773283836, "grad_norm": NaN, "learning_rate": 0.00048505347530368545, "loss": 0.0, "step": 29190 }, { "epoch": 0.39925890983175066, "grad_norm": NaN, "learning_rate": 0.0004850331489230881, "loss": 0.0, "step": 29200 }, { "epoch": 0.3993956423351177, "grad_norm": NaN, "learning_rate": 0.0004850128091570301, "loss": 0.0, "step": 29210 }, { "epoch": 0.39953237483848475, "grad_norm": NaN, "learning_rate": 0.0004849924560066699, "loss": 0.0, "step": 29220 }, { "epoch": 0.3996691073418518, "grad_norm": NaN, "learning_rate": 0.0004849720894731665, "loss": 0.0, "step": 29230 }, { "epoch": 0.3998058398452188, "grad_norm": NaN, "learning_rate": 0.00048495170955768, "loss": 0.0, "step": 29240 }, { "epoch": 0.3999425723485858, "grad_norm": NaN, "learning_rate": 0.0004849313162613709, "loss": 0.0, "step": 29250 }, { "epoch": 0.40007930485195287, "grad_norm": NaN, "learning_rate": 0.0004849109095854007, "loss": 0.0, "step": 29260 }, { "epoch": 0.4002160373553199, "grad_norm": NaN, "learning_rate": 0.0004848904895309316, "loss": 0.0, "step": 29270 }, { "epoch": 0.40035276985868695, "grad_norm": NaN, "learning_rate": 0.00048487005609912647, "loss": 0.0, "step": 29280 }, { "epoch": 0.400489502362054, "grad_norm": NaN, "learning_rate": 0.00048484960929114914, "loss": 0.0, "step": 29290 }, { "epoch": 0.40062623486542104, "grad_norm": NaN, "learning_rate": 0.000484829149108164, "loss": 0.0, "step": 29300 }, { "epoch": 0.4007629673687881, "grad_norm": NaN, "learning_rate": 0.0004848086755513362, "loss": 0.0, "step": 29310 }, { "epoch": 0.4008996998721551, "grad_norm": NaN, "learning_rate": 0.000484788188621832, "loss": 0.0, "step": 29320 }, { "epoch": 0.40103643237552217, "grad_norm": NaN, "learning_rate": 0.000484767688320818, "loss": 0.0, "step": 29330 }, { "epoch": 0.4011731648788892, "grad_norm": NaN, "learning_rate": 0.0004847471746494616, "loss": 0.0, "step": 29340 }, { "epoch": 0.4013098973822562, "grad_norm": NaN, "learning_rate": 0.00048472664760893126, "loss": 0.0, "step": 29350 }, { "epoch": 0.40144662988562324, "grad_norm": NaN, "learning_rate": 0.0004847061072003959, "loss": 0.0, "step": 29360 }, { "epoch": 0.4015833623889903, "grad_norm": NaN, "learning_rate": 0.00048468555342502553, "loss": 0.0, "step": 29370 }, { "epoch": 0.4017200948923573, "grad_norm": NaN, "learning_rate": 0.00048466498628399045, "loss": 0.0, "step": 29380 }, { "epoch": 0.40185682739572437, "grad_norm": NaN, "learning_rate": 0.00048464440577846216, "loss": 0.0, "step": 29390 }, { "epoch": 0.4019935598990914, "grad_norm": NaN, "learning_rate": 0.0004846238119096127, "loss": 0.0, "step": 29400 }, { "epoch": 0.40213029240245846, "grad_norm": NaN, "learning_rate": 0.00048460320467861494, "loss": 0.0, "step": 29410 }, { "epoch": 0.4022670249058255, "grad_norm": NaN, "learning_rate": 0.00048458258408664245, "loss": 0.0, "step": 29420 }, { "epoch": 0.40240375740919254, "grad_norm": NaN, "learning_rate": 0.0004845619501348696, "loss": 0.0, "step": 29430 }, { "epoch": 0.4025404899125596, "grad_norm": NaN, "learning_rate": 0.0004845413028244716, "loss": 0.0, "step": 29440 }, { "epoch": 0.40267722241592663, "grad_norm": NaN, "learning_rate": 0.0004845206421566242, "loss": 0.0, "step": 29450 }, { "epoch": 0.4028139549192936, "grad_norm": NaN, "learning_rate": 0.0004844999681325042, "loss": 0.0, "step": 29460 }, { "epoch": 0.40295068742266066, "grad_norm": NaN, "learning_rate": 0.00048447928075328895, "loss": 0.0, "step": 29470 }, { "epoch": 0.4030874199260277, "grad_norm": NaN, "learning_rate": 0.00048445858002015664, "loss": 0.0, "step": 29480 }, { "epoch": 0.40322415242939474, "grad_norm": NaN, "learning_rate": 0.0004844378659342862, "loss": 0.0, "step": 29490 }, { "epoch": 0.4033608849327618, "grad_norm": NaN, "learning_rate": 0.00048441713849685733, "loss": 0.0, "step": 29500 }, { "epoch": 0.40349761743612883, "grad_norm": NaN, "learning_rate": 0.00048439639770905043, "loss": 0.0, "step": 29510 }, { "epoch": 0.4036343499394959, "grad_norm": NaN, "learning_rate": 0.0004843756435720467, "loss": 0.0, "step": 29520 }, { "epoch": 0.4037710824428629, "grad_norm": NaN, "learning_rate": 0.00048435487608702827, "loss": 0.0, "step": 29530 }, { "epoch": 0.40390781494622996, "grad_norm": NaN, "learning_rate": 0.0004843340952551778, "loss": 0.0, "step": 29540 }, { "epoch": 0.404044547449597, "grad_norm": NaN, "learning_rate": 0.0004843133010776787, "loss": 0.0, "step": 29550 }, { "epoch": 0.40418127995296405, "grad_norm": NaN, "learning_rate": 0.0004842924935557154, "loss": 0.0, "step": 29560 }, { "epoch": 0.40431801245633103, "grad_norm": NaN, "learning_rate": 0.0004842716726904727, "loss": 0.0, "step": 29570 }, { "epoch": 0.4044547449596981, "grad_norm": NaN, "learning_rate": 0.0004842508384831366, "loss": 0.0, "step": 29580 }, { "epoch": 0.4045914774630651, "grad_norm": NaN, "learning_rate": 0.0004842299909348934, "loss": 0.0, "step": 29590 }, { "epoch": 0.40472820996643216, "grad_norm": NaN, "learning_rate": 0.00048420913004693057, "loss": 0.0, "step": 29600 }, { "epoch": 0.4048649424697992, "grad_norm": NaN, "learning_rate": 0.0004841882558204361, "loss": 0.0, "step": 29610 }, { "epoch": 0.40500167497316625, "grad_norm": NaN, "learning_rate": 0.0004841673682565989, "loss": 0.0, "step": 29620 }, { "epoch": 0.4051384074765333, "grad_norm": NaN, "learning_rate": 0.0004841464673566085, "loss": 0.0, "step": 29630 }, { "epoch": 0.40527513997990033, "grad_norm": NaN, "learning_rate": 0.0004841255531216551, "loss": 0.0, "step": 29640 }, { "epoch": 0.4054118724832674, "grad_norm": NaN, "learning_rate": 0.00048410462555293, "loss": 0.0, "step": 29650 }, { "epoch": 0.4055486049866344, "grad_norm": NaN, "learning_rate": 0.0004840836846516249, "loss": 0.0, "step": 29660 }, { "epoch": 0.40568533749000146, "grad_norm": NaN, "learning_rate": 0.0004840627304189324, "loss": 0.0, "step": 29670 }, { "epoch": 0.40582206999336845, "grad_norm": NaN, "learning_rate": 0.0004840417628560461, "loss": 0.0, "step": 29680 }, { "epoch": 0.4059588024967355, "grad_norm": NaN, "learning_rate": 0.00048402078196415986, "loss": 0.0, "step": 29690 }, { "epoch": 0.40609553500010254, "grad_norm": NaN, "learning_rate": 0.0004839997877444687, "loss": 0.0, "step": 29700 }, { "epoch": 0.4062322675034696, "grad_norm": NaN, "learning_rate": 0.0004839787801981683, "loss": 0.0, "step": 29710 }, { "epoch": 0.4063690000068366, "grad_norm": NaN, "learning_rate": 0.00048395775932645504, "loss": 0.0, "step": 29720 }, { "epoch": 0.40650573251020367, "grad_norm": NaN, "learning_rate": 0.000483936725130526, "loss": 0.0, "step": 29730 }, { "epoch": 0.4066424650135707, "grad_norm": NaN, "learning_rate": 0.0004839156776115792, "loss": 0.0, "step": 29740 }, { "epoch": 0.40677919751693775, "grad_norm": NaN, "learning_rate": 0.00048389461677081336, "loss": 0.0, "step": 29750 }, { "epoch": 0.4069159300203048, "grad_norm": NaN, "learning_rate": 0.0004838735426094278, "loss": 0.0, "step": 29760 }, { "epoch": 0.40705266252367184, "grad_norm": NaN, "learning_rate": 0.0004838524551286227, "loss": 0.0, "step": 29770 }, { "epoch": 0.4071893950270388, "grad_norm": NaN, "learning_rate": 0.0004838313543295993, "loss": 0.0, "step": 29780 }, { "epoch": 0.40732612753040587, "grad_norm": NaN, "learning_rate": 0.000483810240213559, "loss": 0.0, "step": 29790 }, { "epoch": 0.4074628600337729, "grad_norm": NaN, "learning_rate": 0.0004837891127817045, "loss": 0.0, "step": 29800 }, { "epoch": 0.40759959253713995, "grad_norm": NaN, "learning_rate": 0.00048376797203523883, "loss": 0.0, "step": 29810 }, { "epoch": 0.407736325040507, "grad_norm": NaN, "learning_rate": 0.0004837468179753661, "loss": 0.0, "step": 29820 }, { "epoch": 0.40787305754387404, "grad_norm": NaN, "learning_rate": 0.0004837256506032911, "loss": 0.0, "step": 29830 }, { "epoch": 0.4080097900472411, "grad_norm": NaN, "learning_rate": 0.00048370446992021933, "loss": 0.0, "step": 29840 }, { "epoch": 0.4081465225506081, "grad_norm": NaN, "learning_rate": 0.000483683275927357, "loss": 0.0, "step": 29850 }, { "epoch": 0.40828325505397517, "grad_norm": NaN, "learning_rate": 0.0004836620686259112, "loss": 0.0, "step": 29860 }, { "epoch": 0.4084199875573422, "grad_norm": NaN, "learning_rate": 0.0004836408480170896, "loss": 0.0, "step": 29870 }, { "epoch": 0.40855672006070926, "grad_norm": NaN, "learning_rate": 0.00048361961410210084, "loss": 0.0, "step": 29880 }, { "epoch": 0.40869345256407624, "grad_norm": NaN, "learning_rate": 0.0004835983668821543, "loss": 0.0, "step": 29890 }, { "epoch": 0.4088301850674433, "grad_norm": NaN, "learning_rate": 0.00048357710635845986, "loss": 0.0, "step": 29900 }, { "epoch": 0.40896691757081033, "grad_norm": NaN, "learning_rate": 0.0004835558325322285, "loss": 0.0, "step": 29910 }, { "epoch": 0.40910365007417737, "grad_norm": NaN, "learning_rate": 0.0004835345454046716, "loss": 0.0, "step": 29920 }, { "epoch": 0.4092403825775444, "grad_norm": NaN, "learning_rate": 0.00048351324497700166, "loss": 0.0, "step": 29930 }, { "epoch": 0.40937711508091146, "grad_norm": NaN, "learning_rate": 0.0004834919312504317, "loss": 0.0, "step": 29940 }, { "epoch": 0.4095138475842785, "grad_norm": NaN, "learning_rate": 0.0004834706042261756, "loss": 0.0, "step": 29950 }, { "epoch": 0.40965058008764554, "grad_norm": NaN, "learning_rate": 0.00048344926390544795, "loss": 0.0, "step": 29960 }, { "epoch": 0.4097873125910126, "grad_norm": NaN, "learning_rate": 0.00048342791028946406, "loss": 0.0, "step": 29970 }, { "epoch": 0.40992404509437963, "grad_norm": NaN, "learning_rate": 0.0004834065433794401, "loss": 0.0, "step": 29980 }, { "epoch": 0.4100607775977467, "grad_norm": NaN, "learning_rate": 0.000483385163176593, "loss": 0.0, "step": 29990 }, { "epoch": 0.41019751010111366, "grad_norm": NaN, "learning_rate": 0.00048336376968214023, "loss": 0.0, "step": 30000 }, { "epoch": 0.4103342426044807, "grad_norm": NaN, "learning_rate": 0.00048334236289730026, "loss": 0.0, "step": 30010 }, { "epoch": 0.41047097510784775, "grad_norm": NaN, "learning_rate": 0.00048332094282329234, "loss": 0.0, "step": 30020 }, { "epoch": 0.4106077076112148, "grad_norm": NaN, "learning_rate": 0.00048329950946133626, "loss": 0.0, "step": 30030 }, { "epoch": 0.41074444011458183, "grad_norm": NaN, "learning_rate": 0.0004832780628126527, "loss": 0.0, "step": 30040 }, { "epoch": 0.4108811726179489, "grad_norm": NaN, "learning_rate": 0.0004832566028784631, "loss": 0.0, "step": 30050 }, { "epoch": 0.4110179051213159, "grad_norm": NaN, "learning_rate": 0.00048323512965998955, "loss": 0.0, "step": 30060 }, { "epoch": 0.41115463762468296, "grad_norm": NaN, "learning_rate": 0.00048321364315845507, "loss": 0.0, "step": 30070 }, { "epoch": 0.41129137012805, "grad_norm": NaN, "learning_rate": 0.0004831921433750833, "loss": 0.0, "step": 30080 }, { "epoch": 0.41142810263141705, "grad_norm": NaN, "learning_rate": 0.0004831706303110988, "loss": 0.0, "step": 30090 }, { "epoch": 0.4115648351347841, "grad_norm": NaN, "learning_rate": 0.0004831491039677266, "loss": 0.0, "step": 30100 }, { "epoch": 0.4117015676381511, "grad_norm": NaN, "learning_rate": 0.00048312756434619263, "loss": 0.0, "step": 30110 }, { "epoch": 0.4118383001415181, "grad_norm": NaN, "learning_rate": 0.0004831060114477238, "loss": 0.0, "step": 30120 }, { "epoch": 0.41197503264488516, "grad_norm": NaN, "learning_rate": 0.00048308444527354747, "loss": 0.0, "step": 30130 }, { "epoch": 0.4121117651482522, "grad_norm": NaN, "learning_rate": 0.00048306286582489185, "loss": 0.0, "step": 30140 }, { "epoch": 0.41224849765161925, "grad_norm": NaN, "learning_rate": 0.00048304127310298594, "loss": 0.0, "step": 30150 }, { "epoch": 0.4123852301549863, "grad_norm": NaN, "learning_rate": 0.0004830196671090594, "loss": 0.0, "step": 30160 }, { "epoch": 0.41252196265835334, "grad_norm": NaN, "learning_rate": 0.00048299804784434285, "loss": 0.0, "step": 30170 }, { "epoch": 0.4126586951617204, "grad_norm": NaN, "learning_rate": 0.0004829764153100675, "loss": 0.0, "step": 30180 }, { "epoch": 0.4127954276650874, "grad_norm": NaN, "learning_rate": 0.0004829547695074653, "loss": 0.0, "step": 30190 }, { "epoch": 0.41293216016845447, "grad_norm": NaN, "learning_rate": 0.000482933110437769, "loss": 0.0, "step": 30200 }, { "epoch": 0.4130688926718215, "grad_norm": NaN, "learning_rate": 0.0004829114381022122, "loss": 0.0, "step": 30210 }, { "epoch": 0.4132056251751885, "grad_norm": NaN, "learning_rate": 0.00048288975250202913, "loss": 0.0, "step": 30220 }, { "epoch": 0.41334235767855554, "grad_norm": NaN, "learning_rate": 0.00048286805363845475, "loss": 0.0, "step": 30230 }, { "epoch": 0.4134790901819226, "grad_norm": NaN, "learning_rate": 0.000482846341512725, "loss": 0.0, "step": 30240 }, { "epoch": 0.4136158226852896, "grad_norm": NaN, "learning_rate": 0.0004828246161260762, "loss": 0.0, "step": 30250 }, { "epoch": 0.41375255518865667, "grad_norm": NaN, "learning_rate": 0.0004828028774797458, "loss": 0.0, "step": 30260 }, { "epoch": 0.4138892876920237, "grad_norm": NaN, "learning_rate": 0.0004827811255749718, "loss": 0.0, "step": 30270 }, { "epoch": 0.41402602019539075, "grad_norm": NaN, "learning_rate": 0.00048275936041299294, "loss": 0.0, "step": 30280 }, { "epoch": 0.4141627526987578, "grad_norm": NaN, "learning_rate": 0.0004827375819950489, "loss": 0.0, "step": 30290 }, { "epoch": 0.41429948520212484, "grad_norm": NaN, "learning_rate": 0.0004827157903223799, "loss": 0.0, "step": 30300 }, { "epoch": 0.4144362177054919, "grad_norm": NaN, "learning_rate": 0.00048269398539622704, "loss": 0.0, "step": 30310 }, { "epoch": 0.4145729502088589, "grad_norm": NaN, "learning_rate": 0.00048267216721783215, "loss": 0.0, "step": 30320 }, { "epoch": 0.4147096827122259, "grad_norm": NaN, "learning_rate": 0.00048265033578843774, "loss": 0.0, "step": 30330 }, { "epoch": 0.41484641521559296, "grad_norm": NaN, "learning_rate": 0.0004826284911092872, "loss": 0.0, "step": 30340 }, { "epoch": 0.41498314771896, "grad_norm": NaN, "learning_rate": 0.00048260663318162454, "loss": 0.0, "step": 30350 }, { "epoch": 0.41511988022232704, "grad_norm": NaN, "learning_rate": 0.00048258476200669476, "loss": 0.0, "step": 30360 }, { "epoch": 0.4152566127256941, "grad_norm": NaN, "learning_rate": 0.00048256287758574335, "loss": 0.0, "step": 30370 }, { "epoch": 0.41539334522906113, "grad_norm": NaN, "learning_rate": 0.0004825409799200166, "loss": 0.0, "step": 30380 }, { "epoch": 0.41553007773242817, "grad_norm": NaN, "learning_rate": 0.0004825190690107617, "loss": 0.0, "step": 30390 }, { "epoch": 0.4156668102357952, "grad_norm": NaN, "learning_rate": 0.0004824971448592265, "loss": 0.0, "step": 30400 }, { "epoch": 0.41580354273916226, "grad_norm": NaN, "learning_rate": 0.0004824752074666596, "loss": 0.0, "step": 30410 }, { "epoch": 0.4159402752425293, "grad_norm": NaN, "learning_rate": 0.0004824532568343103, "loss": 0.0, "step": 30420 }, { "epoch": 0.41607700774589634, "grad_norm": NaN, "learning_rate": 0.0004824312929634288, "loss": 0.0, "step": 30430 }, { "epoch": 0.41621374024926333, "grad_norm": NaN, "learning_rate": 0.00048240931585526593, "loss": 0.0, "step": 30440 }, { "epoch": 0.4163504727526304, "grad_norm": NaN, "learning_rate": 0.00048238732551107337, "loss": 0.0, "step": 30450 }, { "epoch": 0.4164872052559974, "grad_norm": NaN, "learning_rate": 0.0004823653219321034, "loss": 0.0, "step": 30460 }, { "epoch": 0.41662393775936446, "grad_norm": NaN, "learning_rate": 0.0004823433051196092, "loss": 0.0, "step": 30470 }, { "epoch": 0.4167606702627315, "grad_norm": NaN, "learning_rate": 0.0004823212750748448, "loss": 0.0, "step": 30480 }, { "epoch": 0.41689740276609855, "grad_norm": NaN, "learning_rate": 0.0004822992317990646, "loss": 0.0, "step": 30490 }, { "epoch": 0.4170341352694656, "grad_norm": NaN, "learning_rate": 0.00048227717529352414, "loss": 0.0, "step": 30500 }, { "epoch": 0.41717086777283263, "grad_norm": NaN, "learning_rate": 0.00048225510555947957, "loss": 0.0, "step": 30510 }, { "epoch": 0.4173076002761997, "grad_norm": NaN, "learning_rate": 0.0004822330225981877, "loss": 0.0, "step": 30520 }, { "epoch": 0.4174443327795667, "grad_norm": NaN, "learning_rate": 0.00048221092641090633, "loss": 0.0, "step": 30530 }, { "epoch": 0.4175810652829337, "grad_norm": NaN, "learning_rate": 0.0004821888169988937, "loss": 0.0, "step": 30540 }, { "epoch": 0.41771779778630075, "grad_norm": NaN, "learning_rate": 0.0004821666943634091, "loss": 0.0, "step": 30550 }, { "epoch": 0.4178545302896678, "grad_norm": NaN, "learning_rate": 0.0004821445585057124, "loss": 0.0, "step": 30560 }, { "epoch": 0.41799126279303483, "grad_norm": NaN, "learning_rate": 0.0004821224094270642, "loss": 0.0, "step": 30570 }, { "epoch": 0.4181279952964019, "grad_norm": NaN, "learning_rate": 0.0004821002471287261, "loss": 0.0, "step": 30580 }, { "epoch": 0.4182647277997689, "grad_norm": NaN, "learning_rate": 0.00048207807161196006, "loss": 0.0, "step": 30590 }, { "epoch": 0.41840146030313596, "grad_norm": NaN, "learning_rate": 0.00048205588287802915, "loss": 0.0, "step": 30600 }, { "epoch": 0.418538192806503, "grad_norm": NaN, "learning_rate": 0.000482033680928197, "loss": 0.0, "step": 30610 }, { "epoch": 0.41867492530987005, "grad_norm": NaN, "learning_rate": 0.00048201146576372806, "loss": 0.0, "step": 30620 }, { "epoch": 0.4188116578132371, "grad_norm": NaN, "learning_rate": 0.00048198923738588753, "loss": 0.0, "step": 30630 }, { "epoch": 0.41894839031660414, "grad_norm": NaN, "learning_rate": 0.00048196699579594127, "loss": 0.0, "step": 30640 }, { "epoch": 0.4190851228199711, "grad_norm": NaN, "learning_rate": 0.000481944740995156, "loss": 0.0, "step": 30650 }, { "epoch": 0.41922185532333817, "grad_norm": NaN, "learning_rate": 0.0004819224729847992, "loss": 0.0, "step": 30660 }, { "epoch": 0.4193585878267052, "grad_norm": NaN, "learning_rate": 0.0004819001917661391, "loss": 0.0, "step": 30670 }, { "epoch": 0.41949532033007225, "grad_norm": NaN, "learning_rate": 0.0004818778973404445, "loss": 0.0, "step": 30680 }, { "epoch": 0.4196320528334393, "grad_norm": NaN, "learning_rate": 0.0004818555897089852, "loss": 0.0, "step": 30690 }, { "epoch": 0.41976878533680634, "grad_norm": NaN, "learning_rate": 0.0004818332688730316, "loss": 0.0, "step": 30700 }, { "epoch": 0.4199055178401734, "grad_norm": NaN, "learning_rate": 0.00048181093483385507, "loss": 0.0, "step": 30710 }, { "epoch": 0.4200422503435404, "grad_norm": NaN, "learning_rate": 0.0004817885875927273, "loss": 0.0, "step": 30720 }, { "epoch": 0.42017898284690747, "grad_norm": NaN, "learning_rate": 0.00048176622715092124, "loss": 0.0, "step": 30730 }, { "epoch": 0.4203157153502745, "grad_norm": NaN, "learning_rate": 0.0004817438535097102, "loss": 0.0, "step": 30740 }, { "epoch": 0.42045244785364155, "grad_norm": NaN, "learning_rate": 0.00048172146667036837, "loss": 0.0, "step": 30750 }, { "epoch": 0.42058918035700854, "grad_norm": NaN, "learning_rate": 0.00048169906663417077, "loss": 0.0, "step": 30760 }, { "epoch": 0.4207259128603756, "grad_norm": NaN, "learning_rate": 0.0004816766534023932, "loss": 0.0, "step": 30770 }, { "epoch": 0.4208626453637426, "grad_norm": NaN, "learning_rate": 0.00048165422697631197, "loss": 0.0, "step": 30780 }, { "epoch": 0.42099937786710967, "grad_norm": NaN, "learning_rate": 0.0004816317873572044, "loss": 0.0, "step": 30790 }, { "epoch": 0.4211361103704767, "grad_norm": NaN, "learning_rate": 0.0004816093345463484, "loss": 0.0, "step": 30800 }, { "epoch": 0.42127284287384376, "grad_norm": NaN, "learning_rate": 0.0004815868685450226, "loss": 0.0, "step": 30810 }, { "epoch": 0.4214095753772108, "grad_norm": NaN, "learning_rate": 0.00048156438935450667, "loss": 0.0, "step": 30820 }, { "epoch": 0.42154630788057784, "grad_norm": NaN, "learning_rate": 0.00048154189697608075, "loss": 0.0, "step": 30830 }, { "epoch": 0.4216830403839449, "grad_norm": NaN, "learning_rate": 0.0004815193914110258, "loss": 0.0, "step": 30840 }, { "epoch": 0.42181977288731193, "grad_norm": NaN, "learning_rate": 0.00048149687266062354, "loss": 0.0, "step": 30850 }, { "epoch": 0.42195650539067897, "grad_norm": NaN, "learning_rate": 0.0004814743407261564, "loss": 0.0, "step": 30860 }, { "epoch": 0.42209323789404596, "grad_norm": NaN, "learning_rate": 0.00048145179560890773, "loss": 0.0, "step": 30870 }, { "epoch": 0.422229970397413, "grad_norm": NaN, "learning_rate": 0.00048142923731016134, "loss": 0.0, "step": 30880 }, { "epoch": 0.42236670290078004, "grad_norm": NaN, "learning_rate": 0.00048140666583120205, "loss": 0.0, "step": 30890 }, { "epoch": 0.4225034354041471, "grad_norm": NaN, "learning_rate": 0.00048138408117331537, "loss": 0.0, "step": 30900 }, { "epoch": 0.42264016790751413, "grad_norm": NaN, "learning_rate": 0.0004813614833377875, "loss": 0.0, "step": 30910 }, { "epoch": 0.4227769004108812, "grad_norm": NaN, "learning_rate": 0.00048133887232590535, "loss": 0.0, "step": 30920 }, { "epoch": 0.4229136329142482, "grad_norm": NaN, "learning_rate": 0.0004813162481389567, "loss": 0.0, "step": 30930 }, { "epoch": 0.42305036541761526, "grad_norm": NaN, "learning_rate": 0.00048129361077823006, "loss": 0.0, "step": 30940 }, { "epoch": 0.4231870979209823, "grad_norm": NaN, "learning_rate": 0.00048127096024501464, "loss": 0.0, "step": 30950 }, { "epoch": 0.42332383042434935, "grad_norm": NaN, "learning_rate": 0.00048124829654060044, "loss": 0.0, "step": 30960 }, { "epoch": 0.4234605629277164, "grad_norm": NaN, "learning_rate": 0.0004812256196662781, "loss": 0.0, "step": 30970 }, { "epoch": 0.4235972954310834, "grad_norm": NaN, "learning_rate": 0.0004812029296233391, "loss": 0.0, "step": 30980 }, { "epoch": 0.4237340279344504, "grad_norm": NaN, "learning_rate": 0.0004811802264130758, "loss": 0.0, "step": 30990 }, { "epoch": 0.42387076043781746, "grad_norm": NaN, "learning_rate": 0.0004811575100367811, "loss": 0.0, "step": 31000 }, { "epoch": 0.4240074929411845, "grad_norm": NaN, "learning_rate": 0.00048113478049574874, "loss": 0.0, "step": 31010 }, { "epoch": 0.42414422544455155, "grad_norm": NaN, "learning_rate": 0.0004811120377912731, "loss": 0.0, "step": 31020 }, { "epoch": 0.4242809579479186, "grad_norm": NaN, "learning_rate": 0.0004810892819246496, "loss": 0.0, "step": 31030 }, { "epoch": 0.42441769045128563, "grad_norm": NaN, "learning_rate": 0.00048106651289717405, "loss": 0.0, "step": 31040 }, { "epoch": 0.4245544229546527, "grad_norm": NaN, "learning_rate": 0.0004810437307101433, "loss": 0.0, "step": 31050 }, { "epoch": 0.4246911554580197, "grad_norm": NaN, "learning_rate": 0.0004810209353648548, "loss": 0.0, "step": 31060 }, { "epoch": 0.42482788796138676, "grad_norm": NaN, "learning_rate": 0.0004809981268626067, "loss": 0.0, "step": 31070 }, { "epoch": 0.4249646204647538, "grad_norm": NaN, "learning_rate": 0.000480975305204698, "loss": 0.0, "step": 31080 }, { "epoch": 0.4251013529681208, "grad_norm": NaN, "learning_rate": 0.0004809524703924285, "loss": 0.0, "step": 31090 }, { "epoch": 0.42523808547148784, "grad_norm": NaN, "learning_rate": 0.0004809296224270986, "loss": 0.0, "step": 31100 }, { "epoch": 0.4253748179748549, "grad_norm": NaN, "learning_rate": 0.0004809067613100095, "loss": 0.0, "step": 31110 }, { "epoch": 0.4255115504782219, "grad_norm": NaN, "learning_rate": 0.0004808838870424633, "loss": 0.0, "step": 31120 }, { "epoch": 0.42564828298158897, "grad_norm": NaN, "learning_rate": 0.0004808609996257626, "loss": 0.0, "step": 31130 }, { "epoch": 0.425785015484956, "grad_norm": NaN, "learning_rate": 0.0004808380990612108, "loss": 0.0, "step": 31140 }, { "epoch": 0.42592174798832305, "grad_norm": NaN, "learning_rate": 0.00048081518535011237, "loss": 0.0, "step": 31150 }, { "epoch": 0.4260584804916901, "grad_norm": NaN, "learning_rate": 0.0004807922584937721, "loss": 0.0, "step": 31160 }, { "epoch": 0.42619521299505714, "grad_norm": NaN, "learning_rate": 0.0004807693184934957, "loss": 0.0, "step": 31170 }, { "epoch": 0.4263319454984242, "grad_norm": NaN, "learning_rate": 0.0004807463653505897, "loss": 0.0, "step": 31180 }, { "epoch": 0.42646867800179117, "grad_norm": NaN, "learning_rate": 0.0004807233990663613, "loss": 0.0, "step": 31190 }, { "epoch": 0.4266054105051582, "grad_norm": NaN, "learning_rate": 0.00048070041964211845, "loss": 0.0, "step": 31200 }, { "epoch": 0.42674214300852525, "grad_norm": NaN, "learning_rate": 0.0004806774270791698, "loss": 0.0, "step": 31210 }, { "epoch": 0.4268788755118923, "grad_norm": NaN, "learning_rate": 0.00048065442137882496, "loss": 0.0, "step": 31220 }, { "epoch": 0.42701560801525934, "grad_norm": NaN, "learning_rate": 0.00048063140254239403, "loss": 0.0, "step": 31230 }, { "epoch": 0.4271523405186264, "grad_norm": NaN, "learning_rate": 0.0004806083705711879, "loss": 0.0, "step": 31240 }, { "epoch": 0.4272890730219934, "grad_norm": NaN, "learning_rate": 0.0004805853254665184, "loss": 0.0, "step": 31250 }, { "epoch": 0.42742580552536047, "grad_norm": NaN, "learning_rate": 0.0004805622672296979, "loss": 0.0, "step": 31260 }, { "epoch": 0.4275625380287275, "grad_norm": NaN, "learning_rate": 0.00048053919586203963, "loss": 0.0, "step": 31270 }, { "epoch": 0.42769927053209456, "grad_norm": NaN, "learning_rate": 0.0004805161113648576, "loss": 0.0, "step": 31280 }, { "epoch": 0.4278360030354616, "grad_norm": NaN, "learning_rate": 0.0004804930137394664, "loss": 0.0, "step": 31290 }, { "epoch": 0.4279727355388286, "grad_norm": NaN, "learning_rate": 0.00048046990298718147, "loss": 0.0, "step": 31300 }, { "epoch": 0.42810946804219563, "grad_norm": NaN, "learning_rate": 0.0004804467791093191, "loss": 0.0, "step": 31310 }, { "epoch": 0.42824620054556267, "grad_norm": NaN, "learning_rate": 0.00048042364210719614, "loss": 0.0, "step": 31320 }, { "epoch": 0.4283829330489297, "grad_norm": NaN, "learning_rate": 0.00048040049198213024, "loss": 0.0, "step": 31330 }, { "epoch": 0.42851966555229676, "grad_norm": NaN, "learning_rate": 0.00048037732873543995, "loss": 0.0, "step": 31340 }, { "epoch": 0.4286563980556638, "grad_norm": NaN, "learning_rate": 0.00048035415236844436, "loss": 0.0, "step": 31350 }, { "epoch": 0.42879313055903084, "grad_norm": NaN, "learning_rate": 0.0004803309628824635, "loss": 0.0, "step": 31360 }, { "epoch": 0.4289298630623979, "grad_norm": NaN, "learning_rate": 0.00048030776027881784, "loss": 0.0, "step": 31370 }, { "epoch": 0.42906659556576493, "grad_norm": NaN, "learning_rate": 0.000480284544558829, "loss": 0.0, "step": 31380 }, { "epoch": 0.429203328069132, "grad_norm": NaN, "learning_rate": 0.00048026131572381916, "loss": 0.0, "step": 31390 }, { "epoch": 0.429340060572499, "grad_norm": NaN, "learning_rate": 0.00048023807377511104, "loss": 0.0, "step": 31400 }, { "epoch": 0.429476793075866, "grad_norm": NaN, "learning_rate": 0.0004802148187140284, "loss": 0.0, "step": 31410 }, { "epoch": 0.42961352557923305, "grad_norm": NaN, "learning_rate": 0.0004801915505418957, "loss": 0.0, "step": 31420 }, { "epoch": 0.4297502580826001, "grad_norm": NaN, "learning_rate": 0.00048016826926003804, "loss": 0.0, "step": 31430 }, { "epoch": 0.42988699058596713, "grad_norm": NaN, "learning_rate": 0.00048014497486978137, "loss": 0.0, "step": 31440 }, { "epoch": 0.4300237230893342, "grad_norm": NaN, "learning_rate": 0.00048012166737245224, "loss": 0.0, "step": 31450 }, { "epoch": 0.4301604555927012, "grad_norm": NaN, "learning_rate": 0.0004800983467693781, "loss": 0.0, "step": 31460 }, { "epoch": 0.43029718809606826, "grad_norm": NaN, "learning_rate": 0.0004800750130618872, "loss": 0.0, "step": 31470 }, { "epoch": 0.4304339205994353, "grad_norm": NaN, "learning_rate": 0.00048005166625130825, "loss": 0.0, "step": 31480 }, { "epoch": 0.43057065310280235, "grad_norm": NaN, "learning_rate": 0.0004800283063389709, "loss": 0.0, "step": 31490 }, { "epoch": 0.4307073856061694, "grad_norm": NaN, "learning_rate": 0.0004800049333262057, "loss": 0.0, "step": 31500 }, { "epoch": 0.43084411810953643, "grad_norm": NaN, "learning_rate": 0.00047998154721434354, "loss": 0.0, "step": 31510 }, { "epoch": 0.4309808506129034, "grad_norm": NaN, "learning_rate": 0.0004799581480047165, "loss": 0.0, "step": 31520 }, { "epoch": 0.43111758311627046, "grad_norm": NaN, "learning_rate": 0.0004799347356986571, "loss": 0.0, "step": 31530 }, { "epoch": 0.4312543156196375, "grad_norm": NaN, "learning_rate": 0.0004799113102974987, "loss": 0.0, "step": 31540 }, { "epoch": 0.43139104812300455, "grad_norm": NaN, "learning_rate": 0.00047988787180257536, "loss": 0.0, "step": 31550 }, { "epoch": 0.4315277806263716, "grad_norm": NaN, "learning_rate": 0.00047986442021522203, "loss": 0.0, "step": 31560 }, { "epoch": 0.43166451312973864, "grad_norm": NaN, "learning_rate": 0.0004798409555367743, "loss": 0.0, "step": 31570 }, { "epoch": 0.4318012456331057, "grad_norm": NaN, "learning_rate": 0.0004798174777685684, "loss": 0.0, "step": 31580 }, { "epoch": 0.4319379781364727, "grad_norm": NaN, "learning_rate": 0.00047979398691194166, "loss": 0.0, "step": 31590 }, { "epoch": 0.43207471063983977, "grad_norm": NaN, "learning_rate": 0.0004797704829682317, "loss": 0.0, "step": 31600 }, { "epoch": 0.4322114431432068, "grad_norm": NaN, "learning_rate": 0.00047974696593877707, "loss": 0.0, "step": 31610 }, { "epoch": 0.43234817564657385, "grad_norm": NaN, "learning_rate": 0.0004797234358249173, "loss": 0.0, "step": 31620 }, { "epoch": 0.43248490814994084, "grad_norm": NaN, "learning_rate": 0.0004796998926279924, "loss": 0.0, "step": 31630 }, { "epoch": 0.4326216406533079, "grad_norm": NaN, "learning_rate": 0.000479676336349343, "loss": 0.0, "step": 31640 }, { "epoch": 0.4327583731566749, "grad_norm": NaN, "learning_rate": 0.00047965276699031095, "loss": 0.0, "step": 31650 }, { "epoch": 0.43289510566004197, "grad_norm": NaN, "learning_rate": 0.00047962918455223837, "loss": 0.0, "step": 31660 }, { "epoch": 0.433031838163409, "grad_norm": NaN, "learning_rate": 0.00047960558903646834, "loss": 0.0, "step": 31670 }, { "epoch": 0.43316857066677605, "grad_norm": NaN, "learning_rate": 0.0004795819804443447, "loss": 0.0, "step": 31680 }, { "epoch": 0.4333053031701431, "grad_norm": NaN, "learning_rate": 0.000479558358777212, "loss": 0.0, "step": 31690 }, { "epoch": 0.43344203567351014, "grad_norm": NaN, "learning_rate": 0.0004795347240364154, "loss": 0.0, "step": 31700 }, { "epoch": 0.4335787681768772, "grad_norm": NaN, "learning_rate": 0.000479511076223301, "loss": 0.0, "step": 31710 }, { "epoch": 0.4337155006802442, "grad_norm": NaN, "learning_rate": 0.00047948741533921567, "loss": 0.0, "step": 31720 }, { "epoch": 0.43385223318361127, "grad_norm": NaN, "learning_rate": 0.00047946374138550693, "loss": 0.0, "step": 31730 }, { "epoch": 0.43398896568697826, "grad_norm": NaN, "learning_rate": 0.0004794400543635229, "loss": 0.0, "step": 31740 }, { "epoch": 0.4341256981903453, "grad_norm": NaN, "learning_rate": 0.0004794163542746126, "loss": 0.0, "step": 31750 }, { "epoch": 0.43426243069371234, "grad_norm": NaN, "learning_rate": 0.00047939264112012594, "loss": 0.0, "step": 31760 }, { "epoch": 0.4343991631970794, "grad_norm": NaN, "learning_rate": 0.00047936891490141335, "loss": 0.0, "step": 31770 }, { "epoch": 0.43453589570044643, "grad_norm": NaN, "learning_rate": 0.0004793451756198259, "loss": 0.0, "step": 31780 }, { "epoch": 0.43467262820381347, "grad_norm": NaN, "learning_rate": 0.0004793214232767158, "loss": 0.0, "step": 31790 }, { "epoch": 0.4348093607071805, "grad_norm": NaN, "learning_rate": 0.0004792976578734357, "loss": 0.0, "step": 31800 }, { "epoch": 0.43494609321054756, "grad_norm": NaN, "learning_rate": 0.00047927387941133906, "loss": 0.0, "step": 31810 }, { "epoch": 0.4350828257139146, "grad_norm": NaN, "learning_rate": 0.0004792500878917801, "loss": 0.0, "step": 31820 }, { "epoch": 0.43521955821728164, "grad_norm": NaN, "learning_rate": 0.0004792262833161138, "loss": 0.0, "step": 31830 }, { "epoch": 0.4353562907206487, "grad_norm": NaN, "learning_rate": 0.0004792024656856958, "loss": 0.0, "step": 31840 }, { "epoch": 0.4354930232240157, "grad_norm": NaN, "learning_rate": 0.00047917863500188264, "loss": 0.0, "step": 31850 }, { "epoch": 0.4356297557273827, "grad_norm": NaN, "learning_rate": 0.00047915479126603146, "loss": 0.0, "step": 31860 }, { "epoch": 0.43576648823074976, "grad_norm": NaN, "learning_rate": 0.00047913093447950016, "loss": 0.0, "step": 31870 }, { "epoch": 0.4359032207341168, "grad_norm": NaN, "learning_rate": 0.0004791070646436475, "loss": 0.0, "step": 31880 }, { "epoch": 0.43603995323748385, "grad_norm": NaN, "learning_rate": 0.00047908318175983276, "loss": 0.0, "step": 31890 }, { "epoch": 0.4361766857408509, "grad_norm": NaN, "learning_rate": 0.00047905928582941634, "loss": 0.0, "step": 31900 }, { "epoch": 0.43631341824421793, "grad_norm": NaN, "learning_rate": 0.0004790353768537589, "loss": 0.0, "step": 31910 }, { "epoch": 0.436450150747585, "grad_norm": NaN, "learning_rate": 0.00047901145483422226, "loss": 0.0, "step": 31920 }, { "epoch": 0.436586883250952, "grad_norm": NaN, "learning_rate": 0.0004789875197721687, "loss": 0.0, "step": 31930 }, { "epoch": 0.43672361575431906, "grad_norm": NaN, "learning_rate": 0.00047896357166896136, "loss": 0.0, "step": 31940 }, { "epoch": 0.43686034825768605, "grad_norm": NaN, "learning_rate": 0.00047893961052596426, "loss": 0.0, "step": 31950 }, { "epoch": 0.4369970807610531, "grad_norm": NaN, "learning_rate": 0.00047891563634454184, "loss": 0.0, "step": 31960 }, { "epoch": 0.43713381326442013, "grad_norm": NaN, "learning_rate": 0.00047889164912605955, "loss": 0.0, "step": 31970 }, { "epoch": 0.4372705457677872, "grad_norm": NaN, "learning_rate": 0.0004788676488718835, "loss": 0.0, "step": 31980 }, { "epoch": 0.4374072782711542, "grad_norm": NaN, "learning_rate": 0.0004788436355833805, "loss": 0.0, "step": 31990 }, { "epoch": 0.43754401077452126, "grad_norm": NaN, "learning_rate": 0.00047881960926191816, "loss": 0.0, "step": 32000 }, { "epoch": 0.4376807432778883, "grad_norm": NaN, "learning_rate": 0.0004787955699088648, "loss": 0.0, "step": 32010 }, { "epoch": 0.43781747578125535, "grad_norm": NaN, "learning_rate": 0.00047877151752558956, "loss": 0.0, "step": 32020 }, { "epoch": 0.4379542082846224, "grad_norm": NaN, "learning_rate": 0.00047874745211346214, "loss": 0.0, "step": 32030 }, { "epoch": 0.43809094078798944, "grad_norm": NaN, "learning_rate": 0.0004787233736738532, "loss": 0.0, "step": 32040 }, { "epoch": 0.4382276732913565, "grad_norm": NaN, "learning_rate": 0.00047869928220813394, "loss": 0.0, "step": 32050 }, { "epoch": 0.43836440579472347, "grad_norm": NaN, "learning_rate": 0.00047867517771767653, "loss": 0.0, "step": 32060 }, { "epoch": 0.4385011382980905, "grad_norm": NaN, "learning_rate": 0.0004786510602038537, "loss": 0.0, "step": 32070 }, { "epoch": 0.43863787080145755, "grad_norm": NaN, "learning_rate": 0.0004786269296680389, "loss": 0.0, "step": 32080 }, { "epoch": 0.4387746033048246, "grad_norm": NaN, "learning_rate": 0.00047860278611160654, "loss": 0.0, "step": 32090 }, { "epoch": 0.43891133580819164, "grad_norm": NaN, "learning_rate": 0.0004785786295359315, "loss": 0.0, "step": 32100 }, { "epoch": 0.4390480683115587, "grad_norm": NaN, "learning_rate": 0.0004785544599423895, "loss": 0.0, "step": 32110 }, { "epoch": 0.4391848008149257, "grad_norm": NaN, "learning_rate": 0.00047853027733235723, "loss": 0.0, "step": 32120 }, { "epoch": 0.43932153331829277, "grad_norm": NaN, "learning_rate": 0.00047850608170721176, "loss": 0.0, "step": 32130 }, { "epoch": 0.4394582658216598, "grad_norm": NaN, "learning_rate": 0.0004784818730683311, "loss": 0.0, "step": 32140 }, { "epoch": 0.43959499832502685, "grad_norm": NaN, "learning_rate": 0.00047845765141709404, "loss": 0.0, "step": 32150 }, { "epoch": 0.4397317308283939, "grad_norm": NaN, "learning_rate": 0.0004784334167548799, "loss": 0.0, "step": 32160 }, { "epoch": 0.4398684633317609, "grad_norm": NaN, "learning_rate": 0.00047840916908306895, "loss": 0.0, "step": 32170 }, { "epoch": 0.4400051958351279, "grad_norm": NaN, "learning_rate": 0.0004783849084030422, "loss": 0.0, "step": 32180 }, { "epoch": 0.44014192833849497, "grad_norm": NaN, "learning_rate": 0.0004783606347161811, "loss": 0.0, "step": 32190 }, { "epoch": 0.440278660841862, "grad_norm": NaN, "learning_rate": 0.00047833634802386835, "loss": 0.0, "step": 32200 }, { "epoch": 0.44041539334522906, "grad_norm": NaN, "learning_rate": 0.0004783120483274869, "loss": 0.0, "step": 32210 }, { "epoch": 0.4405521258485961, "grad_norm": NaN, "learning_rate": 0.00047828773562842074, "loss": 0.0, "step": 32220 }, { "epoch": 0.44068885835196314, "grad_norm": NaN, "learning_rate": 0.0004782634099280546, "loss": 0.0, "step": 32230 }, { "epoch": 0.4408255908553302, "grad_norm": NaN, "learning_rate": 0.0004782390712277737, "loss": 0.0, "step": 32240 }, { "epoch": 0.44096232335869723, "grad_norm": NaN, "learning_rate": 0.0004782147195289642, "loss": 0.0, "step": 32250 }, { "epoch": 0.44109905586206427, "grad_norm": NaN, "learning_rate": 0.0004781903548330131, "loss": 0.0, "step": 32260 }, { "epoch": 0.4412357883654313, "grad_norm": NaN, "learning_rate": 0.0004781659771413078, "loss": 0.0, "step": 32270 }, { "epoch": 0.4413725208687983, "grad_norm": NaN, "learning_rate": 0.0004781415864552368, "loss": 0.0, "step": 32280 }, { "epoch": 0.44150925337216534, "grad_norm": NaN, "learning_rate": 0.0004781171827761891, "loss": 0.0, "step": 32290 }, { "epoch": 0.4416459858755324, "grad_norm": NaN, "learning_rate": 0.00047809276610555454, "loss": 0.0, "step": 32300 }, { "epoch": 0.44178271837889943, "grad_norm": NaN, "learning_rate": 0.0004780683364447237, "loss": 0.0, "step": 32310 }, { "epoch": 0.4419194508822665, "grad_norm": NaN, "learning_rate": 0.00047804389379508785, "loss": 0.0, "step": 32320 }, { "epoch": 0.4420561833856335, "grad_norm": NaN, "learning_rate": 0.00047801943815803903, "loss": 0.0, "step": 32330 }, { "epoch": 0.44219291588900056, "grad_norm": NaN, "learning_rate": 0.00047799496953497013, "loss": 0.0, "step": 32340 }, { "epoch": 0.4423296483923676, "grad_norm": NaN, "learning_rate": 0.00047797048792727446, "loss": 0.0, "step": 32350 }, { "epoch": 0.44246638089573465, "grad_norm": NaN, "learning_rate": 0.0004779459933363465, "loss": 0.0, "step": 32360 }, { "epoch": 0.4426031133991017, "grad_norm": NaN, "learning_rate": 0.0004779214857635812, "loss": 0.0, "step": 32370 }, { "epoch": 0.44273984590246873, "grad_norm": NaN, "learning_rate": 0.0004778969652103742, "loss": 0.0, "step": 32380 }, { "epoch": 0.4428765784058357, "grad_norm": NaN, "learning_rate": 0.000477872431678122, "loss": 0.0, "step": 32390 }, { "epoch": 0.44301331090920276, "grad_norm": NaN, "learning_rate": 0.0004778478851682219, "loss": 0.0, "step": 32400 }, { "epoch": 0.4431500434125698, "grad_norm": NaN, "learning_rate": 0.00047782332568207186, "loss": 0.0, "step": 32410 }, { "epoch": 0.44328677591593685, "grad_norm": NaN, "learning_rate": 0.00047779875322107046, "loss": 0.0, "step": 32420 }, { "epoch": 0.4434235084193039, "grad_norm": NaN, "learning_rate": 0.0004777741677866172, "loss": 0.0, "step": 32430 }, { "epoch": 0.44356024092267093, "grad_norm": NaN, "learning_rate": 0.0004777495693801123, "loss": 0.0, "step": 32440 }, { "epoch": 0.443696973426038, "grad_norm": NaN, "learning_rate": 0.00047772495800295665, "loss": 0.0, "step": 32450 }, { "epoch": 0.443833705929405, "grad_norm": NaN, "learning_rate": 0.00047770033365655183, "loss": 0.0, "step": 32460 }, { "epoch": 0.44397043843277206, "grad_norm": NaN, "learning_rate": 0.00047767569634230033, "loss": 0.0, "step": 32470 }, { "epoch": 0.4441071709361391, "grad_norm": NaN, "learning_rate": 0.0004776510460616052, "loss": 0.0, "step": 32480 }, { "epoch": 0.44424390343950615, "grad_norm": NaN, "learning_rate": 0.00047762638281587035, "loss": 0.0, "step": 32490 }, { "epoch": 0.44438063594287314, "grad_norm": NaN, "learning_rate": 0.0004776017066065004, "loss": 0.0, "step": 32500 }, { "epoch": 0.4445173684462402, "grad_norm": NaN, "learning_rate": 0.00047757701743490066, "loss": 0.0, "step": 32510 }, { "epoch": 0.4446541009496072, "grad_norm": NaN, "learning_rate": 0.0004775523153024772, "loss": 0.0, "step": 32520 }, { "epoch": 0.44479083345297427, "grad_norm": NaN, "learning_rate": 0.0004775276002106369, "loss": 0.0, "step": 32530 }, { "epoch": 0.4449275659563413, "grad_norm": NaN, "learning_rate": 0.00047750287216078726, "loss": 0.0, "step": 32540 }, { "epoch": 0.44506429845970835, "grad_norm": NaN, "learning_rate": 0.00047747813115433656, "loss": 0.0, "step": 32550 }, { "epoch": 0.4452010309630754, "grad_norm": NaN, "learning_rate": 0.000477453377192694, "loss": 0.0, "step": 32560 }, { "epoch": 0.44533776346644244, "grad_norm": NaN, "learning_rate": 0.00047742861027726916, "loss": 0.0, "step": 32570 }, { "epoch": 0.4454744959698095, "grad_norm": NaN, "learning_rate": 0.00047740383040947257, "loss": 0.0, "step": 32580 }, { "epoch": 0.4456112284731765, "grad_norm": NaN, "learning_rate": 0.00047737903759071553, "loss": 0.0, "step": 32590 }, { "epoch": 0.4457479609765435, "grad_norm": NaN, "learning_rate": 0.00047735423182241, "loss": 0.0, "step": 32600 }, { "epoch": 0.44588469347991055, "grad_norm": NaN, "learning_rate": 0.0004773294131059688, "loss": 0.0, "step": 32610 }, { "epoch": 0.4460214259832776, "grad_norm": NaN, "learning_rate": 0.00047730458144280526, "loss": 0.0, "step": 32620 }, { "epoch": 0.44615815848664464, "grad_norm": NaN, "learning_rate": 0.00047727973683433367, "loss": 0.0, "step": 32630 }, { "epoch": 0.4462948909900117, "grad_norm": NaN, "learning_rate": 0.0004772548792819689, "loss": 0.0, "step": 32640 }, { "epoch": 0.4464316234933787, "grad_norm": NaN, "learning_rate": 0.00047723000878712655, "loss": 0.0, "step": 32650 }, { "epoch": 0.44656835599674577, "grad_norm": NaN, "learning_rate": 0.00047720512535122324, "loss": 0.0, "step": 32660 }, { "epoch": 0.4467050885001128, "grad_norm": NaN, "learning_rate": 0.00047718022897567594, "loss": 0.0, "step": 32670 }, { "epoch": 0.44684182100347986, "grad_norm": NaN, "learning_rate": 0.00047715531966190263, "loss": 0.0, "step": 32680 }, { "epoch": 0.4469785535068469, "grad_norm": NaN, "learning_rate": 0.00047713039741132193, "loss": 0.0, "step": 32690 }, { "epoch": 0.44711528601021394, "grad_norm": NaN, "learning_rate": 0.000477105462225353, "loss": 0.0, "step": 32700 }, { "epoch": 0.44725201851358093, "grad_norm": NaN, "learning_rate": 0.00047708051410541623, "loss": 0.0, "step": 32710 }, { "epoch": 0.44738875101694797, "grad_norm": NaN, "learning_rate": 0.0004770555530529322, "loss": 0.0, "step": 32720 }, { "epoch": 0.447525483520315, "grad_norm": NaN, "learning_rate": 0.00047703057906932274, "loss": 0.0, "step": 32730 }, { "epoch": 0.44766221602368206, "grad_norm": NaN, "learning_rate": 0.0004770055921560099, "loss": 0.0, "step": 32740 }, { "epoch": 0.4477989485270491, "grad_norm": NaN, "learning_rate": 0.0004769805923144168, "loss": 0.0, "step": 32750 }, { "epoch": 0.44793568103041614, "grad_norm": NaN, "learning_rate": 0.0004769555795459673, "loss": 0.0, "step": 32760 }, { "epoch": 0.4480724135337832, "grad_norm": NaN, "learning_rate": 0.00047693055385208586, "loss": 0.0, "step": 32770 }, { "epoch": 0.44820914603715023, "grad_norm": NaN, "learning_rate": 0.00047690551523419766, "loss": 0.0, "step": 32780 }, { "epoch": 0.4483458785405173, "grad_norm": NaN, "learning_rate": 0.0004768804636937287, "loss": 0.0, "step": 32790 }, { "epoch": 0.4484826110438843, "grad_norm": NaN, "learning_rate": 0.00047685539923210575, "loss": 0.0, "step": 32800 }, { "epoch": 0.44861934354725136, "grad_norm": NaN, "learning_rate": 0.0004768303218507563, "loss": 0.0, "step": 32810 }, { "epoch": 0.44875607605061835, "grad_norm": NaN, "learning_rate": 0.0004768052315511085, "loss": 0.0, "step": 32820 }, { "epoch": 0.4488928085539854, "grad_norm": NaN, "learning_rate": 0.0004767801283345912, "loss": 0.0, "step": 32830 }, { "epoch": 0.44902954105735243, "grad_norm": NaN, "learning_rate": 0.0004767550122026342, "loss": 0.0, "step": 32840 }, { "epoch": 0.4491662735607195, "grad_norm": NaN, "learning_rate": 0.0004767298831566678, "loss": 0.0, "step": 32850 }, { "epoch": 0.4493030060640865, "grad_norm": NaN, "learning_rate": 0.00047670474119812316, "loss": 0.0, "step": 32860 }, { "epoch": 0.44943973856745356, "grad_norm": NaN, "learning_rate": 0.0004766795863284322, "loss": 0.0, "step": 32870 }, { "epoch": 0.4495764710708206, "grad_norm": NaN, "learning_rate": 0.00047665441854902743, "loss": 0.0, "step": 32880 }, { "epoch": 0.44971320357418765, "grad_norm": NaN, "learning_rate": 0.00047662923786134223, "loss": 0.0, "step": 32890 }, { "epoch": 0.4498499360775547, "grad_norm": NaN, "learning_rate": 0.0004766040442668107, "loss": 0.0, "step": 32900 }, { "epoch": 0.44998666858092173, "grad_norm": NaN, "learning_rate": 0.0004765788377668676, "loss": 0.0, "step": 32910 }, { "epoch": 0.4501234010842888, "grad_norm": NaN, "learning_rate": 0.0004765536183629485, "loss": 0.0, "step": 32920 }, { "epoch": 0.45026013358765576, "grad_norm": NaN, "learning_rate": 0.0004765283860564897, "loss": 0.0, "step": 32930 }, { "epoch": 0.4503968660910228, "grad_norm": NaN, "learning_rate": 0.0004765031408489283, "loss": 0.0, "step": 32940 }, { "epoch": 0.45053359859438985, "grad_norm": NaN, "learning_rate": 0.0004764778827417019, "loss": 0.0, "step": 32950 }, { "epoch": 0.4506703310977569, "grad_norm": NaN, "learning_rate": 0.000476452611736249, "loss": 0.0, "step": 32960 }, { "epoch": 0.45080706360112394, "grad_norm": NaN, "learning_rate": 0.0004764273278340088, "loss": 0.0, "step": 32970 }, { "epoch": 0.450943796104491, "grad_norm": NaN, "learning_rate": 0.0004764020310364214, "loss": 0.0, "step": 32980 }, { "epoch": 0.451080528607858, "grad_norm": NaN, "learning_rate": 0.0004763767213449274, "loss": 0.0, "step": 32990 }, { "epoch": 0.45121726111122507, "grad_norm": NaN, "learning_rate": 0.0004763513987609681, "loss": 0.0, "step": 33000 }, { "epoch": 0.4513539936145921, "grad_norm": NaN, "learning_rate": 0.00047632606328598583, "loss": 0.0, "step": 33010 }, { "epoch": 0.45149072611795915, "grad_norm": NaN, "learning_rate": 0.00047630071492142347, "loss": 0.0, "step": 33020 }, { "epoch": 0.4516274586213262, "grad_norm": NaN, "learning_rate": 0.00047627535366872453, "loss": 0.0, "step": 33030 }, { "epoch": 0.4517641911246932, "grad_norm": NaN, "learning_rate": 0.00047624997952933345, "loss": 0.0, "step": 33040 }, { "epoch": 0.4519009236280602, "grad_norm": NaN, "learning_rate": 0.0004762245925046953, "loss": 0.0, "step": 33050 }, { "epoch": 0.45203765613142727, "grad_norm": NaN, "learning_rate": 0.00047619919259625596, "loss": 0.0, "step": 33060 }, { "epoch": 0.4521743886347943, "grad_norm": NaN, "learning_rate": 0.0004761737798054619, "loss": 0.0, "step": 33070 }, { "epoch": 0.45231112113816135, "grad_norm": NaN, "learning_rate": 0.0004761483541337605, "loss": 0.0, "step": 33080 }, { "epoch": 0.4524478536415284, "grad_norm": NaN, "learning_rate": 0.00047612291558259964, "loss": 0.0, "step": 33090 }, { "epoch": 0.45258458614489544, "grad_norm": NaN, "learning_rate": 0.00047609746415342824, "loss": 0.0, "step": 33100 }, { "epoch": 0.4527213186482625, "grad_norm": NaN, "learning_rate": 0.00047607199984769574, "loss": 0.0, "step": 33110 }, { "epoch": 0.4528580511516295, "grad_norm": NaN, "learning_rate": 0.00047604652266685233, "loss": 0.0, "step": 33120 }, { "epoch": 0.45299478365499657, "grad_norm": NaN, "learning_rate": 0.0004760210326123491, "loss": 0.0, "step": 33130 }, { "epoch": 0.4531315161583636, "grad_norm": NaN, "learning_rate": 0.0004759955296856375, "loss": 0.0, "step": 33140 }, { "epoch": 0.4532682486617306, "grad_norm": NaN, "learning_rate": 0.00047597001388817017, "loss": 0.0, "step": 33150 }, { "epoch": 0.45340498116509764, "grad_norm": NaN, "learning_rate": 0.00047594448522140023, "loss": 0.0, "step": 33160 }, { "epoch": 0.4535417136684647, "grad_norm": NaN, "learning_rate": 0.0004759189436867816, "loss": 0.0, "step": 33170 }, { "epoch": 0.45367844617183173, "grad_norm": NaN, "learning_rate": 0.0004758933892857687, "loss": 0.0, "step": 33180 }, { "epoch": 0.45381517867519877, "grad_norm": NaN, "learning_rate": 0.00047586782201981717, "loss": 0.0, "step": 33190 }, { "epoch": 0.4539519111785658, "grad_norm": NaN, "learning_rate": 0.0004758422418903828, "loss": 0.0, "step": 33200 }, { "epoch": 0.45408864368193286, "grad_norm": NaN, "learning_rate": 0.0004758166488989227, "loss": 0.0, "step": 33210 }, { "epoch": 0.4542253761852999, "grad_norm": NaN, "learning_rate": 0.0004757910430468944, "loss": 0.0, "step": 33220 }, { "epoch": 0.45436210868866694, "grad_norm": NaN, "learning_rate": 0.000475765424335756, "loss": 0.0, "step": 33230 }, { "epoch": 0.454498841192034, "grad_norm": NaN, "learning_rate": 0.00047573979276696653, "loss": 0.0, "step": 33240 }, { "epoch": 0.45463557369540103, "grad_norm": NaN, "learning_rate": 0.000475714148341986, "loss": 0.0, "step": 33250 }, { "epoch": 0.454772306198768, "grad_norm": NaN, "learning_rate": 0.00047568849106227465, "loss": 0.0, "step": 33260 }, { "epoch": 0.45490903870213506, "grad_norm": NaN, "learning_rate": 0.0004756628209292938, "loss": 0.0, "step": 33270 }, { "epoch": 0.4550457712055021, "grad_norm": NaN, "learning_rate": 0.0004756371379445053, "loss": 0.0, "step": 33280 }, { "epoch": 0.45518250370886915, "grad_norm": NaN, "learning_rate": 0.000475611442109372, "loss": 0.0, "step": 33290 }, { "epoch": 0.4553192362122362, "grad_norm": NaN, "learning_rate": 0.00047558573342535717, "loss": 0.0, "step": 33300 }, { "epoch": 0.45545596871560323, "grad_norm": NaN, "learning_rate": 0.00047556001189392495, "loss": 0.0, "step": 33310 }, { "epoch": 0.4555927012189703, "grad_norm": NaN, "learning_rate": 0.00047553427751654035, "loss": 0.0, "step": 33320 }, { "epoch": 0.4557294337223373, "grad_norm": NaN, "learning_rate": 0.0004755085302946689, "loss": 0.0, "step": 33330 }, { "epoch": 0.45586616622570436, "grad_norm": NaN, "learning_rate": 0.0004754827702297769, "loss": 0.0, "step": 33340 }, { "epoch": 0.4560028987290714, "grad_norm": NaN, "learning_rate": 0.0004754569973233314, "loss": 0.0, "step": 33350 }, { "epoch": 0.4561396312324384, "grad_norm": NaN, "learning_rate": 0.00047543121157680036, "loss": 0.0, "step": 33360 }, { "epoch": 0.45627636373580543, "grad_norm": NaN, "learning_rate": 0.0004754054129916522, "loss": 0.0, "step": 33370 }, { "epoch": 0.4564130962391725, "grad_norm": NaN, "learning_rate": 0.0004753796015693562, "loss": 0.0, "step": 33380 }, { "epoch": 0.4565498287425395, "grad_norm": NaN, "learning_rate": 0.0004753537773113823, "loss": 0.0, "step": 33390 }, { "epoch": 0.45668656124590656, "grad_norm": NaN, "learning_rate": 0.0004753279402192013, "loss": 0.0, "step": 33400 }, { "epoch": 0.4568232937492736, "grad_norm": NaN, "learning_rate": 0.0004753020902942847, "loss": 0.0, "step": 33410 }, { "epoch": 0.45696002625264065, "grad_norm": NaN, "learning_rate": 0.0004752762275381045, "loss": 0.0, "step": 33420 }, { "epoch": 0.4570967587560077, "grad_norm": NaN, "learning_rate": 0.00047525035195213386, "loss": 0.0, "step": 33430 }, { "epoch": 0.45723349125937474, "grad_norm": NaN, "learning_rate": 0.0004752244635378463, "loss": 0.0, "step": 33440 }, { "epoch": 0.4573702237627418, "grad_norm": NaN, "learning_rate": 0.0004751985622967162, "loss": 0.0, "step": 33450 }, { "epoch": 0.4575069562661088, "grad_norm": NaN, "learning_rate": 0.00047517264823021874, "loss": 0.0, "step": 33460 }, { "epoch": 0.4576436887694758, "grad_norm": NaN, "learning_rate": 0.00047514672133982963, "loss": 0.0, "step": 33470 }, { "epoch": 0.45778042127284285, "grad_norm": NaN, "learning_rate": 0.0004751207816270256, "loss": 0.0, "step": 33480 }, { "epoch": 0.4579171537762099, "grad_norm": NaN, "learning_rate": 0.00047509482909328384, "loss": 0.0, "step": 33490 }, { "epoch": 0.45805388627957694, "grad_norm": NaN, "learning_rate": 0.0004750688637400824, "loss": 0.0, "step": 33500 }, { "epoch": 0.458190618782944, "grad_norm": NaN, "learning_rate": 0.00047504288556890007, "loss": 0.0, "step": 33510 }, { "epoch": 0.458327351286311, "grad_norm": NaN, "learning_rate": 0.0004750168945812163, "loss": 0.0, "step": 33520 }, { "epoch": 0.45846408378967807, "grad_norm": NaN, "learning_rate": 0.00047499089077851145, "loss": 0.0, "step": 33530 }, { "epoch": 0.4586008162930451, "grad_norm": NaN, "learning_rate": 0.0004749648741622663, "loss": 0.0, "step": 33540 }, { "epoch": 0.45873754879641215, "grad_norm": NaN, "learning_rate": 0.00047493884473396263, "loss": 0.0, "step": 33550 }, { "epoch": 0.4588742812997792, "grad_norm": NaN, "learning_rate": 0.0004749128024950828, "loss": 0.0, "step": 33560 }, { "epoch": 0.45901101380314624, "grad_norm": NaN, "learning_rate": 0.00047488674744710995, "loss": 0.0, "step": 33570 }, { "epoch": 0.4591477463065132, "grad_norm": NaN, "learning_rate": 0.00047486067959152803, "loss": 0.0, "step": 33580 }, { "epoch": 0.45928447880988027, "grad_norm": NaN, "learning_rate": 0.0004748345989298215, "loss": 0.0, "step": 33590 }, { "epoch": 0.4594212113132473, "grad_norm": NaN, "learning_rate": 0.0004748085054634759, "loss": 0.0, "step": 33600 }, { "epoch": 0.45955794381661436, "grad_norm": NaN, "learning_rate": 0.000474782399193977, "loss": 0.0, "step": 33610 }, { "epoch": 0.4596946763199814, "grad_norm": NaN, "learning_rate": 0.0004747562801228119, "loss": 0.0, "step": 33620 }, { "epoch": 0.45983140882334844, "grad_norm": NaN, "learning_rate": 0.0004747301482514679, "loss": 0.0, "step": 33630 }, { "epoch": 0.4599681413267155, "grad_norm": NaN, "learning_rate": 0.0004747040035814333, "loss": 0.0, "step": 33640 }, { "epoch": 0.46010487383008253, "grad_norm": NaN, "learning_rate": 0.0004746778461141971, "loss": 0.0, "step": 33650 }, { "epoch": 0.46024160633344957, "grad_norm": NaN, "learning_rate": 0.000474651675851249, "loss": 0.0, "step": 33660 }, { "epoch": 0.4603783388368166, "grad_norm": NaN, "learning_rate": 0.00047462549279407947, "loss": 0.0, "step": 33670 }, { "epoch": 0.46051507134018366, "grad_norm": NaN, "learning_rate": 0.00047459929694417956, "loss": 0.0, "step": 33680 }, { "epoch": 0.46065180384355064, "grad_norm": NaN, "learning_rate": 0.00047457308830304125, "loss": 0.0, "step": 33690 }, { "epoch": 0.4607885363469177, "grad_norm": NaN, "learning_rate": 0.00047454686687215707, "loss": 0.0, "step": 33700 }, { "epoch": 0.46092526885028473, "grad_norm": NaN, "learning_rate": 0.00047452063265302055, "loss": 0.0, "step": 33710 }, { "epoch": 0.4610620013536518, "grad_norm": NaN, "learning_rate": 0.00047449438564712556, "loss": 0.0, "step": 33720 }, { "epoch": 0.4611987338570188, "grad_norm": NaN, "learning_rate": 0.000474468125855967, "loss": 0.0, "step": 33730 }, { "epoch": 0.46133546636038586, "grad_norm": NaN, "learning_rate": 0.00047444185328104037, "loss": 0.0, "step": 33740 }, { "epoch": 0.4614721988637529, "grad_norm": NaN, "learning_rate": 0.000474415567923842, "loss": 0.0, "step": 33750 }, { "epoch": 0.46160893136711995, "grad_norm": NaN, "learning_rate": 0.0004743892697858688, "loss": 0.0, "step": 33760 }, { "epoch": 0.461745663870487, "grad_norm": NaN, "learning_rate": 0.0004743629588686186, "loss": 0.0, "step": 33770 }, { "epoch": 0.46188239637385403, "grad_norm": NaN, "learning_rate": 0.0004743366351735896, "loss": 0.0, "step": 33780 }, { "epoch": 0.4620191288772211, "grad_norm": NaN, "learning_rate": 0.0004743102987022812, "loss": 0.0, "step": 33790 }, { "epoch": 0.46215586138058806, "grad_norm": NaN, "learning_rate": 0.00047428394945619323, "loss": 0.0, "step": 33800 }, { "epoch": 0.4622925938839551, "grad_norm": NaN, "learning_rate": 0.00047425758743682624, "loss": 0.0, "step": 33810 }, { "epoch": 0.46242932638732215, "grad_norm": NaN, "learning_rate": 0.0004742312126456817, "loss": 0.0, "step": 33820 }, { "epoch": 0.4625660588906892, "grad_norm": NaN, "learning_rate": 0.00047420482508426164, "loss": 0.0, "step": 33830 }, { "epoch": 0.46270279139405623, "grad_norm": NaN, "learning_rate": 0.0004741784247540689, "loss": 0.0, "step": 33840 }, { "epoch": 0.4628395238974233, "grad_norm": NaN, "learning_rate": 0.0004741520116566069, "loss": 0.0, "step": 33850 }, { "epoch": 0.4629762564007903, "grad_norm": NaN, "learning_rate": 0.00047412558579338005, "loss": 0.0, "step": 33860 }, { "epoch": 0.46311298890415736, "grad_norm": NaN, "learning_rate": 0.0004740991471658933, "loss": 0.0, "step": 33870 }, { "epoch": 0.4632497214075244, "grad_norm": NaN, "learning_rate": 0.00047407269577565234, "loss": 0.0, "step": 33880 }, { "epoch": 0.46338645391089145, "grad_norm": NaN, "learning_rate": 0.00047404623162416353, "loss": 0.0, "step": 33890 }, { "epoch": 0.4635231864142585, "grad_norm": NaN, "learning_rate": 0.0004740197547129342, "loss": 0.0, "step": 33900 }, { "epoch": 0.4636599189176255, "grad_norm": NaN, "learning_rate": 0.00047399326504347217, "loss": 0.0, "step": 33910 }, { "epoch": 0.4637966514209925, "grad_norm": NaN, "learning_rate": 0.000473966762617286, "loss": 0.0, "step": 33920 }, { "epoch": 0.46393338392435957, "grad_norm": NaN, "learning_rate": 0.00047394024743588516, "loss": 0.0, "step": 33930 }, { "epoch": 0.4640701164277266, "grad_norm": NaN, "learning_rate": 0.00047391371950077966, "loss": 0.0, "step": 33940 }, { "epoch": 0.46420684893109365, "grad_norm": NaN, "learning_rate": 0.00047388717881348034, "loss": 0.0, "step": 33950 }, { "epoch": 0.4643435814344607, "grad_norm": NaN, "learning_rate": 0.00047386062537549867, "loss": 0.0, "step": 33960 }, { "epoch": 0.46448031393782774, "grad_norm": NaN, "learning_rate": 0.0004738340591883469, "loss": 0.0, "step": 33970 }, { "epoch": 0.4646170464411948, "grad_norm": NaN, "learning_rate": 0.000473807480253538, "loss": 0.0, "step": 33980 }, { "epoch": 0.4647537789445618, "grad_norm": NaN, "learning_rate": 0.0004737808885725859, "loss": 0.0, "step": 33990 }, { "epoch": 0.46489051144792887, "grad_norm": NaN, "learning_rate": 0.0004737542841470047, "loss": 0.0, "step": 34000 }, { "epoch": 0.4650272439512959, "grad_norm": NaN, "learning_rate": 0.00047372766697830984, "loss": 0.0, "step": 34010 }, { "epoch": 0.4651639764546629, "grad_norm": NaN, "learning_rate": 0.000473701037068017, "loss": 0.0, "step": 34020 }, { "epoch": 0.46530070895802994, "grad_norm": NaN, "learning_rate": 0.00047367439441764285, "loss": 0.0, "step": 34030 }, { "epoch": 0.465437441461397, "grad_norm": NaN, "learning_rate": 0.00047364773902870477, "loss": 0.0, "step": 34040 }, { "epoch": 0.465574173964764, "grad_norm": NaN, "learning_rate": 0.00047362107090272084, "loss": 0.0, "step": 34050 }, { "epoch": 0.46571090646813107, "grad_norm": NaN, "learning_rate": 0.00047359439004120976, "loss": 0.0, "step": 34060 }, { "epoch": 0.4658476389714981, "grad_norm": NaN, "learning_rate": 0.0004735676964456911, "loss": 0.0, "step": 34070 }, { "epoch": 0.46598437147486516, "grad_norm": NaN, "learning_rate": 0.00047354099011768505, "loss": 0.0, "step": 34080 }, { "epoch": 0.4661211039782322, "grad_norm": NaN, "learning_rate": 0.0004735142710587126, "loss": 0.0, "step": 34090 }, { "epoch": 0.46625783648159924, "grad_norm": NaN, "learning_rate": 0.00047348753927029544, "loss": 0.0, "step": 34100 }, { "epoch": 0.4663945689849663, "grad_norm": NaN, "learning_rate": 0.000473460794753956, "loss": 0.0, "step": 34110 }, { "epoch": 0.46653130148833327, "grad_norm": NaN, "learning_rate": 0.0004734340375112174, "loss": 0.0, "step": 34120 }, { "epoch": 0.4666680339917003, "grad_norm": NaN, "learning_rate": 0.00047340726754360355, "loss": 0.0, "step": 34130 }, { "epoch": 0.46680476649506736, "grad_norm": NaN, "learning_rate": 0.00047338048485263886, "loss": 0.0, "step": 34140 }, { "epoch": 0.4669414989984344, "grad_norm": NaN, "learning_rate": 0.00047335368943984883, "loss": 0.0, "step": 34150 }, { "epoch": 0.46707823150180144, "grad_norm": NaN, "learning_rate": 0.0004733268813067595, "loss": 0.0, "step": 34160 }, { "epoch": 0.4672149640051685, "grad_norm": NaN, "learning_rate": 0.0004733000604548975, "loss": 0.0, "step": 34170 }, { "epoch": 0.46735169650853553, "grad_norm": NaN, "learning_rate": 0.00047327322688579034, "loss": 0.0, "step": 34180 }, { "epoch": 0.4674884290119026, "grad_norm": NaN, "learning_rate": 0.00047324638060096635, "loss": 0.0, "step": 34190 }, { "epoch": 0.4676251615152696, "grad_norm": NaN, "learning_rate": 0.0004732195216019544, "loss": 0.0, "step": 34200 }, { "epoch": 0.46776189401863666, "grad_norm": NaN, "learning_rate": 0.000473192649890284, "loss": 0.0, "step": 34210 }, { "epoch": 0.4678986265220037, "grad_norm": NaN, "learning_rate": 0.0004731657654674857, "loss": 0.0, "step": 34220 }, { "epoch": 0.4680353590253707, "grad_norm": NaN, "learning_rate": 0.0004731388683350906, "loss": 0.0, "step": 34230 }, { "epoch": 0.46817209152873773, "grad_norm": NaN, "learning_rate": 0.00047311195849463047, "loss": 0.0, "step": 34240 }, { "epoch": 0.4683088240321048, "grad_norm": NaN, "learning_rate": 0.00047308503594763787, "loss": 0.0, "step": 34250 }, { "epoch": 0.4684455565354718, "grad_norm": NaN, "learning_rate": 0.00047305810069564615, "loss": 0.0, "step": 34260 }, { "epoch": 0.46858228903883886, "grad_norm": NaN, "learning_rate": 0.00047303115274018915, "loss": 0.0, "step": 34270 }, { "epoch": 0.4687190215422059, "grad_norm": NaN, "learning_rate": 0.00047300419208280177, "loss": 0.0, "step": 34280 }, { "epoch": 0.46885575404557295, "grad_norm": NaN, "learning_rate": 0.0004729772187250193, "loss": 0.0, "step": 34290 }, { "epoch": 0.46899248654894, "grad_norm": NaN, "learning_rate": 0.0004729502326683781, "loss": 0.0, "step": 34300 }, { "epoch": 0.46912921905230703, "grad_norm": NaN, "learning_rate": 0.00047292323391441484, "loss": 0.0, "step": 34310 }, { "epoch": 0.4692659515556741, "grad_norm": NaN, "learning_rate": 0.00047289622246466734, "loss": 0.0, "step": 34320 }, { "epoch": 0.4694026840590411, "grad_norm": NaN, "learning_rate": 0.00047286919832067374, "loss": 0.0, "step": 34330 }, { "epoch": 0.4695394165624081, "grad_norm": NaN, "learning_rate": 0.0004728421614839733, "loss": 0.0, "step": 34340 }, { "epoch": 0.46967614906577515, "grad_norm": NaN, "learning_rate": 0.0004728151119561057, "loss": 0.0, "step": 34350 }, { "epoch": 0.4698128815691422, "grad_norm": NaN, "learning_rate": 0.0004727880497386114, "loss": 0.0, "step": 34360 }, { "epoch": 0.46994961407250924, "grad_norm": NaN, "learning_rate": 0.00047276097483303175, "loss": 0.0, "step": 34370 }, { "epoch": 0.4700863465758763, "grad_norm": NaN, "learning_rate": 0.0004727338872409086, "loss": 0.0, "step": 34380 }, { "epoch": 0.4702230790792433, "grad_norm": NaN, "learning_rate": 0.0004727067869637847, "loss": 0.0, "step": 34390 }, { "epoch": 0.47035981158261037, "grad_norm": NaN, "learning_rate": 0.00047267967400320345, "loss": 0.0, "step": 34400 }, { "epoch": 0.4704965440859774, "grad_norm": NaN, "learning_rate": 0.00047265254836070895, "loss": 0.0, "step": 34410 }, { "epoch": 0.47063327658934445, "grad_norm": NaN, "learning_rate": 0.0004726254100378459, "loss": 0.0, "step": 34420 }, { "epoch": 0.4707700090927115, "grad_norm": NaN, "learning_rate": 0.0004725982590361601, "loss": 0.0, "step": 34430 }, { "epoch": 0.47090674159607854, "grad_norm": NaN, "learning_rate": 0.0004725710953571977, "loss": 0.0, "step": 34440 }, { "epoch": 0.4710434740994455, "grad_norm": NaN, "learning_rate": 0.0004725439190025058, "loss": 0.0, "step": 34450 }, { "epoch": 0.47118020660281257, "grad_norm": NaN, "learning_rate": 0.00047251672997363197, "loss": 0.0, "step": 34460 }, { "epoch": 0.4713169391061796, "grad_norm": NaN, "learning_rate": 0.0004724895282721248, "loss": 0.0, "step": 34470 }, { "epoch": 0.47145367160954665, "grad_norm": NaN, "learning_rate": 0.00047246231389953347, "loss": 0.0, "step": 34480 }, { "epoch": 0.4715904041129137, "grad_norm": NaN, "learning_rate": 0.0004724350868574078, "loss": 0.0, "step": 34490 }, { "epoch": 0.47172713661628074, "grad_norm": NaN, "learning_rate": 0.00047240784714729843, "loss": 0.0, "step": 34500 }, { "epoch": 0.4718638691196478, "grad_norm": NaN, "learning_rate": 0.0004723805947707568, "loss": 0.0, "step": 34510 }, { "epoch": 0.4720006016230148, "grad_norm": NaN, "learning_rate": 0.0004723533297293348, "loss": 0.0, "step": 34520 }, { "epoch": 0.47213733412638187, "grad_norm": NaN, "learning_rate": 0.0004723260520245853, "loss": 0.0, "step": 34530 }, { "epoch": 0.4722740666297489, "grad_norm": NaN, "learning_rate": 0.0004722987616580617, "loss": 0.0, "step": 34540 }, { "epoch": 0.47241079913311596, "grad_norm": NaN, "learning_rate": 0.0004722714586313185, "loss": 0.0, "step": 34550 }, { "epoch": 0.47254753163648294, "grad_norm": NaN, "learning_rate": 0.0004722441429459104, "loss": 0.0, "step": 34560 }, { "epoch": 0.47268426413985, "grad_norm": NaN, "learning_rate": 0.0004722168146033931, "loss": 0.0, "step": 34570 }, { "epoch": 0.47282099664321703, "grad_norm": NaN, "learning_rate": 0.00047218947360532307, "loss": 0.0, "step": 34580 }, { "epoch": 0.47295772914658407, "grad_norm": NaN, "learning_rate": 0.0004721621199532573, "loss": 0.0, "step": 34590 }, { "epoch": 0.4730944616499511, "grad_norm": NaN, "learning_rate": 0.0004721347536487537, "loss": 0.0, "step": 34600 }, { "epoch": 0.47323119415331816, "grad_norm": NaN, "learning_rate": 0.00047210737469337077, "loss": 0.0, "step": 34610 }, { "epoch": 0.4733679266566852, "grad_norm": NaN, "learning_rate": 0.00047207998308866785, "loss": 0.0, "step": 34620 }, { "epoch": 0.47350465916005224, "grad_norm": NaN, "learning_rate": 0.0004720525788362049, "loss": 0.0, "step": 34630 }, { "epoch": 0.4736413916634193, "grad_norm": NaN, "learning_rate": 0.00047202516193754263, "loss": 0.0, "step": 34640 }, { "epoch": 0.47377812416678633, "grad_norm": NaN, "learning_rate": 0.00047199773239424235, "loss": 0.0, "step": 34650 }, { "epoch": 0.4739148566701534, "grad_norm": NaN, "learning_rate": 0.0004719702902078664, "loss": 0.0, "step": 34660 }, { "epoch": 0.47405158917352036, "grad_norm": NaN, "learning_rate": 0.00047194283537997764, "loss": 0.0, "step": 34670 }, { "epoch": 0.4741883216768874, "grad_norm": NaN, "learning_rate": 0.0004719153679121395, "loss": 0.0, "step": 34680 }, { "epoch": 0.47432505418025445, "grad_norm": NaN, "learning_rate": 0.0004718878878059164, "loss": 0.0, "step": 34690 }, { "epoch": 0.4744617866836215, "grad_norm": NaN, "learning_rate": 0.00047186039506287336, "loss": 0.0, "step": 34700 }, { "epoch": 0.47459851918698853, "grad_norm": NaN, "learning_rate": 0.0004718328896845761, "loss": 0.0, "step": 34710 }, { "epoch": 0.4747352516903556, "grad_norm": NaN, "learning_rate": 0.0004718053716725911, "loss": 0.0, "step": 34720 }, { "epoch": 0.4748719841937226, "grad_norm": NaN, "learning_rate": 0.00047177784102848556, "loss": 0.0, "step": 34730 }, { "epoch": 0.47500871669708966, "grad_norm": NaN, "learning_rate": 0.0004717502977538274, "loss": 0.0, "step": 34740 }, { "epoch": 0.4751454492004567, "grad_norm": NaN, "learning_rate": 0.0004717227418501853, "loss": 0.0, "step": 34750 }, { "epoch": 0.47528218170382375, "grad_norm": NaN, "learning_rate": 0.0004716951733191284, "loss": 0.0, "step": 34760 }, { "epoch": 0.47541891420719073, "grad_norm": NaN, "learning_rate": 0.00047166759216222696, "loss": 0.0, "step": 34770 }, { "epoch": 0.4755556467105578, "grad_norm": NaN, "learning_rate": 0.00047163999838105176, "loss": 0.0, "step": 34780 }, { "epoch": 0.4756923792139248, "grad_norm": NaN, "learning_rate": 0.0004716123919771742, "loss": 0.0, "step": 34790 }, { "epoch": 0.47582911171729186, "grad_norm": NaN, "learning_rate": 0.00047158477295216656, "loss": 0.0, "step": 34800 }, { "epoch": 0.4759658442206589, "grad_norm": NaN, "learning_rate": 0.00047155714130760174, "loss": 0.0, "step": 34810 }, { "epoch": 0.47610257672402595, "grad_norm": NaN, "learning_rate": 0.00047152949704505354, "loss": 0.0, "step": 34820 }, { "epoch": 0.476239309227393, "grad_norm": NaN, "learning_rate": 0.00047150184016609613, "loss": 0.0, "step": 34830 }, { "epoch": 0.47637604173076004, "grad_norm": NaN, "learning_rate": 0.0004714741706723048, "loss": 0.0, "step": 34840 }, { "epoch": 0.4765127742341271, "grad_norm": NaN, "learning_rate": 0.00047144648856525524, "loss": 0.0, "step": 34850 }, { "epoch": 0.4766495067374941, "grad_norm": NaN, "learning_rate": 0.000471418793846524, "loss": 0.0, "step": 34860 }, { "epoch": 0.47678623924086116, "grad_norm": NaN, "learning_rate": 0.00047139108651768835, "loss": 0.0, "step": 34870 }, { "epoch": 0.47692297174422815, "grad_norm": NaN, "learning_rate": 0.0004713633665803263, "loss": 0.0, "step": 34880 }, { "epoch": 0.4770597042475952, "grad_norm": NaN, "learning_rate": 0.00047133563403601653, "loss": 0.0, "step": 34890 }, { "epoch": 0.47719643675096224, "grad_norm": NaN, "learning_rate": 0.00047130788888633836, "loss": 0.0, "step": 34900 }, { "epoch": 0.4773331692543293, "grad_norm": NaN, "learning_rate": 0.000471280131132872, "loss": 0.0, "step": 34910 }, { "epoch": 0.4774699017576963, "grad_norm": NaN, "learning_rate": 0.00047125236077719823, "loss": 0.0, "step": 34920 }, { "epoch": 0.47760663426106337, "grad_norm": NaN, "learning_rate": 0.00047122457782089867, "loss": 0.0, "step": 34930 }, { "epoch": 0.4777433667644304, "grad_norm": NaN, "learning_rate": 0.0004711967822655556, "loss": 0.0, "step": 34940 }, { "epoch": 0.47788009926779745, "grad_norm": NaN, "learning_rate": 0.00047116897411275194, "loss": 0.0, "step": 34950 }, { "epoch": 0.4780168317711645, "grad_norm": NaN, "learning_rate": 0.0004711411533640715, "loss": 0.0, "step": 34960 }, { "epoch": 0.47815356427453154, "grad_norm": NaN, "learning_rate": 0.00047111332002109865, "loss": 0.0, "step": 34970 }, { "epoch": 0.4782902967778986, "grad_norm": NaN, "learning_rate": 0.0004710854740854186, "loss": 0.0, "step": 34980 }, { "epoch": 0.47842702928126557, "grad_norm": NaN, "learning_rate": 0.00047105761555861703, "loss": 0.0, "step": 34990 }, { "epoch": 0.4785637617846326, "grad_norm": NaN, "learning_rate": 0.00047102974444228075, "loss": 0.0, "step": 35000 }, { "epoch": 0.47870049428799966, "grad_norm": NaN, "learning_rate": 0.00047100186073799696, "loss": 0.0, "step": 35010 }, { "epoch": 0.4788372267913667, "grad_norm": NaN, "learning_rate": 0.00047097396444735365, "loss": 0.0, "step": 35020 }, { "epoch": 0.47897395929473374, "grad_norm": NaN, "learning_rate": 0.0004709460555719396, "loss": 0.0, "step": 35030 }, { "epoch": 0.4791106917981008, "grad_norm": NaN, "learning_rate": 0.0004709181341133442, "loss": 0.0, "step": 35040 }, { "epoch": 0.47924742430146783, "grad_norm": NaN, "learning_rate": 0.0004708902000731577, "loss": 0.0, "step": 35050 }, { "epoch": 0.47938415680483487, "grad_norm": NaN, "learning_rate": 0.00047086225345297096, "loss": 0.0, "step": 35060 }, { "epoch": 0.4795208893082019, "grad_norm": NaN, "learning_rate": 0.0004708342942543755, "loss": 0.0, "step": 35070 }, { "epoch": 0.47965762181156896, "grad_norm": NaN, "learning_rate": 0.0004708063224789637, "loss": 0.0, "step": 35080 }, { "epoch": 0.479794354314936, "grad_norm": NaN, "learning_rate": 0.00047077833812832854, "loss": 0.0, "step": 35090 }, { "epoch": 0.479931086818303, "grad_norm": NaN, "learning_rate": 0.0004707503412040639, "loss": 0.0, "step": 35100 }, { "epoch": 0.48006781932167003, "grad_norm": NaN, "learning_rate": 0.0004707223317077641, "loss": 0.0, "step": 35110 }, { "epoch": 0.4802045518250371, "grad_norm": NaN, "learning_rate": 0.0004706943096410243, "loss": 0.0, "step": 35120 }, { "epoch": 0.4803412843284041, "grad_norm": NaN, "learning_rate": 0.0004706662750054406, "loss": 0.0, "step": 35130 }, { "epoch": 0.48047801683177116, "grad_norm": NaN, "learning_rate": 0.00047063822780260935, "loss": 0.0, "step": 35140 }, { "epoch": 0.4806147493351382, "grad_norm": NaN, "learning_rate": 0.0004706101680341281, "loss": 0.0, "step": 35150 }, { "epoch": 0.48075148183850525, "grad_norm": NaN, "learning_rate": 0.0004705820957015947, "loss": 0.0, "step": 35160 }, { "epoch": 0.4808882143418723, "grad_norm": NaN, "learning_rate": 0.0004705540108066081, "loss": 0.0, "step": 35170 }, { "epoch": 0.48102494684523933, "grad_norm": NaN, "learning_rate": 0.00047052591335076764, "loss": 0.0, "step": 35180 }, { "epoch": 0.4811616793486064, "grad_norm": NaN, "learning_rate": 0.00047049780333567354, "loss": 0.0, "step": 35190 }, { "epoch": 0.4812984118519734, "grad_norm": NaN, "learning_rate": 0.0004704696807629267, "loss": 0.0, "step": 35200 }, { "epoch": 0.4814351443553404, "grad_norm": NaN, "learning_rate": 0.0004704415456341288, "loss": 0.0, "step": 35210 }, { "epoch": 0.48157187685870745, "grad_norm": NaN, "learning_rate": 0.0004704133979508821, "loss": 0.0, "step": 35220 }, { "epoch": 0.4817086093620745, "grad_norm": NaN, "learning_rate": 0.00047038523771478956, "loss": 0.0, "step": 35230 }, { "epoch": 0.48184534186544153, "grad_norm": NaN, "learning_rate": 0.0004703570649274552, "loss": 0.0, "step": 35240 }, { "epoch": 0.4819820743688086, "grad_norm": NaN, "learning_rate": 0.0004703288795904833, "loss": 0.0, "step": 35250 }, { "epoch": 0.4821188068721756, "grad_norm": NaN, "learning_rate": 0.0004703006817054791, "loss": 0.0, "step": 35260 }, { "epoch": 0.48225553937554266, "grad_norm": NaN, "learning_rate": 0.0004702724712740485, "loss": 0.0, "step": 35270 }, { "epoch": 0.4823922718789097, "grad_norm": NaN, "learning_rate": 0.0004702442482977982, "loss": 0.0, "step": 35280 }, { "epoch": 0.48252900438227675, "grad_norm": NaN, "learning_rate": 0.0004702160127783355, "loss": 0.0, "step": 35290 }, { "epoch": 0.4826657368856438, "grad_norm": NaN, "learning_rate": 0.0004701877647172683, "loss": 0.0, "step": 35300 }, { "epoch": 0.48280246938901084, "grad_norm": NaN, "learning_rate": 0.00047015950411620554, "loss": 0.0, "step": 35310 }, { "epoch": 0.4829392018923778, "grad_norm": NaN, "learning_rate": 0.0004701312309767567, "loss": 0.0, "step": 35320 }, { "epoch": 0.48307593439574487, "grad_norm": NaN, "learning_rate": 0.00047010294530053194, "loss": 0.0, "step": 35330 }, { "epoch": 0.4832126668991119, "grad_norm": NaN, "learning_rate": 0.00047007464708914216, "loss": 0.0, "step": 35340 }, { "epoch": 0.48334939940247895, "grad_norm": NaN, "learning_rate": 0.0004700463363441989, "loss": 0.0, "step": 35350 }, { "epoch": 0.483486131905846, "grad_norm": NaN, "learning_rate": 0.00047001801306731464, "loss": 0.0, "step": 35360 }, { "epoch": 0.48362286440921304, "grad_norm": NaN, "learning_rate": 0.00046998967726010236, "loss": 0.0, "step": 35370 }, { "epoch": 0.4837595969125801, "grad_norm": NaN, "learning_rate": 0.00046996132892417573, "loss": 0.0, "step": 35380 }, { "epoch": 0.4838963294159471, "grad_norm": NaN, "learning_rate": 0.00046993296806114947, "loss": 0.0, "step": 35390 }, { "epoch": 0.48403306191931417, "grad_norm": NaN, "learning_rate": 0.0004699045946726386, "loss": 0.0, "step": 35400 }, { "epoch": 0.4841697944226812, "grad_norm": NaN, "learning_rate": 0.00046987620876025896, "loss": 0.0, "step": 35410 }, { "epoch": 0.48430652692604825, "grad_norm": NaN, "learning_rate": 0.0004698478103256273, "loss": 0.0, "step": 35420 }, { "epoch": 0.48444325942941524, "grad_norm": NaN, "learning_rate": 0.0004698193993703609, "loss": 0.0, "step": 35430 }, { "epoch": 0.4845799919327823, "grad_norm": NaN, "learning_rate": 0.0004697909758960778, "loss": 0.0, "step": 35440 }, { "epoch": 0.4847167244361493, "grad_norm": NaN, "learning_rate": 0.0004697625399043968, "loss": 0.0, "step": 35450 }, { "epoch": 0.48485345693951637, "grad_norm": NaN, "learning_rate": 0.00046973409139693725, "loss": 0.0, "step": 35460 }, { "epoch": 0.4849901894428834, "grad_norm": NaN, "learning_rate": 0.0004697056303753195, "loss": 0.0, "step": 35470 }, { "epoch": 0.48512692194625046, "grad_norm": NaN, "learning_rate": 0.0004696771568411643, "loss": 0.0, "step": 35480 }, { "epoch": 0.4852636544496175, "grad_norm": NaN, "learning_rate": 0.0004696486707960933, "loss": 0.0, "step": 35490 }, { "epoch": 0.48540038695298454, "grad_norm": NaN, "learning_rate": 0.00046962017224172893, "loss": 0.0, "step": 35500 }, { "epoch": 0.4855371194563516, "grad_norm": NaN, "learning_rate": 0.00046959166117969405, "loss": 0.0, "step": 35510 }, { "epoch": 0.4856738519597186, "grad_norm": NaN, "learning_rate": 0.0004695631376116125, "loss": 0.0, "step": 35520 }, { "epoch": 0.4858105844630856, "grad_norm": NaN, "learning_rate": 0.00046953460153910864, "loss": 0.0, "step": 35530 }, { "epoch": 0.48594731696645266, "grad_norm": NaN, "learning_rate": 0.00046950605296380783, "loss": 0.0, "step": 35540 }, { "epoch": 0.4860840494698197, "grad_norm": NaN, "learning_rate": 0.0004694774918873358, "loss": 0.0, "step": 35550 }, { "epoch": 0.48622078197318674, "grad_norm": NaN, "learning_rate": 0.00046944891831131915, "loss": 0.0, "step": 35560 }, { "epoch": 0.4863575144765538, "grad_norm": NaN, "learning_rate": 0.00046942033223738524, "loss": 0.0, "step": 35570 }, { "epoch": 0.48649424697992083, "grad_norm": NaN, "learning_rate": 0.000469391733667162, "loss": 0.0, "step": 35580 }, { "epoch": 0.4866309794832879, "grad_norm": NaN, "learning_rate": 0.0004693631226022783, "loss": 0.0, "step": 35590 }, { "epoch": 0.4867677119866549, "grad_norm": NaN, "learning_rate": 0.0004693344990443634, "loss": 0.0, "step": 35600 }, { "epoch": 0.48690444449002196, "grad_norm": NaN, "learning_rate": 0.00046930586299504765, "loss": 0.0, "step": 35610 }, { "epoch": 0.487041176993389, "grad_norm": NaN, "learning_rate": 0.0004692772144559617, "loss": 0.0, "step": 35620 }, { "epoch": 0.48717790949675605, "grad_norm": NaN, "learning_rate": 0.00046924855342873737, "loss": 0.0, "step": 35630 }, { "epoch": 0.48731464200012303, "grad_norm": NaN, "learning_rate": 0.0004692198799150067, "loss": 0.0, "step": 35640 }, { "epoch": 0.4874513745034901, "grad_norm": NaN, "learning_rate": 0.00046919119391640283, "loss": 0.0, "step": 35650 }, { "epoch": 0.4875881070068571, "grad_norm": NaN, "learning_rate": 0.0004691624954345594, "loss": 0.0, "step": 35660 }, { "epoch": 0.48772483951022416, "grad_norm": NaN, "learning_rate": 0.0004691337844711109, "loss": 0.0, "step": 35670 }, { "epoch": 0.4878615720135912, "grad_norm": NaN, "learning_rate": 0.00046910506102769233, "loss": 0.0, "step": 35680 }, { "epoch": 0.48799830451695825, "grad_norm": NaN, "learning_rate": 0.00046907632510593975, "loss": 0.0, "step": 35690 }, { "epoch": 0.4881350370203253, "grad_norm": NaN, "learning_rate": 0.0004690475767074895, "loss": 0.0, "step": 35700 }, { "epoch": 0.48827176952369233, "grad_norm": NaN, "learning_rate": 0.0004690188158339789, "loss": 0.0, "step": 35710 }, { "epoch": 0.4884085020270594, "grad_norm": NaN, "learning_rate": 0.000468990042487046, "loss": 0.0, "step": 35720 }, { "epoch": 0.4885452345304264, "grad_norm": NaN, "learning_rate": 0.00046896125666832933, "loss": 0.0, "step": 35730 }, { "epoch": 0.48868196703379346, "grad_norm": NaN, "learning_rate": 0.00046893245837946847, "loss": 0.0, "step": 35740 }, { "epoch": 0.48881869953716045, "grad_norm": NaN, "learning_rate": 0.00046890364762210336, "loss": 0.0, "step": 35750 }, { "epoch": 0.4889554320405275, "grad_norm": NaN, "learning_rate": 0.0004688748243978749, "loss": 0.0, "step": 35760 }, { "epoch": 0.48909216454389454, "grad_norm": NaN, "learning_rate": 0.0004688459887084246, "loss": 0.0, "step": 35770 }, { "epoch": 0.4892288970472616, "grad_norm": NaN, "learning_rate": 0.0004688171405553947, "loss": 0.0, "step": 35780 }, { "epoch": 0.4893656295506286, "grad_norm": NaN, "learning_rate": 0.00046878827994042803, "loss": 0.0, "step": 35790 }, { "epoch": 0.48950236205399567, "grad_norm": NaN, "learning_rate": 0.00046875940686516836, "loss": 0.0, "step": 35800 }, { "epoch": 0.4896390945573627, "grad_norm": NaN, "learning_rate": 0.00046873052133126016, "loss": 0.0, "step": 35810 }, { "epoch": 0.48977582706072975, "grad_norm": NaN, "learning_rate": 0.0004687016233403483, "loss": 0.0, "step": 35820 }, { "epoch": 0.4899125595640968, "grad_norm": NaN, "learning_rate": 0.0004686727128940785, "loss": 0.0, "step": 35830 }, { "epoch": 0.49004929206746384, "grad_norm": NaN, "learning_rate": 0.0004686437899940975, "loss": 0.0, "step": 35840 }, { "epoch": 0.4901860245708309, "grad_norm": NaN, "learning_rate": 0.00046861485464205234, "loss": 0.0, "step": 35850 }, { "epoch": 0.49032275707419787, "grad_norm": NaN, "learning_rate": 0.000468585906839591, "loss": 0.0, "step": 35860 }, { "epoch": 0.4904594895775649, "grad_norm": NaN, "learning_rate": 0.000468556946588362, "loss": 0.0, "step": 35870 }, { "epoch": 0.49059622208093195, "grad_norm": NaN, "learning_rate": 0.00046852797389001474, "loss": 0.0, "step": 35880 }, { "epoch": 0.490732954584299, "grad_norm": NaN, "learning_rate": 0.00046849898874619925, "loss": 0.0, "step": 35890 }, { "epoch": 0.49086968708766604, "grad_norm": NaN, "learning_rate": 0.00046846999115856625, "loss": 0.0, "step": 35900 }, { "epoch": 0.4910064195910331, "grad_norm": NaN, "learning_rate": 0.0004684409811287672, "loss": 0.0, "step": 35910 }, { "epoch": 0.4911431520944001, "grad_norm": NaN, "learning_rate": 0.00046841195865845434, "loss": 0.0, "step": 35920 }, { "epoch": 0.49127988459776717, "grad_norm": NaN, "learning_rate": 0.0004683829237492804, "loss": 0.0, "step": 35930 }, { "epoch": 0.4914166171011342, "grad_norm": NaN, "learning_rate": 0.00046835387640289906, "loss": 0.0, "step": 35940 }, { "epoch": 0.49155334960450126, "grad_norm": NaN, "learning_rate": 0.0004683248166209646, "loss": 0.0, "step": 35950 }, { "epoch": 0.4916900821078683, "grad_norm": NaN, "learning_rate": 0.0004682957444051319, "loss": 0.0, "step": 35960 }, { "epoch": 0.4918268146112353, "grad_norm": NaN, "learning_rate": 0.00046826665975705677, "loss": 0.0, "step": 35970 }, { "epoch": 0.49196354711460233, "grad_norm": NaN, "learning_rate": 0.00046823756267839567, "loss": 0.0, "step": 35980 }, { "epoch": 0.49210027961796937, "grad_norm": NaN, "learning_rate": 0.00046820845317080564, "loss": 0.0, "step": 35990 }, { "epoch": 0.4922370121213364, "grad_norm": NaN, "learning_rate": 0.0004681793312359445, "loss": 0.0, "step": 36000 }, { "epoch": 0.49237374462470346, "grad_norm": NaN, "learning_rate": 0.0004681501968754707, "loss": 0.0, "step": 36010 }, { "epoch": 0.4925104771280705, "grad_norm": NaN, "learning_rate": 0.00046812105009104367, "loss": 0.0, "step": 36020 }, { "epoch": 0.49264720963143754, "grad_norm": NaN, "learning_rate": 0.00046809189088432325, "loss": 0.0, "step": 36030 }, { "epoch": 0.4927839421348046, "grad_norm": NaN, "learning_rate": 0.00046806271925697016, "loss": 0.0, "step": 36040 }, { "epoch": 0.49292067463817163, "grad_norm": NaN, "learning_rate": 0.0004680335352106457, "loss": 0.0, "step": 36050 }, { "epoch": 0.4930574071415387, "grad_norm": NaN, "learning_rate": 0.000468004338747012, "loss": 0.0, "step": 36060 }, { "epoch": 0.4931941396449057, "grad_norm": NaN, "learning_rate": 0.0004679751298677317, "loss": 0.0, "step": 36070 }, { "epoch": 0.4933308721482727, "grad_norm": NaN, "learning_rate": 0.0004679459085744685, "loss": 0.0, "step": 36080 }, { "epoch": 0.49346760465163975, "grad_norm": NaN, "learning_rate": 0.00046791667486888633, "loss": 0.0, "step": 36090 }, { "epoch": 0.4936043371550068, "grad_norm": NaN, "learning_rate": 0.00046788742875265035, "loss": 0.0, "step": 36100 }, { "epoch": 0.49374106965837383, "grad_norm": NaN, "learning_rate": 0.00046785817022742605, "loss": 0.0, "step": 36110 }, { "epoch": 0.4938778021617409, "grad_norm": NaN, "learning_rate": 0.0004678288992948797, "loss": 0.0, "step": 36120 }, { "epoch": 0.4940145346651079, "grad_norm": NaN, "learning_rate": 0.00046779961595667843, "loss": 0.0, "step": 36130 }, { "epoch": 0.49415126716847496, "grad_norm": NaN, "learning_rate": 0.0004677703202144899, "loss": 0.0, "step": 36140 }, { "epoch": 0.494287999671842, "grad_norm": NaN, "learning_rate": 0.0004677410120699825, "loss": 0.0, "step": 36150 }, { "epoch": 0.49442473217520905, "grad_norm": NaN, "learning_rate": 0.0004677116915248255, "loss": 0.0, "step": 36160 }, { "epoch": 0.4945614646785761, "grad_norm": NaN, "learning_rate": 0.00046768235858068866, "loss": 0.0, "step": 36170 }, { "epoch": 0.4946981971819431, "grad_norm": NaN, "learning_rate": 0.00046765301323924244, "loss": 0.0, "step": 36180 }, { "epoch": 0.4948349296853101, "grad_norm": NaN, "learning_rate": 0.0004676236555021582, "loss": 0.0, "step": 36190 }, { "epoch": 0.49497166218867716, "grad_norm": NaN, "learning_rate": 0.000467594285371108, "loss": 0.0, "step": 36200 }, { "epoch": 0.4951083946920442, "grad_norm": NaN, "learning_rate": 0.00046756490284776433, "loss": 0.0, "step": 36210 }, { "epoch": 0.49524512719541125, "grad_norm": NaN, "learning_rate": 0.00046753550793380064, "loss": 0.0, "step": 36220 }, { "epoch": 0.4953818596987783, "grad_norm": NaN, "learning_rate": 0.00046750610063089106, "loss": 0.0, "step": 36230 }, { "epoch": 0.49551859220214534, "grad_norm": NaN, "learning_rate": 0.00046747668094071027, "loss": 0.0, "step": 36240 }, { "epoch": 0.4956553247055124, "grad_norm": NaN, "learning_rate": 0.0004674472488649338, "loss": 0.0, "step": 36250 }, { "epoch": 0.4957920572088794, "grad_norm": NaN, "learning_rate": 0.000467417804405238, "loss": 0.0, "step": 36260 }, { "epoch": 0.49592878971224647, "grad_norm": NaN, "learning_rate": 0.0004673883475632995, "loss": 0.0, "step": 36270 }, { "epoch": 0.4960655222156135, "grad_norm": NaN, "learning_rate": 0.00046735887834079606, "loss": 0.0, "step": 36280 }, { "epoch": 0.4962022547189805, "grad_norm": NaN, "learning_rate": 0.000467329396739406, "loss": 0.0, "step": 36290 }, { "epoch": 0.49633898722234754, "grad_norm": NaN, "learning_rate": 0.00046729990276080834, "loss": 0.0, "step": 36300 }, { "epoch": 0.4964757197257146, "grad_norm": NaN, "learning_rate": 0.0004672703964066828, "loss": 0.0, "step": 36310 }, { "epoch": 0.4966124522290816, "grad_norm": NaN, "learning_rate": 0.0004672408776787097, "loss": 0.0, "step": 36320 }, { "epoch": 0.49674918473244867, "grad_norm": NaN, "learning_rate": 0.00046721134657857035, "loss": 0.0, "step": 36330 }, { "epoch": 0.4968859172358157, "grad_norm": NaN, "learning_rate": 0.0004671818031079464, "loss": 0.0, "step": 36340 }, { "epoch": 0.49702264973918275, "grad_norm": NaN, "learning_rate": 0.0004671522472685206, "loss": 0.0, "step": 36350 }, { "epoch": 0.4971593822425498, "grad_norm": NaN, "learning_rate": 0.00046712267906197597, "loss": 0.0, "step": 36360 }, { "epoch": 0.49729611474591684, "grad_norm": NaN, "learning_rate": 0.0004670930984899966, "loss": 0.0, "step": 36370 }, { "epoch": 0.4974328472492839, "grad_norm": NaN, "learning_rate": 0.0004670635055542671, "loss": 0.0, "step": 36380 }, { "epoch": 0.4975695797526509, "grad_norm": NaN, "learning_rate": 0.000467033900256473, "loss": 0.0, "step": 36390 }, { "epoch": 0.4977063122560179, "grad_norm": NaN, "learning_rate": 0.00046700428259830004, "loss": 0.0, "step": 36400 }, { "epoch": 0.49784304475938496, "grad_norm": NaN, "learning_rate": 0.00046697465258143517, "loss": 0.0, "step": 36410 }, { "epoch": 0.497979777262752, "grad_norm": NaN, "learning_rate": 0.0004669450102075659, "loss": 0.0, "step": 36420 }, { "epoch": 0.49811650976611904, "grad_norm": NaN, "learning_rate": 0.0004669153554783803, "loss": 0.0, "step": 36430 }, { "epoch": 0.4982532422694861, "grad_norm": NaN, "learning_rate": 0.00046688568839556733, "loss": 0.0, "step": 36440 }, { "epoch": 0.49838997477285313, "grad_norm": NaN, "learning_rate": 0.0004668560089608165, "loss": 0.0, "step": 36450 }, { "epoch": 0.49852670727622017, "grad_norm": NaN, "learning_rate": 0.00046682631717581815, "loss": 0.0, "step": 36460 }, { "epoch": 0.4986634397795872, "grad_norm": NaN, "learning_rate": 0.0004667966130422633, "loss": 0.0, "step": 36470 }, { "epoch": 0.49880017228295426, "grad_norm": NaN, "learning_rate": 0.00046676689656184345, "loss": 0.0, "step": 36480 }, { "epoch": 0.4989369047863213, "grad_norm": NaN, "learning_rate": 0.0004667371677362513, "loss": 0.0, "step": 36490 }, { "epoch": 0.49907363728968834, "grad_norm": NaN, "learning_rate": 0.0004667074265671797, "loss": 0.0, "step": 36500 }, { "epoch": 0.49921036979305533, "grad_norm": NaN, "learning_rate": 0.00046667767305632246, "loss": 0.0, "step": 36510 }, { "epoch": 0.4993471022964224, "grad_norm": NaN, "learning_rate": 0.0004666479072053743, "loss": 0.0, "step": 36520 }, { "epoch": 0.4994838347997894, "grad_norm": NaN, "learning_rate": 0.0004666181290160302, "loss": 0.0, "step": 36530 }, { "epoch": 0.49962056730315646, "grad_norm": NaN, "learning_rate": 0.0004665883384899862, "loss": 0.0, "step": 36540 }, { "epoch": 0.4997572998065235, "grad_norm": NaN, "learning_rate": 0.0004665585356289388, "loss": 0.0, "step": 36550 }, { "epoch": 0.49989403230989055, "grad_norm": NaN, "learning_rate": 0.0004665287204345854, "loss": 0.0, "step": 36560 }, { "epoch": 0.5000307648132576, "grad_norm": NaN, "learning_rate": 0.00046649889290862397, "loss": 0.0, "step": 36570 }, { "epoch": 0.5001674973166246, "grad_norm": NaN, "learning_rate": 0.0004664690530527532, "loss": 0.0, "step": 36580 }, { "epoch": 0.5003042298199917, "grad_norm": NaN, "learning_rate": 0.00046643920086867263, "loss": 0.0, "step": 36590 }, { "epoch": 0.5004409623233587, "grad_norm": NaN, "learning_rate": 0.0004664093363580824, "loss": 0.0, "step": 36600 }, { "epoch": 0.5005776948267258, "grad_norm": NaN, "learning_rate": 0.00046637945952268314, "loss": 0.0, "step": 36610 }, { "epoch": 0.5007144273300927, "grad_norm": NaN, "learning_rate": 0.0004663495703641765, "loss": 0.0, "step": 36620 }, { "epoch": 0.5008511598334598, "grad_norm": NaN, "learning_rate": 0.0004663196688842647, "loss": 0.0, "step": 36630 }, { "epoch": 0.5009878923368268, "grad_norm": NaN, "learning_rate": 0.00046628975508465067, "loss": 0.0, "step": 36640 }, { "epoch": 0.5011246248401939, "grad_norm": NaN, "learning_rate": 0.000466259828967038, "loss": 0.0, "step": 36650 }, { "epoch": 0.5012613573435609, "grad_norm": NaN, "learning_rate": 0.00046622989053313115, "loss": 0.0, "step": 36660 }, { "epoch": 0.5013980898469279, "grad_norm": NaN, "learning_rate": 0.0004661999397846349, "loss": 0.0, "step": 36670 }, { "epoch": 0.501534822350295, "grad_norm": NaN, "learning_rate": 0.00046616997672325534, "loss": 0.0, "step": 36680 }, { "epoch": 0.501671554853662, "grad_norm": NaN, "learning_rate": 0.0004661400013506986, "loss": 0.0, "step": 36690 }, { "epoch": 0.5018082873570291, "grad_norm": NaN, "learning_rate": 0.000466110013668672, "loss": 0.0, "step": 36700 }, { "epoch": 0.5019450198603961, "grad_norm": NaN, "learning_rate": 0.0004660800136788833, "loss": 0.0, "step": 36710 }, { "epoch": 0.5020817523637632, "grad_norm": NaN, "learning_rate": 0.00046605000138304106, "loss": 0.0, "step": 36720 }, { "epoch": 0.5022184848671302, "grad_norm": NaN, "learning_rate": 0.00046601997678285444, "loss": 0.0, "step": 36730 }, { "epoch": 0.5023552173704973, "grad_norm": NaN, "learning_rate": 0.0004659899398800336, "loss": 0.0, "step": 36740 }, { "epoch": 0.5024919498738643, "grad_norm": NaN, "learning_rate": 0.00046595989067628896, "loss": 0.0, "step": 36750 }, { "epoch": 0.5026286823772314, "grad_norm": NaN, "learning_rate": 0.00046592982917333196, "loss": 0.0, "step": 36760 }, { "epoch": 0.5027654148805983, "grad_norm": NaN, "learning_rate": 0.0004658997553728746, "loss": 0.0, "step": 36770 }, { "epoch": 0.5029021473839653, "grad_norm": NaN, "learning_rate": 0.00046586966927662966, "loss": 0.0, "step": 36780 }, { "epoch": 0.5030388798873324, "grad_norm": NaN, "learning_rate": 0.0004658395708863106, "loss": 0.0, "step": 36790 }, { "epoch": 0.5031756123906994, "grad_norm": NaN, "learning_rate": 0.0004658094602036316, "loss": 0.0, "step": 36800 }, { "epoch": 0.5033123448940665, "grad_norm": NaN, "learning_rate": 0.0004657793372303074, "loss": 0.0, "step": 36810 }, { "epoch": 0.5034490773974335, "grad_norm": NaN, "learning_rate": 0.0004657492019680536, "loss": 0.0, "step": 36820 }, { "epoch": 0.5035858099008006, "grad_norm": NaN, "learning_rate": 0.0004657190544185864, "loss": 0.0, "step": 36830 }, { "epoch": 0.5037225424041676, "grad_norm": NaN, "learning_rate": 0.00046568889458362285, "loss": 0.0, "step": 36840 }, { "epoch": 0.5038592749075347, "grad_norm": NaN, "learning_rate": 0.00046565872246488043, "loss": 0.0, "step": 36850 }, { "epoch": 0.5039960074109017, "grad_norm": NaN, "learning_rate": 0.00046562853806407766, "loss": 0.0, "step": 36860 }, { "epoch": 0.5041327399142688, "grad_norm": NaN, "learning_rate": 0.00046559834138293343, "loss": 0.0, "step": 36870 }, { "epoch": 0.5042694724176358, "grad_norm": NaN, "learning_rate": 0.00046556813242316767, "loss": 0.0, "step": 36880 }, { "epoch": 0.5044062049210027, "grad_norm": NaN, "learning_rate": 0.0004655379111865007, "loss": 0.0, "step": 36890 }, { "epoch": 0.5045429374243698, "grad_norm": NaN, "learning_rate": 0.0004655076776746536, "loss": 0.0, "step": 36900 }, { "epoch": 0.5046796699277368, "grad_norm": NaN, "learning_rate": 0.0004654774318893483, "loss": 0.0, "step": 36910 }, { "epoch": 0.5048164024311039, "grad_norm": NaN, "learning_rate": 0.0004654471738323073, "loss": 0.0, "step": 36920 }, { "epoch": 0.5049531349344709, "grad_norm": NaN, "learning_rate": 0.00046541690350525387, "loss": 0.0, "step": 36930 }, { "epoch": 0.505089867437838, "grad_norm": NaN, "learning_rate": 0.0004653866209099119, "loss": 0.0, "step": 36940 }, { "epoch": 0.505226599941205, "grad_norm": NaN, "learning_rate": 0.0004653563260480061, "loss": 0.0, "step": 36950 }, { "epoch": 0.5053633324445721, "grad_norm": NaN, "learning_rate": 0.00046532601892126176, "loss": 0.0, "step": 36960 }, { "epoch": 0.5055000649479391, "grad_norm": NaN, "learning_rate": 0.00046529569953140487, "loss": 0.0, "step": 36970 }, { "epoch": 0.5056367974513062, "grad_norm": NaN, "learning_rate": 0.0004652653678801623, "loss": 0.0, "step": 36980 }, { "epoch": 0.5057735299546732, "grad_norm": NaN, "learning_rate": 0.0004652350239692613, "loss": 0.0, "step": 36990 }, { "epoch": 0.5059102624580402, "grad_norm": NaN, "learning_rate": 0.00046520466780043007, "loss": 0.0, "step": 37000 }, { "epoch": 0.5060469949614073, "grad_norm": NaN, "learning_rate": 0.0004651742993753975, "loss": 0.0, "step": 37010 }, { "epoch": 0.5061837274647742, "grad_norm": NaN, "learning_rate": 0.00046514391869589307, "loss": 0.0, "step": 37020 }, { "epoch": 0.5063204599681413, "grad_norm": NaN, "learning_rate": 0.00046511352576364694, "loss": 0.0, "step": 37030 }, { "epoch": 0.5064571924715083, "grad_norm": NaN, "learning_rate": 0.00046508312058039016, "loss": 0.0, "step": 37040 }, { "epoch": 0.5065939249748754, "grad_norm": NaN, "learning_rate": 0.0004650527031478542, "loss": 0.0, "step": 37050 }, { "epoch": 0.5067306574782424, "grad_norm": NaN, "learning_rate": 0.0004650222734677715, "loss": 0.0, "step": 37060 }, { "epoch": 0.5068673899816095, "grad_norm": NaN, "learning_rate": 0.0004649918315418749, "loss": 0.0, "step": 37070 }, { "epoch": 0.5070041224849765, "grad_norm": NaN, "learning_rate": 0.00046496137737189833, "loss": 0.0, "step": 37080 }, { "epoch": 0.5071408549883436, "grad_norm": NaN, "learning_rate": 0.00046493091095957607, "loss": 0.0, "step": 37090 }, { "epoch": 0.5072775874917106, "grad_norm": NaN, "learning_rate": 0.00046490043230664325, "loss": 0.0, "step": 37100 }, { "epoch": 0.5074143199950776, "grad_norm": NaN, "learning_rate": 0.00046486994141483565, "loss": 0.0, "step": 37110 }, { "epoch": 0.5075510524984447, "grad_norm": NaN, "learning_rate": 0.00046483943828588977, "loss": 0.0, "step": 37120 }, { "epoch": 0.5076877850018117, "grad_norm": NaN, "learning_rate": 0.00046480892292154283, "loss": 0.0, "step": 37130 }, { "epoch": 0.5078245175051788, "grad_norm": NaN, "learning_rate": 0.0004647783953235327, "loss": 0.0, "step": 37140 }, { "epoch": 0.5079612500085458, "grad_norm": NaN, "learning_rate": 0.000464747855493598, "loss": 0.0, "step": 37150 }, { "epoch": 0.5080979825119128, "grad_norm": NaN, "learning_rate": 0.000464717303433478, "loss": 0.0, "step": 37160 }, { "epoch": 0.5082347150152798, "grad_norm": NaN, "learning_rate": 0.00046468673914491266, "loss": 0.0, "step": 37170 }, { "epoch": 0.5083714475186469, "grad_norm": NaN, "learning_rate": 0.0004646561626296427, "loss": 0.0, "step": 37180 }, { "epoch": 0.5085081800220139, "grad_norm": NaN, "learning_rate": 0.0004646255738894095, "loss": 0.0, "step": 37190 }, { "epoch": 0.508644912525381, "grad_norm": NaN, "learning_rate": 0.00046459497292595506, "loss": 0.0, "step": 37200 }, { "epoch": 0.508781645028748, "grad_norm": NaN, "learning_rate": 0.0004645643597410222, "loss": 0.0, "step": 37210 }, { "epoch": 0.508918377532115, "grad_norm": NaN, "learning_rate": 0.0004645337343363543, "loss": 0.0, "step": 37220 }, { "epoch": 0.5090551100354821, "grad_norm": NaN, "learning_rate": 0.00046450309671369565, "loss": 0.0, "step": 37230 }, { "epoch": 0.5091918425388491, "grad_norm": NaN, "learning_rate": 0.0004644724468747911, "loss": 0.0, "step": 37240 }, { "epoch": 0.5093285750422162, "grad_norm": NaN, "learning_rate": 0.00046444178482138606, "loss": 0.0, "step": 37250 }, { "epoch": 0.5094653075455832, "grad_norm": NaN, "learning_rate": 0.0004644111105552269, "loss": 0.0, "step": 37260 }, { "epoch": 0.5096020400489503, "grad_norm": NaN, "learning_rate": 0.00046438042407806045, "loss": 0.0, "step": 37270 }, { "epoch": 0.5097387725523173, "grad_norm": NaN, "learning_rate": 0.00046434972539163453, "loss": 0.0, "step": 37280 }, { "epoch": 0.5098755050556844, "grad_norm": NaN, "learning_rate": 0.0004643190144976973, "loss": 0.0, "step": 37290 }, { "epoch": 0.5100122375590513, "grad_norm": NaN, "learning_rate": 0.0004642882913979979, "loss": 0.0, "step": 37300 }, { "epoch": 0.5101489700624184, "grad_norm": NaN, "learning_rate": 0.00046425755609428595, "loss": 0.0, "step": 37310 }, { "epoch": 0.5102857025657854, "grad_norm": NaN, "learning_rate": 0.0004642268085883119, "loss": 0.0, "step": 37320 }, { "epoch": 0.5104224350691524, "grad_norm": NaN, "learning_rate": 0.0004641960488818269, "loss": 0.0, "step": 37330 }, { "epoch": 0.5105591675725195, "grad_norm": NaN, "learning_rate": 0.0004641652769765827, "loss": 0.0, "step": 37340 }, { "epoch": 0.5106959000758865, "grad_norm": NaN, "learning_rate": 0.00046413449287433195, "loss": 0.0, "step": 37350 }, { "epoch": 0.5108326325792536, "grad_norm": NaN, "learning_rate": 0.00046410369657682774, "loss": 0.0, "step": 37360 }, { "epoch": 0.5109693650826206, "grad_norm": NaN, "learning_rate": 0.0004640728880858239, "loss": 0.0, "step": 37370 }, { "epoch": 0.5111060975859877, "grad_norm": NaN, "learning_rate": 0.0004640420674030751, "loss": 0.0, "step": 37380 }, { "epoch": 0.5112428300893547, "grad_norm": NaN, "learning_rate": 0.00046401123453033657, "loss": 0.0, "step": 37390 }, { "epoch": 0.5113795625927218, "grad_norm": NaN, "learning_rate": 0.00046398038946936427, "loss": 0.0, "step": 37400 }, { "epoch": 0.5115162950960888, "grad_norm": NaN, "learning_rate": 0.00046394953222191494, "loss": 0.0, "step": 37410 }, { "epoch": 0.5116530275994559, "grad_norm": NaN, "learning_rate": 0.00046391866278974595, "loss": 0.0, "step": 37420 }, { "epoch": 0.5117897601028228, "grad_norm": NaN, "learning_rate": 0.00046388778117461535, "loss": 0.0, "step": 37430 }, { "epoch": 0.5119264926061898, "grad_norm": NaN, "learning_rate": 0.0004638568873782818, "loss": 0.0, "step": 37440 }, { "epoch": 0.5120632251095569, "grad_norm": NaN, "learning_rate": 0.00046382598140250474, "loss": 0.0, "step": 37450 }, { "epoch": 0.5121999576129239, "grad_norm": NaN, "learning_rate": 0.00046379506324904454, "loss": 0.0, "step": 37460 }, { "epoch": 0.512336690116291, "grad_norm": NaN, "learning_rate": 0.00046376413291966174, "loss": 0.0, "step": 37470 }, { "epoch": 0.512473422619658, "grad_norm": NaN, "learning_rate": 0.000463733190416118, "loss": 0.0, "step": 37480 }, { "epoch": 0.5126101551230251, "grad_norm": NaN, "learning_rate": 0.00046370223574017556, "loss": 0.0, "step": 37490 }, { "epoch": 0.5127468876263921, "grad_norm": NaN, "learning_rate": 0.00046367126889359725, "loss": 0.0, "step": 37500 }, { "epoch": 0.5128836201297592, "grad_norm": NaN, "learning_rate": 0.0004636402898781468, "loss": 0.0, "step": 37510 }, { "epoch": 0.5130203526331262, "grad_norm": NaN, "learning_rate": 0.0004636092986955883, "loss": 0.0, "step": 37520 }, { "epoch": 0.5131570851364933, "grad_norm": NaN, "learning_rate": 0.00046357829534768703, "loss": 0.0, "step": 37530 }, { "epoch": 0.5132938176398603, "grad_norm": NaN, "learning_rate": 0.0004635472798362084, "loss": 0.0, "step": 37540 }, { "epoch": 0.5134305501432272, "grad_norm": NaN, "learning_rate": 0.00046351625216291893, "loss": 0.0, "step": 37550 }, { "epoch": 0.5135672826465943, "grad_norm": NaN, "learning_rate": 0.00046348521232958563, "loss": 0.0, "step": 37560 }, { "epoch": 0.5137040151499613, "grad_norm": NaN, "learning_rate": 0.00046345416033797627, "loss": 0.0, "step": 37570 }, { "epoch": 0.5138407476533284, "grad_norm": NaN, "learning_rate": 0.0004634230961898594, "loss": 0.0, "step": 37580 }, { "epoch": 0.5139774801566954, "grad_norm": NaN, "learning_rate": 0.00046339201988700407, "loss": 0.0, "step": 37590 }, { "epoch": 0.5141142126600625, "grad_norm": NaN, "learning_rate": 0.0004633609314311801, "loss": 0.0, "step": 37600 }, { "epoch": 0.5142509451634295, "grad_norm": NaN, "learning_rate": 0.00046332983082415807, "loss": 0.0, "step": 37610 }, { "epoch": 0.5143876776667966, "grad_norm": NaN, "learning_rate": 0.0004632987180677092, "loss": 0.0, "step": 37620 }, { "epoch": 0.5145244101701636, "grad_norm": NaN, "learning_rate": 0.0004632675931636053, "loss": 0.0, "step": 37630 }, { "epoch": 0.5146611426735307, "grad_norm": NaN, "learning_rate": 0.0004632364561136192, "loss": 0.0, "step": 37640 }, { "epoch": 0.5147978751768977, "grad_norm": NaN, "learning_rate": 0.000463205306919524, "loss": 0.0, "step": 37650 }, { "epoch": 0.5149346076802647, "grad_norm": NaN, "learning_rate": 0.00046317414558309376, "loss": 0.0, "step": 37660 }, { "epoch": 0.5150713401836318, "grad_norm": NaN, "learning_rate": 0.00046314297210610323, "loss": 0.0, "step": 37670 }, { "epoch": 0.5152080726869988, "grad_norm": NaN, "learning_rate": 0.0004631117864903276, "loss": 0.0, "step": 37680 }, { "epoch": 0.5153448051903659, "grad_norm": NaN, "learning_rate": 0.0004630805887375431, "loss": 0.0, "step": 37690 }, { "epoch": 0.5154815376937328, "grad_norm": NaN, "learning_rate": 0.0004630493788495264, "loss": 0.0, "step": 37700 }, { "epoch": 0.5156182701970999, "grad_norm": NaN, "learning_rate": 0.00046301815682805504, "loss": 0.0, "step": 37710 }, { "epoch": 0.5157550027004669, "grad_norm": NaN, "learning_rate": 0.000462986922674907, "loss": 0.0, "step": 37720 }, { "epoch": 0.515891735203834, "grad_norm": NaN, "learning_rate": 0.0004629556763918613, "loss": 0.0, "step": 37730 }, { "epoch": 0.516028467707201, "grad_norm": NaN, "learning_rate": 0.00046292441798069723, "loss": 0.0, "step": 37740 }, { "epoch": 0.5161652002105681, "grad_norm": NaN, "learning_rate": 0.0004628931474431952, "loss": 0.0, "step": 37750 }, { "epoch": 0.5163019327139351, "grad_norm": NaN, "learning_rate": 0.00046286186478113604, "loss": 0.0, "step": 37760 }, { "epoch": 0.5164386652173021, "grad_norm": NaN, "learning_rate": 0.0004628305699963013, "loss": 0.0, "step": 37770 }, { "epoch": 0.5165753977206692, "grad_norm": NaN, "learning_rate": 0.0004627992630904733, "loss": 0.0, "step": 37780 }, { "epoch": 0.5167121302240362, "grad_norm": NaN, "learning_rate": 0.00046276794406543504, "loss": 0.0, "step": 37790 }, { "epoch": 0.5168488627274033, "grad_norm": NaN, "learning_rate": 0.00046273661292297013, "loss": 0.0, "step": 37800 }, { "epoch": 0.5169855952307703, "grad_norm": NaN, "learning_rate": 0.00046270526966486297, "loss": 0.0, "step": 37810 }, { "epoch": 0.5171223277341374, "grad_norm": NaN, "learning_rate": 0.00046267391429289853, "loss": 0.0, "step": 37820 }, { "epoch": 0.5172590602375043, "grad_norm": NaN, "learning_rate": 0.00046264254680886265, "loss": 0.0, "step": 37830 }, { "epoch": 0.5173957927408714, "grad_norm": NaN, "learning_rate": 0.00046261116721454167, "loss": 0.0, "step": 37840 }, { "epoch": 0.5175325252442384, "grad_norm": NaN, "learning_rate": 0.0004625797755117227, "loss": 0.0, "step": 37850 }, { "epoch": 0.5176692577476055, "grad_norm": NaN, "learning_rate": 0.00046254837170219356, "loss": 0.0, "step": 37860 }, { "epoch": 0.5178059902509725, "grad_norm": NaN, "learning_rate": 0.0004625169557877427, "loss": 0.0, "step": 37870 }, { "epoch": 0.5179427227543395, "grad_norm": NaN, "learning_rate": 0.0004624855277701594, "loss": 0.0, "step": 37880 }, { "epoch": 0.5180794552577066, "grad_norm": NaN, "learning_rate": 0.00046245408765123344, "loss": 0.0, "step": 37890 }, { "epoch": 0.5182161877610736, "grad_norm": NaN, "learning_rate": 0.00046242263543275534, "loss": 0.0, "step": 37900 }, { "epoch": 0.5183529202644407, "grad_norm": NaN, "learning_rate": 0.0004623911711165165, "loss": 0.0, "step": 37910 }, { "epoch": 0.5184896527678077, "grad_norm": NaN, "learning_rate": 0.0004623596947043087, "loss": 0.0, "step": 37920 }, { "epoch": 0.5186263852711748, "grad_norm": NaN, "learning_rate": 0.0004623282061979247, "loss": 0.0, "step": 37930 }, { "epoch": 0.5187631177745418, "grad_norm": NaN, "learning_rate": 0.0004622967055991577, "loss": 0.0, "step": 37940 }, { "epoch": 0.5188998502779089, "grad_norm": NaN, "learning_rate": 0.0004622651929098018, "loss": 0.0, "step": 37950 }, { "epoch": 0.5190365827812758, "grad_norm": NaN, "learning_rate": 0.00046223366813165156, "loss": 0.0, "step": 37960 }, { "epoch": 0.5191733152846428, "grad_norm": NaN, "learning_rate": 0.0004622021312665025, "loss": 0.0, "step": 37970 }, { "epoch": 0.5193100477880099, "grad_norm": NaN, "learning_rate": 0.0004621705823161506, "loss": 0.0, "step": 37980 }, { "epoch": 0.5194467802913769, "grad_norm": NaN, "learning_rate": 0.0004621390212823927, "loss": 0.0, "step": 37990 }, { "epoch": 0.519583512794744, "grad_norm": NaN, "learning_rate": 0.0004621074481670261, "loss": 0.0, "step": 38000 }, { "epoch": 0.519720245298111, "grad_norm": NaN, "learning_rate": 0.0004620758629718491, "loss": 0.0, "step": 38010 }, { "epoch": 0.5198569778014781, "grad_norm": NaN, "learning_rate": 0.00046204426569866044, "loss": 0.0, "step": 38020 }, { "epoch": 0.5199937103048451, "grad_norm": NaN, "learning_rate": 0.0004620126563492595, "loss": 0.0, "step": 38030 }, { "epoch": 0.5201304428082122, "grad_norm": NaN, "learning_rate": 0.00046198103492544674, "loss": 0.0, "step": 38040 }, { "epoch": 0.5202671753115792, "grad_norm": NaN, "learning_rate": 0.00046194940142902285, "loss": 0.0, "step": 38050 }, { "epoch": 0.5204039078149463, "grad_norm": NaN, "learning_rate": 0.0004619177558617894, "loss": 0.0, "step": 38060 }, { "epoch": 0.5205406403183133, "grad_norm": NaN, "learning_rate": 0.00046188609822554884, "loss": 0.0, "step": 38070 }, { "epoch": 0.5206773728216803, "grad_norm": NaN, "learning_rate": 0.0004618544285221039, "loss": 0.0, "step": 38080 }, { "epoch": 0.5208141053250473, "grad_norm": NaN, "learning_rate": 0.0004618227467532583, "loss": 0.0, "step": 38090 }, { "epoch": 0.5209508378284143, "grad_norm": NaN, "learning_rate": 0.00046179105292081635, "loss": 0.0, "step": 38100 }, { "epoch": 0.5210875703317814, "grad_norm": NaN, "learning_rate": 0.00046175934702658304, "loss": 0.0, "step": 38110 }, { "epoch": 0.5212243028351484, "grad_norm": NaN, "learning_rate": 0.00046172762907236423, "loss": 0.0, "step": 38120 }, { "epoch": 0.5213610353385155, "grad_norm": NaN, "learning_rate": 0.000461695899059966, "loss": 0.0, "step": 38130 }, { "epoch": 0.5214977678418825, "grad_norm": NaN, "learning_rate": 0.00046166415699119563, "loss": 0.0, "step": 38140 }, { "epoch": 0.5216345003452496, "grad_norm": NaN, "learning_rate": 0.0004616324028678608, "loss": 0.0, "step": 38150 }, { "epoch": 0.5217712328486166, "grad_norm": NaN, "learning_rate": 0.00046160063669177, "loss": 0.0, "step": 38160 }, { "epoch": 0.5219079653519837, "grad_norm": NaN, "learning_rate": 0.0004615688584647323, "loss": 0.0, "step": 38170 }, { "epoch": 0.5220446978553507, "grad_norm": NaN, "learning_rate": 0.00046153706818855754, "loss": 0.0, "step": 38180 }, { "epoch": 0.5221814303587177, "grad_norm": NaN, "learning_rate": 0.00046150526586505634, "loss": 0.0, "step": 38190 }, { "epoch": 0.5223181628620848, "grad_norm": NaN, "learning_rate": 0.0004614734514960396, "loss": 0.0, "step": 38200 }, { "epoch": 0.5224548953654518, "grad_norm": NaN, "learning_rate": 0.00046144162508331946, "loss": 0.0, "step": 38210 }, { "epoch": 0.5225916278688189, "grad_norm": NaN, "learning_rate": 0.0004614097866287084, "loss": 0.0, "step": 38220 }, { "epoch": 0.5227283603721858, "grad_norm": NaN, "learning_rate": 0.0004613779361340196, "loss": 0.0, "step": 38230 }, { "epoch": 0.5228650928755529, "grad_norm": NaN, "learning_rate": 0.00046134607360106693, "loss": 0.0, "step": 38240 }, { "epoch": 0.5230018253789199, "grad_norm": NaN, "learning_rate": 0.0004613141990316653, "loss": 0.0, "step": 38250 }, { "epoch": 0.523138557882287, "grad_norm": NaN, "learning_rate": 0.00046128231242762964, "loss": 0.0, "step": 38260 }, { "epoch": 0.523275290385654, "grad_norm": NaN, "learning_rate": 0.00046125041379077624, "loss": 0.0, "step": 38270 }, { "epoch": 0.5234120228890211, "grad_norm": NaN, "learning_rate": 0.0004612185031229216, "loss": 0.0, "step": 38280 }, { "epoch": 0.5235487553923881, "grad_norm": NaN, "learning_rate": 0.00046118658042588307, "loss": 0.0, "step": 38290 }, { "epoch": 0.5236854878957551, "grad_norm": NaN, "learning_rate": 0.0004611546457014788, "loss": 0.0, "step": 38300 }, { "epoch": 0.5238222203991222, "grad_norm": NaN, "learning_rate": 0.00046112269895152747, "loss": 0.0, "step": 38310 }, { "epoch": 0.5239589529024892, "grad_norm": NaN, "learning_rate": 0.0004610907401778485, "loss": 0.0, "step": 38320 }, { "epoch": 0.5240956854058563, "grad_norm": NaN, "learning_rate": 0.00046105876938226187, "loss": 0.0, "step": 38330 }, { "epoch": 0.5242324179092233, "grad_norm": NaN, "learning_rate": 0.00046102678656658846, "loss": 0.0, "step": 38340 }, { "epoch": 0.5243691504125904, "grad_norm": NaN, "learning_rate": 0.0004609947917326498, "loss": 0.0, "step": 38350 }, { "epoch": 0.5245058829159573, "grad_norm": NaN, "learning_rate": 0.00046096278488226794, "loss": 0.0, "step": 38360 }, { "epoch": 0.5246426154193244, "grad_norm": NaN, "learning_rate": 0.00046093076601726575, "loss": 0.0, "step": 38370 }, { "epoch": 0.5247793479226914, "grad_norm": NaN, "learning_rate": 0.0004608987351394668, "loss": 0.0, "step": 38380 }, { "epoch": 0.5249160804260585, "grad_norm": NaN, "learning_rate": 0.0004608666922506951, "loss": 0.0, "step": 38390 }, { "epoch": 0.5250528129294255, "grad_norm": NaN, "learning_rate": 0.00046083463735277583, "loss": 0.0, "step": 38400 }, { "epoch": 0.5251895454327925, "grad_norm": NaN, "learning_rate": 0.00046080257044753427, "loss": 0.0, "step": 38410 }, { "epoch": 0.5253262779361596, "grad_norm": NaN, "learning_rate": 0.0004607704915367969, "loss": 0.0, "step": 38420 }, { "epoch": 0.5254630104395266, "grad_norm": NaN, "learning_rate": 0.00046073840062239055, "loss": 0.0, "step": 38430 }, { "epoch": 0.5255997429428937, "grad_norm": NaN, "learning_rate": 0.0004607062977061428, "loss": 0.0, "step": 38440 }, { "epoch": 0.5257364754462607, "grad_norm": NaN, "learning_rate": 0.00046067418278988203, "loss": 0.0, "step": 38450 }, { "epoch": 0.5258732079496278, "grad_norm": NaN, "learning_rate": 0.0004606420558754373, "loss": 0.0, "step": 38460 }, { "epoch": 0.5260099404529948, "grad_norm": NaN, "learning_rate": 0.0004606099169646381, "loss": 0.0, "step": 38470 }, { "epoch": 0.5261466729563619, "grad_norm": NaN, "learning_rate": 0.0004605777660593149, "loss": 0.0, "step": 38480 }, { "epoch": 0.5262834054597288, "grad_norm": NaN, "learning_rate": 0.0004605456031612987, "loss": 0.0, "step": 38490 }, { "epoch": 0.526420137963096, "grad_norm": NaN, "learning_rate": 0.0004605134282724213, "loss": 0.0, "step": 38500 }, { "epoch": 0.5265568704664629, "grad_norm": NaN, "learning_rate": 0.000460481241394515, "loss": 0.0, "step": 38510 }, { "epoch": 0.5266936029698299, "grad_norm": NaN, "learning_rate": 0.00046044904252941286, "loss": 0.0, "step": 38520 }, { "epoch": 0.526830335473197, "grad_norm": NaN, "learning_rate": 0.0004604168316789488, "loss": 0.0, "step": 38530 }, { "epoch": 0.526967067976564, "grad_norm": NaN, "learning_rate": 0.0004603846088449572, "loss": 0.0, "step": 38540 }, { "epoch": 0.5271038004799311, "grad_norm": NaN, "learning_rate": 0.0004603523740292732, "loss": 0.0, "step": 38550 }, { "epoch": 0.5272405329832981, "grad_norm": NaN, "learning_rate": 0.0004603201272337325, "loss": 0.0, "step": 38560 }, { "epoch": 0.5273772654866652, "grad_norm": NaN, "learning_rate": 0.0004602878684601718, "loss": 0.0, "step": 38570 }, { "epoch": 0.5275139979900322, "grad_norm": NaN, "learning_rate": 0.00046025559771042815, "loss": 0.0, "step": 38580 }, { "epoch": 0.5276507304933993, "grad_norm": NaN, "learning_rate": 0.0004602233149863394, "loss": 0.0, "step": 38590 }, { "epoch": 0.5277874629967663, "grad_norm": NaN, "learning_rate": 0.0004601910202897442, "loss": 0.0, "step": 38600 }, { "epoch": 0.5279241955001334, "grad_norm": NaN, "learning_rate": 0.0004601587136224817, "loss": 0.0, "step": 38610 }, { "epoch": 0.5280609280035004, "grad_norm": NaN, "learning_rate": 0.00046012639498639184, "loss": 0.0, "step": 38620 }, { "epoch": 0.5281976605068673, "grad_norm": NaN, "learning_rate": 0.0004600940643833151, "loss": 0.0, "step": 38630 }, { "epoch": 0.5283343930102344, "grad_norm": NaN, "learning_rate": 0.000460061721815093, "loss": 0.0, "step": 38640 }, { "epoch": 0.5284711255136014, "grad_norm": NaN, "learning_rate": 0.00046002936728356726, "loss": 0.0, "step": 38650 }, { "epoch": 0.5286078580169685, "grad_norm": NaN, "learning_rate": 0.0004599970007905806, "loss": 0.0, "step": 38660 }, { "epoch": 0.5287445905203355, "grad_norm": NaN, "learning_rate": 0.00045996462233797644, "loss": 0.0, "step": 38670 }, { "epoch": 0.5288813230237026, "grad_norm": NaN, "learning_rate": 0.0004599322319275986, "loss": 0.0, "step": 38680 }, { "epoch": 0.5290180555270696, "grad_norm": NaN, "learning_rate": 0.0004598998295612918, "loss": 0.0, "step": 38690 }, { "epoch": 0.5291547880304367, "grad_norm": NaN, "learning_rate": 0.00045986741524090143, "loss": 0.0, "step": 38700 }, { "epoch": 0.5292915205338037, "grad_norm": NaN, "learning_rate": 0.0004598349889682736, "loss": 0.0, "step": 38710 }, { "epoch": 0.5294282530371708, "grad_norm": NaN, "learning_rate": 0.0004598025507452549, "loss": 0.0, "step": 38720 }, { "epoch": 0.5295649855405378, "grad_norm": NaN, "learning_rate": 0.00045977010057369274, "loss": 0.0, "step": 38730 }, { "epoch": 0.5297017180439048, "grad_norm": NaN, "learning_rate": 0.0004597376384554354, "loss": 0.0, "step": 38740 }, { "epoch": 0.5298384505472719, "grad_norm": NaN, "learning_rate": 0.0004597051643923313, "loss": 0.0, "step": 38750 }, { "epoch": 0.5299751830506388, "grad_norm": NaN, "learning_rate": 0.0004596726783862303, "loss": 0.0, "step": 38760 }, { "epoch": 0.5301119155540059, "grad_norm": NaN, "learning_rate": 0.00045964018043898215, "loss": 0.0, "step": 38770 }, { "epoch": 0.5302486480573729, "grad_norm": NaN, "learning_rate": 0.0004596076705524379, "loss": 0.0, "step": 38780 }, { "epoch": 0.53038538056074, "grad_norm": NaN, "learning_rate": 0.00045957514872844887, "loss": 0.0, "step": 38790 }, { "epoch": 0.530522113064107, "grad_norm": NaN, "learning_rate": 0.0004595426149688673, "loss": 0.0, "step": 38800 }, { "epoch": 0.5306588455674741, "grad_norm": NaN, "learning_rate": 0.00045951006927554607, "loss": 0.0, "step": 38810 }, { "epoch": 0.5307955780708411, "grad_norm": NaN, "learning_rate": 0.0004594775116503386, "loss": 0.0, "step": 38820 }, { "epoch": 0.5309323105742082, "grad_norm": NaN, "learning_rate": 0.00045944494209509913, "loss": 0.0, "step": 38830 }, { "epoch": 0.5310690430775752, "grad_norm": NaN, "learning_rate": 0.00045941236061168263, "loss": 0.0, "step": 38840 }, { "epoch": 0.5312057755809422, "grad_norm": NaN, "learning_rate": 0.00045937976720194455, "loss": 0.0, "step": 38850 }, { "epoch": 0.5313425080843093, "grad_norm": NaN, "learning_rate": 0.0004593471618677412, "loss": 0.0, "step": 38860 }, { "epoch": 0.5314792405876763, "grad_norm": NaN, "learning_rate": 0.0004593145446109294, "loss": 0.0, "step": 38870 }, { "epoch": 0.5316159730910434, "grad_norm": NaN, "learning_rate": 0.00045928191543336683, "loss": 0.0, "step": 38880 }, { "epoch": 0.5317527055944103, "grad_norm": NaN, "learning_rate": 0.00045924927433691176, "loss": 0.0, "step": 38890 }, { "epoch": 0.5318894380977774, "grad_norm": NaN, "learning_rate": 0.00045921662132342313, "loss": 0.0, "step": 38900 }, { "epoch": 0.5320261706011444, "grad_norm": NaN, "learning_rate": 0.00045918395639476064, "loss": 0.0, "step": 38910 }, { "epoch": 0.5321629031045115, "grad_norm": NaN, "learning_rate": 0.0004591512795527844, "loss": 0.0, "step": 38920 }, { "epoch": 0.5322996356078785, "grad_norm": NaN, "learning_rate": 0.0004591185907993556, "loss": 0.0, "step": 38930 }, { "epoch": 0.5324363681112456, "grad_norm": NaN, "learning_rate": 0.0004590858901363359, "loss": 0.0, "step": 38940 }, { "epoch": 0.5325731006146126, "grad_norm": NaN, "learning_rate": 0.0004590531775655876, "loss": 0.0, "step": 38950 }, { "epoch": 0.5327098331179796, "grad_norm": NaN, "learning_rate": 0.00045902045308897365, "loss": 0.0, "step": 38960 }, { "epoch": 0.5328465656213467, "grad_norm": NaN, "learning_rate": 0.0004589877167083579, "loss": 0.0, "step": 38970 }, { "epoch": 0.5329832981247137, "grad_norm": NaN, "learning_rate": 0.0004589549684256047, "loss": 0.0, "step": 38980 }, { "epoch": 0.5331200306280808, "grad_norm": NaN, "learning_rate": 0.00045892220824257895, "loss": 0.0, "step": 38990 }, { "epoch": 0.5332567631314478, "grad_norm": NaN, "learning_rate": 0.00045888943616114656, "loss": 0.0, "step": 39000 }, { "epoch": 0.5333934956348149, "grad_norm": NaN, "learning_rate": 0.00045885665218317395, "loss": 0.0, "step": 39010 }, { "epoch": 0.5335302281381819, "grad_norm": NaN, "learning_rate": 0.0004588238563105281, "loss": 0.0, "step": 39020 }, { "epoch": 0.533666960641549, "grad_norm": NaN, "learning_rate": 0.0004587910485450768, "loss": 0.0, "step": 39030 }, { "epoch": 0.5338036931449159, "grad_norm": NaN, "learning_rate": 0.0004587582288886886, "loss": 0.0, "step": 39040 }, { "epoch": 0.533940425648283, "grad_norm": NaN, "learning_rate": 0.00045872539734323253, "loss": 0.0, "step": 39050 }, { "epoch": 0.53407715815165, "grad_norm": NaN, "learning_rate": 0.0004586925539105784, "loss": 0.0, "step": 39060 }, { "epoch": 0.534213890655017, "grad_norm": NaN, "learning_rate": 0.0004586596985925967, "loss": 0.0, "step": 39070 }, { "epoch": 0.5343506231583841, "grad_norm": NaN, "learning_rate": 0.00045862683139115867, "loss": 0.0, "step": 39080 }, { "epoch": 0.5344873556617511, "grad_norm": NaN, "learning_rate": 0.00045859395230813597, "loss": 0.0, "step": 39090 }, { "epoch": 0.5346240881651182, "grad_norm": NaN, "learning_rate": 0.0004585610613454013, "loss": 0.0, "step": 39100 }, { "epoch": 0.5347608206684852, "grad_norm": NaN, "learning_rate": 0.00045852815850482763, "loss": 0.0, "step": 39110 }, { "epoch": 0.5348975531718523, "grad_norm": NaN, "learning_rate": 0.00045849524378828904, "loss": 0.0, "step": 39120 }, { "epoch": 0.5350342856752193, "grad_norm": NaN, "learning_rate": 0.00045846231719766, "loss": 0.0, "step": 39130 }, { "epoch": 0.5351710181785864, "grad_norm": NaN, "learning_rate": 0.0004584293787348156, "loss": 0.0, "step": 39140 }, { "epoch": 0.5353077506819534, "grad_norm": NaN, "learning_rate": 0.0004583964284016318, "loss": 0.0, "step": 39150 }, { "epoch": 0.5354444831853205, "grad_norm": NaN, "learning_rate": 0.00045836346619998523, "loss": 0.0, "step": 39160 }, { "epoch": 0.5355812156886874, "grad_norm": NaN, "learning_rate": 0.00045833049213175317, "loss": 0.0, "step": 39170 }, { "epoch": 0.5357179481920544, "grad_norm": NaN, "learning_rate": 0.00045829750619881337, "loss": 0.0, "step": 39180 }, { "epoch": 0.5358546806954215, "grad_norm": NaN, "learning_rate": 0.00045826450840304455, "loss": 0.0, "step": 39190 }, { "epoch": 0.5359914131987885, "grad_norm": NaN, "learning_rate": 0.00045823149874632595, "loss": 0.0, "step": 39200 }, { "epoch": 0.5361281457021556, "grad_norm": NaN, "learning_rate": 0.0004581984772305375, "loss": 0.0, "step": 39210 }, { "epoch": 0.5362648782055226, "grad_norm": NaN, "learning_rate": 0.0004581654438575599, "loss": 0.0, "step": 39220 }, { "epoch": 0.5364016107088897, "grad_norm": NaN, "learning_rate": 0.00045813239862927427, "loss": 0.0, "step": 39230 }, { "epoch": 0.5365383432122567, "grad_norm": NaN, "learning_rate": 0.0004580993415475627, "loss": 0.0, "step": 39240 }, { "epoch": 0.5366750757156238, "grad_norm": NaN, "learning_rate": 0.0004580662726143079, "loss": 0.0, "step": 39250 }, { "epoch": 0.5368118082189908, "grad_norm": NaN, "learning_rate": 0.0004580331918313931, "loss": 0.0, "step": 39260 }, { "epoch": 0.5369485407223579, "grad_norm": NaN, "learning_rate": 0.0004580000992007023, "loss": 0.0, "step": 39270 }, { "epoch": 0.5370852732257249, "grad_norm": NaN, "learning_rate": 0.00045796699472412016, "loss": 0.0, "step": 39280 }, { "epoch": 0.5372220057290918, "grad_norm": NaN, "learning_rate": 0.0004579338784035321, "loss": 0.0, "step": 39290 }, { "epoch": 0.5373587382324589, "grad_norm": NaN, "learning_rate": 0.000457900750240824, "loss": 0.0, "step": 39300 }, { "epoch": 0.5374954707358259, "grad_norm": NaN, "learning_rate": 0.0004578676102378827, "loss": 0.0, "step": 39310 }, { "epoch": 0.537632203239193, "grad_norm": NaN, "learning_rate": 0.00045783445839659554, "loss": 0.0, "step": 39320 }, { "epoch": 0.53776893574256, "grad_norm": NaN, "learning_rate": 0.00045780129471885054, "loss": 0.0, "step": 39330 }, { "epoch": 0.5379056682459271, "grad_norm": NaN, "learning_rate": 0.0004577681192065364, "loss": 0.0, "step": 39340 }, { "epoch": 0.5380424007492941, "grad_norm": NaN, "learning_rate": 0.00045773493186154247, "loss": 0.0, "step": 39350 }, { "epoch": 0.5381791332526612, "grad_norm": NaN, "learning_rate": 0.0004577017326857589, "loss": 0.0, "step": 39360 }, { "epoch": 0.5383158657560282, "grad_norm": NaN, "learning_rate": 0.00045766852168107637, "loss": 0.0, "step": 39370 }, { "epoch": 0.5384525982593953, "grad_norm": NaN, "learning_rate": 0.0004576352988493864, "loss": 0.0, "step": 39380 }, { "epoch": 0.5385893307627623, "grad_norm": NaN, "learning_rate": 0.00045760206419258087, "loss": 0.0, "step": 39390 }, { "epoch": 0.5387260632661293, "grad_norm": NaN, "learning_rate": 0.0004575688177125528, "loss": 0.0, "step": 39400 }, { "epoch": 0.5388627957694964, "grad_norm": NaN, "learning_rate": 0.0004575355594111954, "loss": 0.0, "step": 39410 }, { "epoch": 0.5389995282728633, "grad_norm": NaN, "learning_rate": 0.0004575022892904028, "loss": 0.0, "step": 39420 }, { "epoch": 0.5391362607762304, "grad_norm": NaN, "learning_rate": 0.00045746900735206997, "loss": 0.0, "step": 39430 }, { "epoch": 0.5392729932795974, "grad_norm": NaN, "learning_rate": 0.0004574357135980922, "loss": 0.0, "step": 39440 }, { "epoch": 0.5394097257829645, "grad_norm": NaN, "learning_rate": 0.00045740240803036566, "loss": 0.0, "step": 39450 }, { "epoch": 0.5395464582863315, "grad_norm": NaN, "learning_rate": 0.0004573690906507871, "loss": 0.0, "step": 39460 }, { "epoch": 0.5396831907896986, "grad_norm": NaN, "learning_rate": 0.00045733576146125403, "loss": 0.0, "step": 39470 }, { "epoch": 0.5398199232930656, "grad_norm": NaN, "learning_rate": 0.0004573024204636646, "loss": 0.0, "step": 39480 }, { "epoch": 0.5399566557964326, "grad_norm": NaN, "learning_rate": 0.0004572690676599176, "loss": 0.0, "step": 39490 }, { "epoch": 0.5400933882997997, "grad_norm": NaN, "learning_rate": 0.0004572357030519126, "loss": 0.0, "step": 39500 }, { "epoch": 0.5402301208031667, "grad_norm": NaN, "learning_rate": 0.0004572023266415496, "loss": 0.0, "step": 39510 }, { "epoch": 0.5403668533065338, "grad_norm": NaN, "learning_rate": 0.0004571689384307296, "loss": 0.0, "step": 39520 }, { "epoch": 0.5405035858099008, "grad_norm": NaN, "learning_rate": 0.000457135538421354, "loss": 0.0, "step": 39530 }, { "epoch": 0.5406403183132679, "grad_norm": NaN, "learning_rate": 0.000457102126615325, "loss": 0.0, "step": 39540 }, { "epoch": 0.5407770508166349, "grad_norm": NaN, "learning_rate": 0.0004570687030145455, "loss": 0.0, "step": 39550 }, { "epoch": 0.540913783320002, "grad_norm": NaN, "learning_rate": 0.000457035267620919, "loss": 0.0, "step": 39560 }, { "epoch": 0.5410505158233689, "grad_norm": NaN, "learning_rate": 0.0004570018204363495, "loss": 0.0, "step": 39570 }, { "epoch": 0.541187248326736, "grad_norm": NaN, "learning_rate": 0.00045696836146274226, "loss": 0.0, "step": 39580 }, { "epoch": 0.541323980830103, "grad_norm": NaN, "learning_rate": 0.00045693489070200243, "loss": 0.0, "step": 39590 }, { "epoch": 0.54146071333347, "grad_norm": NaN, "learning_rate": 0.00045690140815603643, "loss": 0.0, "step": 39600 }, { "epoch": 0.5415974458368371, "grad_norm": NaN, "learning_rate": 0.00045686791382675105, "loss": 0.0, "step": 39610 }, { "epoch": 0.5417341783402041, "grad_norm": NaN, "learning_rate": 0.00045683440771605383, "loss": 0.0, "step": 39620 }, { "epoch": 0.5418709108435712, "grad_norm": NaN, "learning_rate": 0.00045680088982585313, "loss": 0.0, "step": 39630 }, { "epoch": 0.5420076433469382, "grad_norm": NaN, "learning_rate": 0.00045676736015805763, "loss": 0.0, "step": 39640 }, { "epoch": 0.5421443758503053, "grad_norm": NaN, "learning_rate": 0.00045673381871457706, "loss": 0.0, "step": 39650 }, { "epoch": 0.5422811083536723, "grad_norm": NaN, "learning_rate": 0.0004567002654973216, "loss": 0.0, "step": 39660 }, { "epoch": 0.5424178408570394, "grad_norm": NaN, "learning_rate": 0.000456666700508202, "loss": 0.0, "step": 39670 }, { "epoch": 0.5425545733604064, "grad_norm": NaN, "learning_rate": 0.00045663312374913016, "loss": 0.0, "step": 39680 }, { "epoch": 0.5426913058637735, "grad_norm": NaN, "learning_rate": 0.00045659953522201804, "loss": 0.0, "step": 39690 }, { "epoch": 0.5428280383671404, "grad_norm": NaN, "learning_rate": 0.00045656593492877863, "loss": 0.0, "step": 39700 }, { "epoch": 0.5429647708705074, "grad_norm": NaN, "learning_rate": 0.0004565323228713255, "loss": 0.0, "step": 39710 }, { "epoch": 0.5431015033738745, "grad_norm": NaN, "learning_rate": 0.00045649869905157304, "loss": 0.0, "step": 39720 }, { "epoch": 0.5432382358772415, "grad_norm": NaN, "learning_rate": 0.00045646506347143603, "loss": 0.0, "step": 39730 }, { "epoch": 0.5433749683806086, "grad_norm": NaN, "learning_rate": 0.00045643141613283, "loss": 0.0, "step": 39740 }, { "epoch": 0.5435117008839756, "grad_norm": NaN, "learning_rate": 0.0004563977570376714, "loss": 0.0, "step": 39750 }, { "epoch": 0.5436484333873427, "grad_norm": NaN, "learning_rate": 0.00045636408618787705, "loss": 0.0, "step": 39760 }, { "epoch": 0.5437851658907097, "grad_norm": NaN, "learning_rate": 0.0004563304035853646, "loss": 0.0, "step": 39770 }, { "epoch": 0.5439218983940768, "grad_norm": NaN, "learning_rate": 0.00045629670923205223, "loss": 0.0, "step": 39780 }, { "epoch": 0.5440586308974438, "grad_norm": NaN, "learning_rate": 0.00045626300312985905, "loss": 0.0, "step": 39790 }, { "epoch": 0.5441953634008109, "grad_norm": NaN, "learning_rate": 0.0004562292852807044, "loss": 0.0, "step": 39800 }, { "epoch": 0.5443320959041779, "grad_norm": NaN, "learning_rate": 0.00045619555568650886, "loss": 0.0, "step": 39810 }, { "epoch": 0.5444688284075448, "grad_norm": NaN, "learning_rate": 0.0004561618143491931, "loss": 0.0, "step": 39820 }, { "epoch": 0.544605560910912, "grad_norm": NaN, "learning_rate": 0.000456128061270679, "loss": 0.0, "step": 39830 }, { "epoch": 0.5447422934142789, "grad_norm": NaN, "learning_rate": 0.0004560942964528887, "loss": 0.0, "step": 39840 }, { "epoch": 0.544879025917646, "grad_norm": NaN, "learning_rate": 0.00045606051989774513, "loss": 0.0, "step": 39850 }, { "epoch": 0.545015758421013, "grad_norm": NaN, "learning_rate": 0.000456026731607172, "loss": 0.0, "step": 39860 }, { "epoch": 0.5451524909243801, "grad_norm": NaN, "learning_rate": 0.00045599293158309344, "loss": 0.0, "step": 39870 }, { "epoch": 0.5452892234277471, "grad_norm": NaN, "learning_rate": 0.00045595911982743457, "loss": 0.0, "step": 39880 }, { "epoch": 0.5454259559311142, "grad_norm": NaN, "learning_rate": 0.00045592529634212096, "loss": 0.0, "step": 39890 }, { "epoch": 0.5455626884344812, "grad_norm": NaN, "learning_rate": 0.000455891461129079, "loss": 0.0, "step": 39900 }, { "epoch": 0.5456994209378483, "grad_norm": NaN, "learning_rate": 0.0004558576141902354, "loss": 0.0, "step": 39910 }, { "epoch": 0.5458361534412153, "grad_norm": NaN, "learning_rate": 0.0004558237555275181, "loss": 0.0, "step": 39920 }, { "epoch": 0.5459728859445823, "grad_norm": NaN, "learning_rate": 0.0004557898851428551, "loss": 0.0, "step": 39930 }, { "epoch": 0.5461096184479494, "grad_norm": NaN, "learning_rate": 0.00045575600303817565, "loss": 0.0, "step": 39940 }, { "epoch": 0.5462463509513164, "grad_norm": NaN, "learning_rate": 0.0004557221092154091, "loss": 0.0, "step": 39950 }, { "epoch": 0.5463830834546834, "grad_norm": NaN, "learning_rate": 0.000455688203676486, "loss": 0.0, "step": 39960 }, { "epoch": 0.5465198159580504, "grad_norm": NaN, "learning_rate": 0.0004556542864233372, "loss": 0.0, "step": 39970 }, { "epoch": 0.5466565484614175, "grad_norm": NaN, "learning_rate": 0.00045562035745789434, "loss": 0.0, "step": 39980 }, { "epoch": 0.5467932809647845, "grad_norm": NaN, "learning_rate": 0.0004555864167820897, "loss": 0.0, "step": 39990 }, { "epoch": 0.5469300134681516, "grad_norm": NaN, "learning_rate": 0.00045555246439785623, "loss": 0.0, "step": 40000 }, { "epoch": 0.5470667459715186, "grad_norm": NaN, "learning_rate": 0.0004555185003071277, "loss": 0.0, "step": 40010 }, { "epoch": 0.5472034784748857, "grad_norm": NaN, "learning_rate": 0.0004554845245118382, "loss": 0.0, "step": 40020 }, { "epoch": 0.5473402109782527, "grad_norm": NaN, "learning_rate": 0.0004554505370139229, "loss": 0.0, "step": 40030 }, { "epoch": 0.5474769434816197, "grad_norm": NaN, "learning_rate": 0.00045541653781531734, "loss": 0.0, "step": 40040 }, { "epoch": 0.5476136759849868, "grad_norm": NaN, "learning_rate": 0.00045538252691795783, "loss": 0.0, "step": 40050 }, { "epoch": 0.5477504084883538, "grad_norm": NaN, "learning_rate": 0.00045534850432378136, "loss": 0.0, "step": 40060 }, { "epoch": 0.5478871409917209, "grad_norm": NaN, "learning_rate": 0.00045531447003472556, "loss": 0.0, "step": 40070 }, { "epoch": 0.5480238734950879, "grad_norm": NaN, "learning_rate": 0.0004552804240527286, "loss": 0.0, "step": 40080 }, { "epoch": 0.548160605998455, "grad_norm": NaN, "learning_rate": 0.0004552463663797297, "loss": 0.0, "step": 40090 }, { "epoch": 0.5482973385018219, "grad_norm": NaN, "learning_rate": 0.0004552122970176683, "loss": 0.0, "step": 40100 }, { "epoch": 0.548434071005189, "grad_norm": NaN, "learning_rate": 0.00045517821596848467, "loss": 0.0, "step": 40110 }, { "epoch": 0.548570803508556, "grad_norm": NaN, "learning_rate": 0.0004551441232341199, "loss": 0.0, "step": 40120 }, { "epoch": 0.5487075360119231, "grad_norm": NaN, "learning_rate": 0.0004551100188165156, "loss": 0.0, "step": 40130 }, { "epoch": 0.5488442685152901, "grad_norm": NaN, "learning_rate": 0.00045507590271761403, "loss": 0.0, "step": 40140 }, { "epoch": 0.5489810010186571, "grad_norm": NaN, "learning_rate": 0.0004550417749393582, "loss": 0.0, "step": 40150 }, { "epoch": 0.5491177335220242, "grad_norm": NaN, "learning_rate": 0.00045500763548369155, "loss": 0.0, "step": 40160 }, { "epoch": 0.5492544660253912, "grad_norm": NaN, "learning_rate": 0.0004549734843525586, "loss": 0.0, "step": 40170 }, { "epoch": 0.5493911985287583, "grad_norm": NaN, "learning_rate": 0.0004549393215479042, "loss": 0.0, "step": 40180 }, { "epoch": 0.5495279310321253, "grad_norm": NaN, "learning_rate": 0.0004549051470716739, "loss": 0.0, "step": 40190 }, { "epoch": 0.5496646635354924, "grad_norm": NaN, "learning_rate": 0.00045487096092581414, "loss": 0.0, "step": 40200 }, { "epoch": 0.5498013960388594, "grad_norm": NaN, "learning_rate": 0.00045483676311227174, "loss": 0.0, "step": 40210 }, { "epoch": 0.5499381285422265, "grad_norm": NaN, "learning_rate": 0.0004548025536329944, "loss": 0.0, "step": 40220 }, { "epoch": 0.5500748610455934, "grad_norm": NaN, "learning_rate": 0.00045476833248993026, "loss": 0.0, "step": 40230 }, { "epoch": 0.5502115935489605, "grad_norm": NaN, "learning_rate": 0.00045473409968502845, "loss": 0.0, "step": 40240 }, { "epoch": 0.5503483260523275, "grad_norm": NaN, "learning_rate": 0.0004546998552202384, "loss": 0.0, "step": 40250 }, { "epoch": 0.5504850585556945, "grad_norm": NaN, "learning_rate": 0.00045466559909751057, "loss": 0.0, "step": 40260 }, { "epoch": 0.5506217910590616, "grad_norm": NaN, "learning_rate": 0.00045463133131879574, "loss": 0.0, "step": 40270 }, { "epoch": 0.5507585235624286, "grad_norm": NaN, "learning_rate": 0.0004545970518860455, "loss": 0.0, "step": 40280 }, { "epoch": 0.5508952560657957, "grad_norm": NaN, "learning_rate": 0.0004545627608012122, "loss": 0.0, "step": 40290 }, { "epoch": 0.5510319885691627, "grad_norm": NaN, "learning_rate": 0.0004545284580662488, "loss": 0.0, "step": 40300 }, { "epoch": 0.5511687210725298, "grad_norm": NaN, "learning_rate": 0.0004544941436831087, "loss": 0.0, "step": 40310 }, { "epoch": 0.5513054535758968, "grad_norm": NaN, "learning_rate": 0.00045445981765374635, "loss": 0.0, "step": 40320 }, { "epoch": 0.5514421860792639, "grad_norm": NaN, "learning_rate": 0.0004544254799801165, "loss": 0.0, "step": 40330 }, { "epoch": 0.5515789185826309, "grad_norm": NaN, "learning_rate": 0.00045439113066417485, "loss": 0.0, "step": 40340 }, { "epoch": 0.551715651085998, "grad_norm": NaN, "learning_rate": 0.0004543567697078777, "loss": 0.0, "step": 40350 }, { "epoch": 0.551852383589365, "grad_norm": NaN, "learning_rate": 0.0004543223971131817, "loss": 0.0, "step": 40360 }, { "epoch": 0.5519891160927319, "grad_norm": NaN, "learning_rate": 0.00045428801288204456, "loss": 0.0, "step": 40370 }, { "epoch": 0.552125848596099, "grad_norm": NaN, "learning_rate": 0.00045425361701642456, "loss": 0.0, "step": 40380 }, { "epoch": 0.552262581099466, "grad_norm": NaN, "learning_rate": 0.0004542192095182806, "loss": 0.0, "step": 40390 }, { "epoch": 0.5523993136028331, "grad_norm": NaN, "learning_rate": 0.00045418479038957215, "loss": 0.0, "step": 40400 }, { "epoch": 0.5525360461062001, "grad_norm": NaN, "learning_rate": 0.00045415035963225947, "loss": 0.0, "step": 40410 }, { "epoch": 0.5526727786095672, "grad_norm": NaN, "learning_rate": 0.00045411591724830335, "loss": 0.0, "step": 40420 }, { "epoch": 0.5528095111129342, "grad_norm": NaN, "learning_rate": 0.00045408146323966554, "loss": 0.0, "step": 40430 }, { "epoch": 0.5529462436163013, "grad_norm": NaN, "learning_rate": 0.000454046997608308, "loss": 0.0, "step": 40440 }, { "epoch": 0.5530829761196683, "grad_norm": NaN, "learning_rate": 0.0004540125203561938, "loss": 0.0, "step": 40450 }, { "epoch": 0.5532197086230354, "grad_norm": NaN, "learning_rate": 0.00045397803148528634, "loss": 0.0, "step": 40460 }, { "epoch": 0.5533564411264024, "grad_norm": NaN, "learning_rate": 0.0004539435309975498, "loss": 0.0, "step": 40470 }, { "epoch": 0.5534931736297694, "grad_norm": NaN, "learning_rate": 0.0004539090188949492, "loss": 0.0, "step": 40480 }, { "epoch": 0.5536299061331365, "grad_norm": NaN, "learning_rate": 0.0004538744951794498, "loss": 0.0, "step": 40490 }, { "epoch": 0.5537666386365034, "grad_norm": NaN, "learning_rate": 0.00045383995985301797, "loss": 0.0, "step": 40500 }, { "epoch": 0.5539033711398705, "grad_norm": NaN, "learning_rate": 0.00045380541291762045, "loss": 0.0, "step": 40510 }, { "epoch": 0.5540401036432375, "grad_norm": NaN, "learning_rate": 0.0004537708543752247, "loss": 0.0, "step": 40520 }, { "epoch": 0.5541768361466046, "grad_norm": NaN, "learning_rate": 0.000453736284227799, "loss": 0.0, "step": 40530 }, { "epoch": 0.5543135686499716, "grad_norm": NaN, "learning_rate": 0.00045370170247731214, "loss": 0.0, "step": 40540 }, { "epoch": 0.5544503011533387, "grad_norm": NaN, "learning_rate": 0.0004536671091257335, "loss": 0.0, "step": 40550 }, { "epoch": 0.5545870336567057, "grad_norm": NaN, "learning_rate": 0.00045363250417503325, "loss": 0.0, "step": 40560 }, { "epoch": 0.5547237661600728, "grad_norm": NaN, "learning_rate": 0.00045359788762718223, "loss": 0.0, "step": 40570 }, { "epoch": 0.5548604986634398, "grad_norm": NaN, "learning_rate": 0.000453563259484152, "loss": 0.0, "step": 40580 }, { "epoch": 0.5549972311668068, "grad_norm": NaN, "learning_rate": 0.00045352861974791447, "loss": 0.0, "step": 40590 }, { "epoch": 0.5551339636701739, "grad_norm": NaN, "learning_rate": 0.00045349396842044254, "loss": 0.0, "step": 40600 }, { "epoch": 0.5552706961735409, "grad_norm": NaN, "learning_rate": 0.0004534593055037096, "loss": 0.0, "step": 40610 }, { "epoch": 0.555407428676908, "grad_norm": NaN, "learning_rate": 0.0004534246309996898, "loss": 0.0, "step": 40620 }, { "epoch": 0.5555441611802749, "grad_norm": NaN, "learning_rate": 0.0004533899449103579, "loss": 0.0, "step": 40630 }, { "epoch": 0.555680893683642, "grad_norm": NaN, "learning_rate": 0.00045335524723768926, "loss": 0.0, "step": 40640 }, { "epoch": 0.555817626187009, "grad_norm": NaN, "learning_rate": 0.00045332053798366, "loss": 0.0, "step": 40650 }, { "epoch": 0.5559543586903761, "grad_norm": NaN, "learning_rate": 0.000453285817150247, "loss": 0.0, "step": 40660 }, { "epoch": 0.5560910911937431, "grad_norm": NaN, "learning_rate": 0.00045325108473942734, "loss": 0.0, "step": 40670 }, { "epoch": 0.5562278236971102, "grad_norm": NaN, "learning_rate": 0.0004532163407531794, "loss": 0.0, "step": 40680 }, { "epoch": 0.5563645562004772, "grad_norm": NaN, "learning_rate": 0.0004531815851934817, "loss": 0.0, "step": 40690 }, { "epoch": 0.5565012887038442, "grad_norm": NaN, "learning_rate": 0.0004531468180623136, "loss": 0.0, "step": 40700 }, { "epoch": 0.5566380212072113, "grad_norm": NaN, "learning_rate": 0.00045311203936165535, "loss": 0.0, "step": 40710 }, { "epoch": 0.5567747537105783, "grad_norm": NaN, "learning_rate": 0.0004530772490934874, "loss": 0.0, "step": 40720 }, { "epoch": 0.5569114862139454, "grad_norm": NaN, "learning_rate": 0.0004530424472597913, "loss": 0.0, "step": 40730 }, { "epoch": 0.5570482187173124, "grad_norm": NaN, "learning_rate": 0.00045300763386254883, "loss": 0.0, "step": 40740 }, { "epoch": 0.5571849512206795, "grad_norm": NaN, "learning_rate": 0.0004529728089037429, "loss": 0.0, "step": 40750 }, { "epoch": 0.5573216837240464, "grad_norm": NaN, "learning_rate": 0.00045293797238535674, "loss": 0.0, "step": 40760 }, { "epoch": 0.5574584162274135, "grad_norm": NaN, "learning_rate": 0.0004529031243093743, "loss": 0.0, "step": 40770 }, { "epoch": 0.5575951487307805, "grad_norm": NaN, "learning_rate": 0.00045286826467778033, "loss": 0.0, "step": 40780 }, { "epoch": 0.5577318812341476, "grad_norm": NaN, "learning_rate": 0.00045283339349256, "loss": 0.0, "step": 40790 }, { "epoch": 0.5578686137375146, "grad_norm": NaN, "learning_rate": 0.00045279851075569937, "loss": 0.0, "step": 40800 }, { "epoch": 0.5580053462408816, "grad_norm": NaN, "learning_rate": 0.00045276361646918507, "loss": 0.0, "step": 40810 }, { "epoch": 0.5581420787442487, "grad_norm": NaN, "learning_rate": 0.00045272871063500424, "loss": 0.0, "step": 40820 }, { "epoch": 0.5582788112476157, "grad_norm": NaN, "learning_rate": 0.000452693793255145, "loss": 0.0, "step": 40830 }, { "epoch": 0.5584155437509828, "grad_norm": NaN, "learning_rate": 0.0004526588643315958, "loss": 0.0, "step": 40840 }, { "epoch": 0.5585522762543498, "grad_norm": NaN, "learning_rate": 0.000452623923866346, "loss": 0.0, "step": 40850 }, { "epoch": 0.5586890087577169, "grad_norm": NaN, "learning_rate": 0.0004525889718613854, "loss": 0.0, "step": 40860 }, { "epoch": 0.5588257412610839, "grad_norm": NaN, "learning_rate": 0.00045255400831870464, "loss": 0.0, "step": 40870 }, { "epoch": 0.558962473764451, "grad_norm": NaN, "learning_rate": 0.00045251903324029487, "loss": 0.0, "step": 40880 }, { "epoch": 0.559099206267818, "grad_norm": NaN, "learning_rate": 0.00045248404662814813, "loss": 0.0, "step": 40890 }, { "epoch": 0.5592359387711849, "grad_norm": NaN, "learning_rate": 0.0004524490484842567, "loss": 0.0, "step": 40900 }, { "epoch": 0.559372671274552, "grad_norm": NaN, "learning_rate": 0.00045241403881061406, "loss": 0.0, "step": 40910 }, { "epoch": 0.559509403777919, "grad_norm": NaN, "learning_rate": 0.0004523790176092138, "loss": 0.0, "step": 40920 }, { "epoch": 0.5596461362812861, "grad_norm": NaN, "learning_rate": 0.0004523439848820506, "loss": 0.0, "step": 40930 }, { "epoch": 0.5597828687846531, "grad_norm": NaN, "learning_rate": 0.00045230894063111955, "loss": 0.0, "step": 40940 }, { "epoch": 0.5599196012880202, "grad_norm": NaN, "learning_rate": 0.00045227388485841646, "loss": 0.0, "step": 40950 }, { "epoch": 0.5600563337913872, "grad_norm": NaN, "learning_rate": 0.0004522388175659379, "loss": 0.0, "step": 40960 }, { "epoch": 0.5601930662947543, "grad_norm": NaN, "learning_rate": 0.0004522037387556809, "loss": 0.0, "step": 40970 }, { "epoch": 0.5603297987981213, "grad_norm": NaN, "learning_rate": 0.0004521686484296432, "loss": 0.0, "step": 40980 }, { "epoch": 0.5604665313014884, "grad_norm": NaN, "learning_rate": 0.00045213354658982333, "loss": 0.0, "step": 40990 }, { "epoch": 0.5606032638048554, "grad_norm": NaN, "learning_rate": 0.0004520984332382204, "loss": 0.0, "step": 41000 }, { "epoch": 0.5607399963082224, "grad_norm": NaN, "learning_rate": 0.0004520633083768341, "loss": 0.0, "step": 41010 }, { "epoch": 0.5608767288115895, "grad_norm": NaN, "learning_rate": 0.00045202817200766494, "loss": 0.0, "step": 41020 }, { "epoch": 0.5610134613149564, "grad_norm": NaN, "learning_rate": 0.00045199302413271393, "loss": 0.0, "step": 41030 }, { "epoch": 0.5611501938183235, "grad_norm": NaN, "learning_rate": 0.00045195786475398273, "loss": 0.0, "step": 41040 }, { "epoch": 0.5612869263216905, "grad_norm": NaN, "learning_rate": 0.0004519226938734737, "loss": 0.0, "step": 41050 }, { "epoch": 0.5614236588250576, "grad_norm": NaN, "learning_rate": 0.0004518875114931901, "loss": 0.0, "step": 41060 }, { "epoch": 0.5615603913284246, "grad_norm": NaN, "learning_rate": 0.0004518523176151353, "loss": 0.0, "step": 41070 }, { "epoch": 0.5616971238317917, "grad_norm": NaN, "learning_rate": 0.0004518171122413138, "loss": 0.0, "step": 41080 }, { "epoch": 0.5618338563351587, "grad_norm": NaN, "learning_rate": 0.0004517818953737307, "loss": 0.0, "step": 41090 }, { "epoch": 0.5619705888385258, "grad_norm": NaN, "learning_rate": 0.0004517466670143914, "loss": 0.0, "step": 41100 }, { "epoch": 0.5621073213418928, "grad_norm": NaN, "learning_rate": 0.00045171142716530246, "loss": 0.0, "step": 41110 }, { "epoch": 0.5622440538452598, "grad_norm": NaN, "learning_rate": 0.0004516761758284706, "loss": 0.0, "step": 41120 }, { "epoch": 0.5623807863486269, "grad_norm": NaN, "learning_rate": 0.0004516409130059036, "loss": 0.0, "step": 41130 }, { "epoch": 0.5625175188519939, "grad_norm": NaN, "learning_rate": 0.0004516056386996097, "loss": 0.0, "step": 41140 }, { "epoch": 0.562654251355361, "grad_norm": NaN, "learning_rate": 0.0004515703529115977, "loss": 0.0, "step": 41150 }, { "epoch": 0.562790983858728, "grad_norm": NaN, "learning_rate": 0.0004515350556438773, "loss": 0.0, "step": 41160 }, { "epoch": 0.562927716362095, "grad_norm": NaN, "learning_rate": 0.0004514997468984587, "loss": 0.0, "step": 41170 }, { "epoch": 0.563064448865462, "grad_norm": NaN, "learning_rate": 0.00045146442667735285, "loss": 0.0, "step": 41180 }, { "epoch": 0.5632011813688291, "grad_norm": NaN, "learning_rate": 0.0004514290949825711, "loss": 0.0, "step": 41190 }, { "epoch": 0.5633379138721961, "grad_norm": NaN, "learning_rate": 0.0004513937518161258, "loss": 0.0, "step": 41200 }, { "epoch": 0.5634746463755632, "grad_norm": NaN, "learning_rate": 0.00045135839718002976, "loss": 0.0, "step": 41210 }, { "epoch": 0.5636113788789302, "grad_norm": NaN, "learning_rate": 0.0004513230310762965, "loss": 0.0, "step": 41220 }, { "epoch": 0.5637481113822972, "grad_norm": NaN, "learning_rate": 0.00045128765350694, "loss": 0.0, "step": 41230 }, { "epoch": 0.5638848438856643, "grad_norm": NaN, "learning_rate": 0.00045125226447397524, "loss": 0.0, "step": 41240 }, { "epoch": 0.5640215763890313, "grad_norm": NaN, "learning_rate": 0.00045121686397941763, "loss": 0.0, "step": 41250 }, { "epoch": 0.5641583088923984, "grad_norm": NaN, "learning_rate": 0.0004511814520252832, "loss": 0.0, "step": 41260 }, { "epoch": 0.5642950413957654, "grad_norm": NaN, "learning_rate": 0.0004511460286135889, "loss": 0.0, "step": 41270 }, { "epoch": 0.5644317738991325, "grad_norm": NaN, "learning_rate": 0.0004511105937463519, "loss": 0.0, "step": 41280 }, { "epoch": 0.5645685064024994, "grad_norm": NaN, "learning_rate": 0.0004510751474255904, "loss": 0.0, "step": 41290 }, { "epoch": 0.5647052389058665, "grad_norm": NaN, "learning_rate": 0.0004510396896533232, "loss": 0.0, "step": 41300 }, { "epoch": 0.5648419714092335, "grad_norm": NaN, "learning_rate": 0.00045100422043156945, "loss": 0.0, "step": 41310 }, { "epoch": 0.5649787039126006, "grad_norm": NaN, "learning_rate": 0.0004509687397623493, "loss": 0.0, "step": 41320 }, { "epoch": 0.5651154364159676, "grad_norm": NaN, "learning_rate": 0.0004509332476476835, "loss": 0.0, "step": 41330 }, { "epoch": 0.5652521689193346, "grad_norm": NaN, "learning_rate": 0.00045089774408959323, "loss": 0.0, "step": 41340 }, { "epoch": 0.5653889014227017, "grad_norm": NaN, "learning_rate": 0.0004508622290901005, "loss": 0.0, "step": 41350 }, { "epoch": 0.5655256339260687, "grad_norm": NaN, "learning_rate": 0.00045082670265122793, "loss": 0.0, "step": 41360 }, { "epoch": 0.5656623664294358, "grad_norm": NaN, "learning_rate": 0.0004507911647749989, "loss": 0.0, "step": 41370 }, { "epoch": 0.5657990989328028, "grad_norm": NaN, "learning_rate": 0.0004507556154634372, "loss": 0.0, "step": 41380 }, { "epoch": 0.5659358314361699, "grad_norm": NaN, "learning_rate": 0.00045072005471856756, "loss": 0.0, "step": 41390 }, { "epoch": 0.5660725639395369, "grad_norm": NaN, "learning_rate": 0.0004506844825424151, "loss": 0.0, "step": 41400 }, { "epoch": 0.566209296442904, "grad_norm": NaN, "learning_rate": 0.0004506488989370058, "loss": 0.0, "step": 41410 }, { "epoch": 0.566346028946271, "grad_norm": NaN, "learning_rate": 0.00045061330390436607, "loss": 0.0, "step": 41420 }, { "epoch": 0.566482761449638, "grad_norm": NaN, "learning_rate": 0.00045057769744652324, "loss": 0.0, "step": 41430 }, { "epoch": 0.566619493953005, "grad_norm": NaN, "learning_rate": 0.00045054207956550497, "loss": 0.0, "step": 41440 }, { "epoch": 0.566756226456372, "grad_norm": NaN, "learning_rate": 0.00045050645026333984, "loss": 0.0, "step": 41450 }, { "epoch": 0.5668929589597391, "grad_norm": NaN, "learning_rate": 0.00045047080954205705, "loss": 0.0, "step": 41460 }, { "epoch": 0.5670296914631061, "grad_norm": NaN, "learning_rate": 0.0004504351574036863, "loss": 0.0, "step": 41470 }, { "epoch": 0.5671664239664732, "grad_norm": NaN, "learning_rate": 0.00045039949385025803, "loss": 0.0, "step": 41480 }, { "epoch": 0.5673031564698402, "grad_norm": NaN, "learning_rate": 0.0004503638188838034, "loss": 0.0, "step": 41490 }, { "epoch": 0.5674398889732073, "grad_norm": NaN, "learning_rate": 0.00045032813250635414, "loss": 0.0, "step": 41500 }, { "epoch": 0.5675766214765743, "grad_norm": NaN, "learning_rate": 0.00045029243471994253, "loss": 0.0, "step": 41510 }, { "epoch": 0.5677133539799414, "grad_norm": NaN, "learning_rate": 0.00045025672552660175, "loss": 0.0, "step": 41520 }, { "epoch": 0.5678500864833084, "grad_norm": NaN, "learning_rate": 0.0004502210049283654, "loss": 0.0, "step": 41530 }, { "epoch": 0.5679868189866755, "grad_norm": NaN, "learning_rate": 0.00045018527292726773, "loss": 0.0, "step": 41540 }, { "epoch": 0.5681235514900425, "grad_norm": NaN, "learning_rate": 0.00045014952952534393, "loss": 0.0, "step": 41550 }, { "epoch": 0.5682602839934094, "grad_norm": NaN, "learning_rate": 0.00045011377472462953, "loss": 0.0, "step": 41560 }, { "epoch": 0.5683970164967765, "grad_norm": NaN, "learning_rate": 0.0004500780085271609, "loss": 0.0, "step": 41570 }, { "epoch": 0.5685337490001435, "grad_norm": NaN, "learning_rate": 0.00045004223093497475, "loss": 0.0, "step": 41580 }, { "epoch": 0.5686704815035106, "grad_norm": NaN, "learning_rate": 0.0004500064419501089, "loss": 0.0, "step": 41590 }, { "epoch": 0.5688072140068776, "grad_norm": NaN, "learning_rate": 0.00044997064157460155, "loss": 0.0, "step": 41600 }, { "epoch": 0.5689439465102447, "grad_norm": NaN, "learning_rate": 0.0004499348298104914, "loss": 0.0, "step": 41610 }, { "epoch": 0.5690806790136117, "grad_norm": NaN, "learning_rate": 0.0004498990066598181, "loss": 0.0, "step": 41620 }, { "epoch": 0.5692174115169788, "grad_norm": NaN, "learning_rate": 0.0004498631721246219, "loss": 0.0, "step": 41630 }, { "epoch": 0.5693541440203458, "grad_norm": NaN, "learning_rate": 0.00044982732620694354, "loss": 0.0, "step": 41640 }, { "epoch": 0.5694908765237129, "grad_norm": NaN, "learning_rate": 0.00044979146890882453, "loss": 0.0, "step": 41650 }, { "epoch": 0.5696276090270799, "grad_norm": NaN, "learning_rate": 0.000449755600232307, "loss": 0.0, "step": 41660 }, { "epoch": 0.5697643415304469, "grad_norm": NaN, "learning_rate": 0.0004497197201794336, "loss": 0.0, "step": 41670 }, { "epoch": 0.569901074033814, "grad_norm": NaN, "learning_rate": 0.0004496838287522479, "loss": 0.0, "step": 41680 }, { "epoch": 0.570037806537181, "grad_norm": NaN, "learning_rate": 0.0004496479259527939, "loss": 0.0, "step": 41690 }, { "epoch": 0.570174539040548, "grad_norm": NaN, "learning_rate": 0.00044961201178311626, "loss": 0.0, "step": 41700 }, { "epoch": 0.570311271543915, "grad_norm": NaN, "learning_rate": 0.0004495760862452605, "loss": 0.0, "step": 41710 }, { "epoch": 0.5704480040472821, "grad_norm": NaN, "learning_rate": 0.00044954014934127254, "loss": 0.0, "step": 41720 }, { "epoch": 0.5705847365506491, "grad_norm": NaN, "learning_rate": 0.00044950420107319893, "loss": 0.0, "step": 41730 }, { "epoch": 0.5707214690540162, "grad_norm": NaN, "learning_rate": 0.00044946824144308705, "loss": 0.0, "step": 41740 }, { "epoch": 0.5708582015573832, "grad_norm": NaN, "learning_rate": 0.000449432270452985, "loss": 0.0, "step": 41750 }, { "epoch": 0.5709949340607503, "grad_norm": NaN, "learning_rate": 0.0004493962881049411, "loss": 0.0, "step": 41760 }, { "epoch": 0.5711316665641173, "grad_norm": NaN, "learning_rate": 0.0004493602944010049, "loss": 0.0, "step": 41770 }, { "epoch": 0.5712683990674843, "grad_norm": NaN, "learning_rate": 0.0004493242893432261, "loss": 0.0, "step": 41780 }, { "epoch": 0.5714051315708514, "grad_norm": NaN, "learning_rate": 0.0004492882729336552, "loss": 0.0, "step": 41790 }, { "epoch": 0.5715418640742184, "grad_norm": NaN, "learning_rate": 0.0004492522451743435, "loss": 0.0, "step": 41800 }, { "epoch": 0.5716785965775855, "grad_norm": NaN, "learning_rate": 0.00044921620606734277, "loss": 0.0, "step": 41810 }, { "epoch": 0.5718153290809525, "grad_norm": NaN, "learning_rate": 0.0004491801556147055, "loss": 0.0, "step": 41820 }, { "epoch": 0.5719520615843195, "grad_norm": NaN, "learning_rate": 0.00044914409381848484, "loss": 0.0, "step": 41830 }, { "epoch": 0.5720887940876865, "grad_norm": NaN, "learning_rate": 0.00044910802068073445, "loss": 0.0, "step": 41840 }, { "epoch": 0.5722255265910536, "grad_norm": NaN, "learning_rate": 0.0004490719362035089, "loss": 0.0, "step": 41850 }, { "epoch": 0.5723622590944206, "grad_norm": NaN, "learning_rate": 0.0004490358403888632, "loss": 0.0, "step": 41860 }, { "epoch": 0.5724989915977877, "grad_norm": NaN, "learning_rate": 0.000448999733238853, "loss": 0.0, "step": 41870 }, { "epoch": 0.5726357241011547, "grad_norm": NaN, "learning_rate": 0.00044896361475553465, "loss": 0.0, "step": 41880 }, { "epoch": 0.5727724566045217, "grad_norm": NaN, "learning_rate": 0.00044892748494096527, "loss": 0.0, "step": 41890 }, { "epoch": 0.5729091891078888, "grad_norm": NaN, "learning_rate": 0.00044889134379720233, "loss": 0.0, "step": 41900 }, { "epoch": 0.5730459216112558, "grad_norm": NaN, "learning_rate": 0.00044885519132630426, "loss": 0.0, "step": 41910 }, { "epoch": 0.5731826541146229, "grad_norm": NaN, "learning_rate": 0.0004488190275303299, "loss": 0.0, "step": 41920 }, { "epoch": 0.5733193866179899, "grad_norm": NaN, "learning_rate": 0.0004487828524113389, "loss": 0.0, "step": 41930 }, { "epoch": 0.573456119121357, "grad_norm": NaN, "learning_rate": 0.0004487466659713914, "loss": 0.0, "step": 41940 }, { "epoch": 0.573592851624724, "grad_norm": NaN, "learning_rate": 0.00044871046821254833, "loss": 0.0, "step": 41950 }, { "epoch": 0.573729584128091, "grad_norm": NaN, "learning_rate": 0.00044867425913687123, "loss": 0.0, "step": 41960 }, { "epoch": 0.573866316631458, "grad_norm": NaN, "learning_rate": 0.00044863803874642215, "loss": 0.0, "step": 41970 }, { "epoch": 0.5740030491348251, "grad_norm": NaN, "learning_rate": 0.00044860180704326406, "loss": 0.0, "step": 41980 }, { "epoch": 0.5741397816381921, "grad_norm": NaN, "learning_rate": 0.0004485655640294601, "loss": 0.0, "step": 41990 }, { "epoch": 0.5742765141415591, "grad_norm": NaN, "learning_rate": 0.00044852930970707474, "loss": 0.0, "step": 42000 }, { "epoch": 0.5744132466449262, "grad_norm": NaN, "learning_rate": 0.0004484930440781725, "loss": 0.0, "step": 42010 }, { "epoch": 0.5745499791482932, "grad_norm": NaN, "learning_rate": 0.00044845676714481877, "loss": 0.0, "step": 42020 }, { "epoch": 0.5746867116516603, "grad_norm": NaN, "learning_rate": 0.00044842047890907954, "loss": 0.0, "step": 42030 }, { "epoch": 0.5748234441550273, "grad_norm": NaN, "learning_rate": 0.0004483841793730216, "loss": 0.0, "step": 42040 }, { "epoch": 0.5749601766583944, "grad_norm": NaN, "learning_rate": 0.00044834786853871223, "loss": 0.0, "step": 42050 }, { "epoch": 0.5750969091617614, "grad_norm": NaN, "learning_rate": 0.00044831154640821925, "loss": 0.0, "step": 42060 }, { "epoch": 0.5752336416651285, "grad_norm": NaN, "learning_rate": 0.00044827521298361137, "loss": 0.0, "step": 42070 }, { "epoch": 0.5753703741684955, "grad_norm": NaN, "learning_rate": 0.00044823886826695774, "loss": 0.0, "step": 42080 }, { "epoch": 0.5755071066718626, "grad_norm": NaN, "learning_rate": 0.00044820251226032837, "loss": 0.0, "step": 42090 }, { "epoch": 0.5756438391752295, "grad_norm": NaN, "learning_rate": 0.00044816614496579366, "loss": 0.0, "step": 42100 }, { "epoch": 0.5757805716785965, "grad_norm": NaN, "learning_rate": 0.0004481297663854249, "loss": 0.0, "step": 42110 }, { "epoch": 0.5759173041819636, "grad_norm": NaN, "learning_rate": 0.0004480933765212938, "loss": 0.0, "step": 42120 }, { "epoch": 0.5760540366853306, "grad_norm": NaN, "learning_rate": 0.00044805697537547283, "loss": 0.0, "step": 42130 }, { "epoch": 0.5761907691886977, "grad_norm": NaN, "learning_rate": 0.0004480205629500352, "loss": 0.0, "step": 42140 }, { "epoch": 0.5763275016920647, "grad_norm": NaN, "learning_rate": 0.00044798413924705444, "loss": 0.0, "step": 42150 }, { "epoch": 0.5764642341954318, "grad_norm": NaN, "learning_rate": 0.00044794770426860505, "loss": 0.0, "step": 42160 }, { "epoch": 0.5766009666987988, "grad_norm": NaN, "learning_rate": 0.0004479112580167621, "loss": 0.0, "step": 42170 }, { "epoch": 0.5767376992021659, "grad_norm": NaN, "learning_rate": 0.0004478748004936011, "loss": 0.0, "step": 42180 }, { "epoch": 0.5768744317055329, "grad_norm": NaN, "learning_rate": 0.0004478383317011985, "loss": 0.0, "step": 42190 }, { "epoch": 0.5770111642089, "grad_norm": NaN, "learning_rate": 0.00044780185164163124, "loss": 0.0, "step": 42200 }, { "epoch": 0.577147896712267, "grad_norm": NaN, "learning_rate": 0.00044776536031697686, "loss": 0.0, "step": 42210 }, { "epoch": 0.577284629215634, "grad_norm": NaN, "learning_rate": 0.0004477288577293136, "loss": 0.0, "step": 42220 }, { "epoch": 0.577421361719001, "grad_norm": NaN, "learning_rate": 0.0004476923438807202, "loss": 0.0, "step": 42230 }, { "epoch": 0.577558094222368, "grad_norm": NaN, "learning_rate": 0.00044765581877327643, "loss": 0.0, "step": 42240 }, { "epoch": 0.5776948267257351, "grad_norm": NaN, "learning_rate": 0.00044761928240906235, "loss": 0.0, "step": 42250 }, { "epoch": 0.5778315592291021, "grad_norm": NaN, "learning_rate": 0.00044758273479015865, "loss": 0.0, "step": 42260 }, { "epoch": 0.5779682917324692, "grad_norm": NaN, "learning_rate": 0.0004475461759186469, "loss": 0.0, "step": 42270 }, { "epoch": 0.5781050242358362, "grad_norm": NaN, "learning_rate": 0.00044750960579660907, "loss": 0.0, "step": 42280 }, { "epoch": 0.5782417567392033, "grad_norm": NaN, "learning_rate": 0.000447473024426128, "loss": 0.0, "step": 42290 }, { "epoch": 0.5783784892425703, "grad_norm": NaN, "learning_rate": 0.00044743643180928695, "loss": 0.0, "step": 42300 }, { "epoch": 0.5785152217459373, "grad_norm": NaN, "learning_rate": 0.0004473998279481699, "loss": 0.0, "step": 42310 }, { "epoch": 0.5786519542493044, "grad_norm": NaN, "learning_rate": 0.0004473632128448616, "loss": 0.0, "step": 42320 }, { "epoch": 0.5787886867526714, "grad_norm": NaN, "learning_rate": 0.0004473265865014472, "loss": 0.0, "step": 42330 }, { "epoch": 0.5789254192560385, "grad_norm": NaN, "learning_rate": 0.0004472899489200128, "loss": 0.0, "step": 42340 }, { "epoch": 0.5790621517594055, "grad_norm": NaN, "learning_rate": 0.00044725330010264477, "loss": 0.0, "step": 42350 }, { "epoch": 0.5791988842627726, "grad_norm": NaN, "learning_rate": 0.0004472166400514304, "loss": 0.0, "step": 42360 }, { "epoch": 0.5793356167661395, "grad_norm": NaN, "learning_rate": 0.00044717996876845757, "loss": 0.0, "step": 42370 }, { "epoch": 0.5794723492695066, "grad_norm": NaN, "learning_rate": 0.0004471432862558147, "loss": 0.0, "step": 42380 }, { "epoch": 0.5796090817728736, "grad_norm": NaN, "learning_rate": 0.0004471065925155908, "loss": 0.0, "step": 42390 }, { "epoch": 0.5797458142762407, "grad_norm": NaN, "learning_rate": 0.00044706988754987585, "loss": 0.0, "step": 42400 }, { "epoch": 0.5798825467796077, "grad_norm": NaN, "learning_rate": 0.0004470331713607602, "loss": 0.0, "step": 42410 }, { "epoch": 0.5800192792829747, "grad_norm": NaN, "learning_rate": 0.0004469964439503348, "loss": 0.0, "step": 42420 }, { "epoch": 0.5801560117863418, "grad_norm": NaN, "learning_rate": 0.0004469597053206913, "loss": 0.0, "step": 42430 }, { "epoch": 0.5802927442897088, "grad_norm": NaN, "learning_rate": 0.00044692295547392214, "loss": 0.0, "step": 42440 }, { "epoch": 0.5804294767930759, "grad_norm": NaN, "learning_rate": 0.0004468861944121202, "loss": 0.0, "step": 42450 }, { "epoch": 0.5805662092964429, "grad_norm": NaN, "learning_rate": 0.0004468494221373791, "loss": 0.0, "step": 42460 }, { "epoch": 0.58070294179981, "grad_norm": NaN, "learning_rate": 0.00044681263865179306, "loss": 0.0, "step": 42470 }, { "epoch": 0.580839674303177, "grad_norm": NaN, "learning_rate": 0.00044677584395745693, "loss": 0.0, "step": 42480 }, { "epoch": 0.580976406806544, "grad_norm": NaN, "learning_rate": 0.0004467390380564662, "loss": 0.0, "step": 42490 }, { "epoch": 0.581113139309911, "grad_norm": NaN, "learning_rate": 0.0004467022209509171, "loss": 0.0, "step": 42500 }, { "epoch": 0.5812498718132781, "grad_norm": NaN, "learning_rate": 0.00044666539264290637, "loss": 0.0, "step": 42510 }, { "epoch": 0.5813866043166451, "grad_norm": NaN, "learning_rate": 0.00044662855313453136, "loss": 0.0, "step": 42520 }, { "epoch": 0.5815233368200121, "grad_norm": NaN, "learning_rate": 0.00044659170242789025, "loss": 0.0, "step": 42530 }, { "epoch": 0.5816600693233792, "grad_norm": NaN, "learning_rate": 0.0004465548405250817, "loss": 0.0, "step": 42540 }, { "epoch": 0.5817968018267462, "grad_norm": NaN, "learning_rate": 0.00044651796742820504, "loss": 0.0, "step": 42550 }, { "epoch": 0.5819335343301133, "grad_norm": NaN, "learning_rate": 0.0004464810831393602, "loss": 0.0, "step": 42560 }, { "epoch": 0.5820702668334803, "grad_norm": NaN, "learning_rate": 0.00044644418766064787, "loss": 0.0, "step": 42570 }, { "epoch": 0.5822069993368474, "grad_norm": NaN, "learning_rate": 0.00044640728099416924, "loss": 0.0, "step": 42580 }, { "epoch": 0.5823437318402144, "grad_norm": NaN, "learning_rate": 0.00044637036314202624, "loss": 0.0, "step": 42590 }, { "epoch": 0.5824804643435815, "grad_norm": NaN, "learning_rate": 0.00044633343410632135, "loss": 0.0, "step": 42600 }, { "epoch": 0.5826171968469485, "grad_norm": NaN, "learning_rate": 0.0004462964938891577, "loss": 0.0, "step": 42610 }, { "epoch": 0.5827539293503156, "grad_norm": NaN, "learning_rate": 0.0004462595424926391, "loss": 0.0, "step": 42620 }, { "epoch": 0.5828906618536825, "grad_norm": NaN, "learning_rate": 0.0004462225799188701, "loss": 0.0, "step": 42630 }, { "epoch": 0.5830273943570495, "grad_norm": NaN, "learning_rate": 0.00044618560616995563, "loss": 0.0, "step": 42640 }, { "epoch": 0.5831641268604166, "grad_norm": NaN, "learning_rate": 0.0004461486212480015, "loss": 0.0, "step": 42650 }, { "epoch": 0.5833008593637836, "grad_norm": NaN, "learning_rate": 0.00044611162515511394, "loss": 0.0, "step": 42660 }, { "epoch": 0.5834375918671507, "grad_norm": NaN, "learning_rate": 0.0004460746178934001, "loss": 0.0, "step": 42670 }, { "epoch": 0.5835743243705177, "grad_norm": NaN, "learning_rate": 0.00044603759946496734, "loss": 0.0, "step": 42680 }, { "epoch": 0.5837110568738848, "grad_norm": NaN, "learning_rate": 0.00044600056987192417, "loss": 0.0, "step": 42690 }, { "epoch": 0.5838477893772518, "grad_norm": NaN, "learning_rate": 0.00044596352911637925, "loss": 0.0, "step": 42700 }, { "epoch": 0.5839845218806189, "grad_norm": NaN, "learning_rate": 0.0004459264772004423, "loss": 0.0, "step": 42710 }, { "epoch": 0.5841212543839859, "grad_norm": NaN, "learning_rate": 0.0004458894141262233, "loss": 0.0, "step": 42720 }, { "epoch": 0.584257986887353, "grad_norm": NaN, "learning_rate": 0.00044585233989583316, "loss": 0.0, "step": 42730 }, { "epoch": 0.58439471939072, "grad_norm": NaN, "learning_rate": 0.0004458152545113834, "loss": 0.0, "step": 42740 }, { "epoch": 0.584531451894087, "grad_norm": NaN, "learning_rate": 0.00044577815797498577, "loss": 0.0, "step": 42750 }, { "epoch": 0.584668184397454, "grad_norm": NaN, "learning_rate": 0.00044574105028875335, "loss": 0.0, "step": 42760 }, { "epoch": 0.584804916900821, "grad_norm": NaN, "learning_rate": 0.0004457039314547991, "loss": 0.0, "step": 42770 }, { "epoch": 0.5849416494041881, "grad_norm": NaN, "learning_rate": 0.0004456668014752373, "loss": 0.0, "step": 42780 }, { "epoch": 0.5850783819075551, "grad_norm": NaN, "learning_rate": 0.00044562966035218245, "loss": 0.0, "step": 42790 }, { "epoch": 0.5852151144109222, "grad_norm": NaN, "learning_rate": 0.00044559250808774963, "loss": 0.0, "step": 42800 }, { "epoch": 0.5853518469142892, "grad_norm": NaN, "learning_rate": 0.000445555344684055, "loss": 0.0, "step": 42810 }, { "epoch": 0.5854885794176563, "grad_norm": NaN, "learning_rate": 0.00044551817014321484, "loss": 0.0, "step": 42820 }, { "epoch": 0.5856253119210233, "grad_norm": NaN, "learning_rate": 0.00044548098446734634, "loss": 0.0, "step": 42830 }, { "epoch": 0.5857620444243904, "grad_norm": NaN, "learning_rate": 0.00044544378765856734, "loss": 0.0, "step": 42840 }, { "epoch": 0.5858987769277574, "grad_norm": NaN, "learning_rate": 0.00044540657971899625, "loss": 0.0, "step": 42850 }, { "epoch": 0.5860355094311244, "grad_norm": NaN, "learning_rate": 0.00044536936065075203, "loss": 0.0, "step": 42860 }, { "epoch": 0.5861722419344915, "grad_norm": NaN, "learning_rate": 0.0004453321304559543, "loss": 0.0, "step": 42870 }, { "epoch": 0.5863089744378585, "grad_norm": NaN, "learning_rate": 0.00044529488913672366, "loss": 0.0, "step": 42880 }, { "epoch": 0.5864457069412256, "grad_norm": NaN, "learning_rate": 0.00044525763669518075, "loss": 0.0, "step": 42890 }, { "epoch": 0.5865824394445925, "grad_norm": NaN, "learning_rate": 0.00044522037313344734, "loss": 0.0, "step": 42900 }, { "epoch": 0.5867191719479596, "grad_norm": NaN, "learning_rate": 0.00044518309845364545, "loss": 0.0, "step": 42910 }, { "epoch": 0.5868559044513266, "grad_norm": NaN, "learning_rate": 0.00044514581265789815, "loss": 0.0, "step": 42920 }, { "epoch": 0.5869926369546937, "grad_norm": NaN, "learning_rate": 0.0004451085157483288, "loss": 0.0, "step": 42930 }, { "epoch": 0.5871293694580607, "grad_norm": NaN, "learning_rate": 0.0004450712077270614, "loss": 0.0, "step": 42940 }, { "epoch": 0.5872661019614278, "grad_norm": NaN, "learning_rate": 0.00044503388859622097, "loss": 0.0, "step": 42950 }, { "epoch": 0.5874028344647948, "grad_norm": NaN, "learning_rate": 0.0004449965583579326, "loss": 0.0, "step": 42960 }, { "epoch": 0.5875395669681618, "grad_norm": NaN, "learning_rate": 0.0004449592170143225, "loss": 0.0, "step": 42970 }, { "epoch": 0.5876762994715289, "grad_norm": NaN, "learning_rate": 0.00044492186456751717, "loss": 0.0, "step": 42980 }, { "epoch": 0.5878130319748959, "grad_norm": NaN, "learning_rate": 0.00044488450101964395, "loss": 0.0, "step": 42990 }, { "epoch": 0.587949764478263, "grad_norm": NaN, "learning_rate": 0.0004448471263728308, "loss": 0.0, "step": 43000 }, { "epoch": 0.58808649698163, "grad_norm": NaN, "learning_rate": 0.0004448097406292062, "loss": 0.0, "step": 43010 }, { "epoch": 0.5882232294849971, "grad_norm": NaN, "learning_rate": 0.00044477234379089925, "loss": 0.0, "step": 43020 }, { "epoch": 0.588359961988364, "grad_norm": NaN, "learning_rate": 0.00044473493586003987, "loss": 0.0, "step": 43030 }, { "epoch": 0.5884966944917311, "grad_norm": NaN, "learning_rate": 0.00044469751683875845, "loss": 0.0, "step": 43040 }, { "epoch": 0.5886334269950981, "grad_norm": NaN, "learning_rate": 0.00044466008672918605, "loss": 0.0, "step": 43050 }, { "epoch": 0.5887701594984652, "grad_norm": NaN, "learning_rate": 0.00044462264553345437, "loss": 0.0, "step": 43060 }, { "epoch": 0.5889068920018322, "grad_norm": NaN, "learning_rate": 0.0004445851932536957, "loss": 0.0, "step": 43070 }, { "epoch": 0.5890436245051992, "grad_norm": NaN, "learning_rate": 0.00044454772989204307, "loss": 0.0, "step": 43080 }, { "epoch": 0.5891803570085663, "grad_norm": NaN, "learning_rate": 0.00044451025545063006, "loss": 0.0, "step": 43090 }, { "epoch": 0.5893170895119333, "grad_norm": NaN, "learning_rate": 0.0004444727699315908, "loss": 0.0, "step": 43100 }, { "epoch": 0.5894538220153004, "grad_norm": NaN, "learning_rate": 0.00044443527333706026, "loss": 0.0, "step": 43110 }, { "epoch": 0.5895905545186674, "grad_norm": NaN, "learning_rate": 0.00044439776566917376, "loss": 0.0, "step": 43120 }, { "epoch": 0.5897272870220345, "grad_norm": NaN, "learning_rate": 0.00044436024693006763, "loss": 0.0, "step": 43130 }, { "epoch": 0.5898640195254015, "grad_norm": NaN, "learning_rate": 0.00044432271712187846, "loss": 0.0, "step": 43140 }, { "epoch": 0.5900007520287686, "grad_norm": NaN, "learning_rate": 0.0004442851762467436, "loss": 0.0, "step": 43150 }, { "epoch": 0.5901374845321355, "grad_norm": NaN, "learning_rate": 0.00044424762430680123, "loss": 0.0, "step": 43160 }, { "epoch": 0.5902742170355026, "grad_norm": NaN, "learning_rate": 0.00044421006130418986, "loss": 0.0, "step": 43170 }, { "epoch": 0.5904109495388696, "grad_norm": NaN, "learning_rate": 0.00044417248724104867, "loss": 0.0, "step": 43180 }, { "epoch": 0.5905476820422366, "grad_norm": NaN, "learning_rate": 0.0004441349021195178, "loss": 0.0, "step": 43190 }, { "epoch": 0.5906844145456037, "grad_norm": NaN, "learning_rate": 0.00044409730594173745, "loss": 0.0, "step": 43200 }, { "epoch": 0.5908211470489707, "grad_norm": NaN, "learning_rate": 0.00044405969870984916, "loss": 0.0, "step": 43210 }, { "epoch": 0.5909578795523378, "grad_norm": NaN, "learning_rate": 0.0004440220804259944, "loss": 0.0, "step": 43220 }, { "epoch": 0.5910946120557048, "grad_norm": NaN, "learning_rate": 0.00044398445109231565, "loss": 0.0, "step": 43230 }, { "epoch": 0.5912313445590719, "grad_norm": NaN, "learning_rate": 0.0004439468107109561, "loss": 0.0, "step": 43240 }, { "epoch": 0.5913680770624389, "grad_norm": NaN, "learning_rate": 0.0004439091592840593, "loss": 0.0, "step": 43250 }, { "epoch": 0.591504809565806, "grad_norm": NaN, "learning_rate": 0.00044387149681376946, "loss": 0.0, "step": 43260 }, { "epoch": 0.591641542069173, "grad_norm": NaN, "learning_rate": 0.00044383382330223165, "loss": 0.0, "step": 43270 }, { "epoch": 0.5917782745725401, "grad_norm": NaN, "learning_rate": 0.0004437961387515914, "loss": 0.0, "step": 43280 }, { "epoch": 0.591915007075907, "grad_norm": NaN, "learning_rate": 0.00044375844316399496, "loss": 0.0, "step": 43290 }, { "epoch": 0.592051739579274, "grad_norm": NaN, "learning_rate": 0.00044372073654158905, "loss": 0.0, "step": 43300 }, { "epoch": 0.5921884720826411, "grad_norm": NaN, "learning_rate": 0.0004436830188865211, "loss": 0.0, "step": 43310 }, { "epoch": 0.5923252045860081, "grad_norm": NaN, "learning_rate": 0.00044364529020093923, "loss": 0.0, "step": 43320 }, { "epoch": 0.5924619370893752, "grad_norm": NaN, "learning_rate": 0.0004436075504869921, "loss": 0.0, "step": 43330 }, { "epoch": 0.5925986695927422, "grad_norm": NaN, "learning_rate": 0.00044356979974682914, "loss": 0.0, "step": 43340 }, { "epoch": 0.5927354020961093, "grad_norm": NaN, "learning_rate": 0.0004435320379826002, "loss": 0.0, "step": 43350 }, { "epoch": 0.5928721345994763, "grad_norm": NaN, "learning_rate": 0.00044349426519645584, "loss": 0.0, "step": 43360 }, { "epoch": 0.5930088671028434, "grad_norm": NaN, "learning_rate": 0.00044345648139054744, "loss": 0.0, "step": 43370 }, { "epoch": 0.5931455996062104, "grad_norm": NaN, "learning_rate": 0.0004434186865670267, "loss": 0.0, "step": 43380 }, { "epoch": 0.5932823321095775, "grad_norm": NaN, "learning_rate": 0.0004433808807280461, "loss": 0.0, "step": 43390 }, { "epoch": 0.5934190646129445, "grad_norm": NaN, "learning_rate": 0.00044334306387575875, "loss": 0.0, "step": 43400 }, { "epoch": 0.5935557971163115, "grad_norm": NaN, "learning_rate": 0.0004433052360123184, "loss": 0.0, "step": 43410 }, { "epoch": 0.5936925296196786, "grad_norm": NaN, "learning_rate": 0.0004432673971398794, "loss": 0.0, "step": 43420 }, { "epoch": 0.5938292621230455, "grad_norm": NaN, "learning_rate": 0.0004432295472605966, "loss": 0.0, "step": 43430 }, { "epoch": 0.5939659946264126, "grad_norm": NaN, "learning_rate": 0.0004431916863766258, "loss": 0.0, "step": 43440 }, { "epoch": 0.5941027271297796, "grad_norm": NaN, "learning_rate": 0.00044315381449012303, "loss": 0.0, "step": 43450 }, { "epoch": 0.5942394596331467, "grad_norm": NaN, "learning_rate": 0.0004431159316032454, "loss": 0.0, "step": 43460 }, { "epoch": 0.5943761921365137, "grad_norm": NaN, "learning_rate": 0.0004430780377181501, "loss": 0.0, "step": 43470 }, { "epoch": 0.5945129246398808, "grad_norm": NaN, "learning_rate": 0.0004430401328369954, "loss": 0.0, "step": 43480 }, { "epoch": 0.5946496571432478, "grad_norm": NaN, "learning_rate": 0.00044300221696194007, "loss": 0.0, "step": 43490 }, { "epoch": 0.5947863896466149, "grad_norm": NaN, "learning_rate": 0.0004429642900951434, "loss": 0.0, "step": 43500 }, { "epoch": 0.5949231221499819, "grad_norm": NaN, "learning_rate": 0.00044292635223876545, "loss": 0.0, "step": 43510 }, { "epoch": 0.5950598546533489, "grad_norm": NaN, "learning_rate": 0.00044288840339496663, "loss": 0.0, "step": 43520 }, { "epoch": 0.595196587156716, "grad_norm": NaN, "learning_rate": 0.0004428504435659084, "loss": 0.0, "step": 43530 }, { "epoch": 0.595333319660083, "grad_norm": NaN, "learning_rate": 0.0004428124727537526, "loss": 0.0, "step": 43540 }, { "epoch": 0.5954700521634501, "grad_norm": NaN, "learning_rate": 0.00044277449096066156, "loss": 0.0, "step": 43550 }, { "epoch": 0.595606784666817, "grad_norm": NaN, "learning_rate": 0.0004427364981887986, "loss": 0.0, "step": 43560 }, { "epoch": 0.5957435171701841, "grad_norm": NaN, "learning_rate": 0.00044269849444032727, "loss": 0.0, "step": 43570 }, { "epoch": 0.5958802496735511, "grad_norm": NaN, "learning_rate": 0.0004426604797174121, "loss": 0.0, "step": 43580 }, { "epoch": 0.5960169821769182, "grad_norm": NaN, "learning_rate": 0.00044262245402221803, "loss": 0.0, "step": 43590 }, { "epoch": 0.5961537146802852, "grad_norm": NaN, "learning_rate": 0.00044258441735691063, "loss": 0.0, "step": 43600 }, { "epoch": 0.5962904471836523, "grad_norm": NaN, "learning_rate": 0.00044254636972365614, "loss": 0.0, "step": 43610 }, { "epoch": 0.5964271796870193, "grad_norm": NaN, "learning_rate": 0.0004425083111246215, "loss": 0.0, "step": 43620 }, { "epoch": 0.5965639121903863, "grad_norm": NaN, "learning_rate": 0.00044247024156197413, "loss": 0.0, "step": 43630 }, { "epoch": 0.5967006446937534, "grad_norm": NaN, "learning_rate": 0.0004424321610378822, "loss": 0.0, "step": 43640 }, { "epoch": 0.5968373771971204, "grad_norm": NaN, "learning_rate": 0.0004423940695545143, "loss": 0.0, "step": 43650 }, { "epoch": 0.5969741097004875, "grad_norm": NaN, "learning_rate": 0.00044235596711404, "loss": 0.0, "step": 43660 }, { "epoch": 0.5971108422038545, "grad_norm": NaN, "learning_rate": 0.0004423178537186292, "loss": 0.0, "step": 43670 }, { "epoch": 0.5972475747072216, "grad_norm": NaN, "learning_rate": 0.00044227972937045245, "loss": 0.0, "step": 43680 }, { "epoch": 0.5973843072105885, "grad_norm": NaN, "learning_rate": 0.00044224159407168107, "loss": 0.0, "step": 43690 }, { "epoch": 0.5975210397139556, "grad_norm": NaN, "learning_rate": 0.00044220344782448687, "loss": 0.0, "step": 43700 }, { "epoch": 0.5976577722173226, "grad_norm": NaN, "learning_rate": 0.00044216529063104235, "loss": 0.0, "step": 43710 }, { "epoch": 0.5977945047206896, "grad_norm": NaN, "learning_rate": 0.0004421271224935206, "loss": 0.0, "step": 43720 }, { "epoch": 0.5979312372240567, "grad_norm": NaN, "learning_rate": 0.0004420889434140953, "loss": 0.0, "step": 43730 }, { "epoch": 0.5980679697274237, "grad_norm": NaN, "learning_rate": 0.0004420507533949409, "loss": 0.0, "step": 43740 }, { "epoch": 0.5982047022307908, "grad_norm": NaN, "learning_rate": 0.00044201255243823246, "loss": 0.0, "step": 43750 }, { "epoch": 0.5983414347341578, "grad_norm": NaN, "learning_rate": 0.0004419743405461453, "loss": 0.0, "step": 43760 }, { "epoch": 0.5984781672375249, "grad_norm": NaN, "learning_rate": 0.00044193611772085583, "loss": 0.0, "step": 43770 }, { "epoch": 0.5986148997408919, "grad_norm": NaN, "learning_rate": 0.0004418978839645409, "loss": 0.0, "step": 43780 }, { "epoch": 0.598751632244259, "grad_norm": NaN, "learning_rate": 0.0004418596392793778, "loss": 0.0, "step": 43790 }, { "epoch": 0.598888364747626, "grad_norm": NaN, "learning_rate": 0.00044182138366754485, "loss": 0.0, "step": 43800 }, { "epoch": 0.5990250972509931, "grad_norm": NaN, "learning_rate": 0.00044178311713122064, "loss": 0.0, "step": 43810 }, { "epoch": 0.59916182975436, "grad_norm": NaN, "learning_rate": 0.0004417448396725845, "loss": 0.0, "step": 43820 }, { "epoch": 0.599298562257727, "grad_norm": NaN, "learning_rate": 0.0004417065512938164, "loss": 0.0, "step": 43830 }, { "epoch": 0.5994352947610941, "grad_norm": NaN, "learning_rate": 0.00044166825199709695, "loss": 0.0, "step": 43840 }, { "epoch": 0.5995720272644611, "grad_norm": NaN, "learning_rate": 0.00044162994178460725, "loss": 0.0, "step": 43850 }, { "epoch": 0.5997087597678282, "grad_norm": NaN, "learning_rate": 0.0004415916206585292, "loss": 0.0, "step": 43860 }, { "epoch": 0.5998454922711952, "grad_norm": NaN, "learning_rate": 0.00044155328862104527, "loss": 0.0, "step": 43870 }, { "epoch": 0.5999822247745623, "grad_norm": NaN, "learning_rate": 0.00044151494567433837, "loss": 0.0, "step": 43880 }, { "epoch": 0.6001189572779293, "grad_norm": NaN, "learning_rate": 0.0004414765918205924, "loss": 0.0, "step": 43890 }, { "epoch": 0.6002556897812964, "grad_norm": NaN, "learning_rate": 0.0004414382270619915, "loss": 0.0, "step": 43900 }, { "epoch": 0.6003924222846634, "grad_norm": NaN, "learning_rate": 0.00044139985140072063, "loss": 0.0, "step": 43910 }, { "epoch": 0.6005291547880305, "grad_norm": NaN, "learning_rate": 0.00044136146483896537, "loss": 0.0, "step": 43920 }, { "epoch": 0.6006658872913975, "grad_norm": NaN, "learning_rate": 0.00044132306737891194, "loss": 0.0, "step": 43930 }, { "epoch": 0.6008026197947645, "grad_norm": NaN, "learning_rate": 0.0004412846590227469, "loss": 0.0, "step": 43940 }, { "epoch": 0.6009393522981316, "grad_norm": NaN, "learning_rate": 0.000441246239772658, "loss": 0.0, "step": 43950 }, { "epoch": 0.6010760848014985, "grad_norm": NaN, "learning_rate": 0.00044120780963083296, "loss": 0.0, "step": 43960 }, { "epoch": 0.6012128173048656, "grad_norm": NaN, "learning_rate": 0.0004411693685994607, "loss": 0.0, "step": 43970 }, { "epoch": 0.6013495498082326, "grad_norm": NaN, "learning_rate": 0.0004411309166807302, "loss": 0.0, "step": 43980 }, { "epoch": 0.6014862823115997, "grad_norm": NaN, "learning_rate": 0.0004410924538768315, "loss": 0.0, "step": 43990 }, { "epoch": 0.6016230148149667, "grad_norm": NaN, "learning_rate": 0.0004410539801899552, "loss": 0.0, "step": 44000 }, { "epoch": 0.6017597473183338, "grad_norm": NaN, "learning_rate": 0.00044101549562229224, "loss": 0.0, "step": 44010 }, { "epoch": 0.6018964798217008, "grad_norm": NaN, "learning_rate": 0.00044097700017603454, "loss": 0.0, "step": 44020 }, { "epoch": 0.6020332123250679, "grad_norm": NaN, "learning_rate": 0.00044093849385337436, "loss": 0.0, "step": 44030 }, { "epoch": 0.6021699448284349, "grad_norm": NaN, "learning_rate": 0.0004408999766565047, "loss": 0.0, "step": 44040 }, { "epoch": 0.6023066773318019, "grad_norm": NaN, "learning_rate": 0.0004408614485876192, "loss": 0.0, "step": 44050 }, { "epoch": 0.602443409835169, "grad_norm": NaN, "learning_rate": 0.00044082290964891213, "loss": 0.0, "step": 44060 }, { "epoch": 0.602580142338536, "grad_norm": NaN, "learning_rate": 0.00044078435984257826, "loss": 0.0, "step": 44070 }, { "epoch": 0.6027168748419031, "grad_norm": NaN, "learning_rate": 0.00044074579917081306, "loss": 0.0, "step": 44080 }, { "epoch": 0.60285360734527, "grad_norm": NaN, "learning_rate": 0.00044070722763581263, "loss": 0.0, "step": 44090 }, { "epoch": 0.6029903398486371, "grad_norm": NaN, "learning_rate": 0.00044066864523977363, "loss": 0.0, "step": 44100 }, { "epoch": 0.6031270723520041, "grad_norm": NaN, "learning_rate": 0.00044063005198489346, "loss": 0.0, "step": 44110 }, { "epoch": 0.6032638048553712, "grad_norm": NaN, "learning_rate": 0.00044059144787337, "loss": 0.0, "step": 44120 }, { "epoch": 0.6034005373587382, "grad_norm": NaN, "learning_rate": 0.00044055283290740183, "loss": 0.0, "step": 44130 }, { "epoch": 0.6035372698621053, "grad_norm": NaN, "learning_rate": 0.0004405142070891881, "loss": 0.0, "step": 44140 }, { "epoch": 0.6036740023654723, "grad_norm": NaN, "learning_rate": 0.00044047557042092866, "loss": 0.0, "step": 44150 }, { "epoch": 0.6038107348688393, "grad_norm": NaN, "learning_rate": 0.00044043692290482385, "loss": 0.0, "step": 44160 }, { "epoch": 0.6039474673722064, "grad_norm": NaN, "learning_rate": 0.0004403982645430748, "loss": 0.0, "step": 44170 }, { "epoch": 0.6040841998755734, "grad_norm": NaN, "learning_rate": 0.0004403595953378831, "loss": 0.0, "step": 44180 }, { "epoch": 0.6042209323789405, "grad_norm": NaN, "learning_rate": 0.00044032091529145093, "loss": 0.0, "step": 44190 }, { "epoch": 0.6043576648823075, "grad_norm": NaN, "learning_rate": 0.0004402822244059813, "loss": 0.0, "step": 44200 }, { "epoch": 0.6044943973856746, "grad_norm": NaN, "learning_rate": 0.00044024352268367764, "loss": 0.0, "step": 44210 }, { "epoch": 0.6046311298890416, "grad_norm": NaN, "learning_rate": 0.00044020481012674404, "loss": 0.0, "step": 44220 }, { "epoch": 0.6047678623924087, "grad_norm": NaN, "learning_rate": 0.00044016608673738526, "loss": 0.0, "step": 44230 }, { "epoch": 0.6049045948957756, "grad_norm": NaN, "learning_rate": 0.0004401273525178068, "loss": 0.0, "step": 44240 }, { "epoch": 0.6050413273991427, "grad_norm": NaN, "learning_rate": 0.0004400886074702144, "loss": 0.0, "step": 44250 }, { "epoch": 0.6051780599025097, "grad_norm": NaN, "learning_rate": 0.00044004985159681474, "loss": 0.0, "step": 44260 }, { "epoch": 0.6053147924058767, "grad_norm": NaN, "learning_rate": 0.000440011084899815, "loss": 0.0, "step": 44270 }, { "epoch": 0.6054515249092438, "grad_norm": NaN, "learning_rate": 0.00043997230738142306, "loss": 0.0, "step": 44280 }, { "epoch": 0.6055882574126108, "grad_norm": NaN, "learning_rate": 0.0004399335190438473, "loss": 0.0, "step": 44290 }, { "epoch": 0.6057249899159779, "grad_norm": NaN, "learning_rate": 0.00043989471988929675, "loss": 0.0, "step": 44300 }, { "epoch": 0.6058617224193449, "grad_norm": NaN, "learning_rate": 0.00043985590991998117, "loss": 0.0, "step": 44310 }, { "epoch": 0.605998454922712, "grad_norm": NaN, "learning_rate": 0.0004398170891381107, "loss": 0.0, "step": 44320 }, { "epoch": 0.606135187426079, "grad_norm": NaN, "learning_rate": 0.0004397782575458963, "loss": 0.0, "step": 44330 }, { "epoch": 0.6062719199294461, "grad_norm": NaN, "learning_rate": 0.00043973941514554953, "loss": 0.0, "step": 44340 }, { "epoch": 0.6064086524328131, "grad_norm": NaN, "learning_rate": 0.0004397005619392825, "loss": 0.0, "step": 44350 }, { "epoch": 0.6065453849361802, "grad_norm": NaN, "learning_rate": 0.00043966169792930787, "loss": 0.0, "step": 44360 }, { "epoch": 0.6066821174395471, "grad_norm": NaN, "learning_rate": 0.00043962282311783905, "loss": 0.0, "step": 44370 }, { "epoch": 0.6068188499429141, "grad_norm": NaN, "learning_rate": 0.00043958393750709014, "loss": 0.0, "step": 44380 }, { "epoch": 0.6069555824462812, "grad_norm": NaN, "learning_rate": 0.0004395450410992755, "loss": 0.0, "step": 44390 }, { "epoch": 0.6070923149496482, "grad_norm": NaN, "learning_rate": 0.00043950613389661053, "loss": 0.0, "step": 44400 }, { "epoch": 0.6072290474530153, "grad_norm": NaN, "learning_rate": 0.00043946721590131096, "loss": 0.0, "step": 44410 }, { "epoch": 0.6073657799563823, "grad_norm": NaN, "learning_rate": 0.0004394282871155932, "loss": 0.0, "step": 44420 }, { "epoch": 0.6075025124597494, "grad_norm": NaN, "learning_rate": 0.0004393893475416743, "loss": 0.0, "step": 44430 }, { "epoch": 0.6076392449631164, "grad_norm": NaN, "learning_rate": 0.000439350397181772, "loss": 0.0, "step": 44440 }, { "epoch": 0.6077759774664835, "grad_norm": NaN, "learning_rate": 0.0004393114360381045, "loss": 0.0, "step": 44450 }, { "epoch": 0.6079127099698505, "grad_norm": NaN, "learning_rate": 0.0004392724641128908, "loss": 0.0, "step": 44460 }, { "epoch": 0.6080494424732176, "grad_norm": NaN, "learning_rate": 0.00043923348140835017, "loss": 0.0, "step": 44470 }, { "epoch": 0.6081861749765846, "grad_norm": NaN, "learning_rate": 0.000439194487926703, "loss": 0.0, "step": 44480 }, { "epoch": 0.6083229074799515, "grad_norm": NaN, "learning_rate": 0.0004391554836701699, "loss": 0.0, "step": 44490 }, { "epoch": 0.6084596399833186, "grad_norm": NaN, "learning_rate": 0.00043911646864097214, "loss": 0.0, "step": 44500 }, { "epoch": 0.6085963724866856, "grad_norm": NaN, "learning_rate": 0.0004390774428413318, "loss": 0.0, "step": 44510 }, { "epoch": 0.6087331049900527, "grad_norm": NaN, "learning_rate": 0.00043903840627347146, "loss": 0.0, "step": 44520 }, { "epoch": 0.6088698374934197, "grad_norm": NaN, "learning_rate": 0.00043899935893961417, "loss": 0.0, "step": 44530 }, { "epoch": 0.6090065699967868, "grad_norm": NaN, "learning_rate": 0.00043896030084198393, "loss": 0.0, "step": 44540 }, { "epoch": 0.6091433025001538, "grad_norm": NaN, "learning_rate": 0.000438921231982805, "loss": 0.0, "step": 44550 }, { "epoch": 0.6092800350035209, "grad_norm": NaN, "learning_rate": 0.00043888215236430236, "loss": 0.0, "step": 44560 }, { "epoch": 0.6094167675068879, "grad_norm": NaN, "learning_rate": 0.0004388430619887018, "loss": 0.0, "step": 44570 }, { "epoch": 0.609553500010255, "grad_norm": NaN, "learning_rate": 0.0004388039608582295, "loss": 0.0, "step": 44580 }, { "epoch": 0.609690232513622, "grad_norm": NaN, "learning_rate": 0.0004387648489751123, "loss": 0.0, "step": 44590 }, { "epoch": 0.609826965016989, "grad_norm": NaN, "learning_rate": 0.0004387257263415777, "loss": 0.0, "step": 44600 }, { "epoch": 0.6099636975203561, "grad_norm": NaN, "learning_rate": 0.0004386865929598538, "loss": 0.0, "step": 44610 }, { "epoch": 0.610100430023723, "grad_norm": NaN, "learning_rate": 0.00043864744883216936, "loss": 0.0, "step": 44620 }, { "epoch": 0.6102371625270901, "grad_norm": NaN, "learning_rate": 0.0004386082939607535, "loss": 0.0, "step": 44630 }, { "epoch": 0.6103738950304571, "grad_norm": NaN, "learning_rate": 0.00043856912834783636, "loss": 0.0, "step": 44640 }, { "epoch": 0.6105106275338242, "grad_norm": NaN, "learning_rate": 0.00043852995199564833, "loss": 0.0, "step": 44650 }, { "epoch": 0.6106473600371912, "grad_norm": NaN, "learning_rate": 0.0004384907649064206, "loss": 0.0, "step": 44660 }, { "epoch": 0.6107840925405583, "grad_norm": NaN, "learning_rate": 0.000438451567082385, "loss": 0.0, "step": 44670 }, { "epoch": 0.6109208250439253, "grad_norm": NaN, "learning_rate": 0.00043841235852577374, "loss": 0.0, "step": 44680 }, { "epoch": 0.6110575575472924, "grad_norm": NaN, "learning_rate": 0.00043837313923882, "loss": 0.0, "step": 44690 }, { "epoch": 0.6111942900506594, "grad_norm": NaN, "learning_rate": 0.0004383339092237572, "loss": 0.0, "step": 44700 }, { "epoch": 0.6113310225540264, "grad_norm": NaN, "learning_rate": 0.00043829466848281965, "loss": 0.0, "step": 44710 }, { "epoch": 0.6114677550573935, "grad_norm": NaN, "learning_rate": 0.0004382554170182421, "loss": 0.0, "step": 44720 }, { "epoch": 0.6116044875607605, "grad_norm": NaN, "learning_rate": 0.00043821615483226003, "loss": 0.0, "step": 44730 }, { "epoch": 0.6117412200641276, "grad_norm": NaN, "learning_rate": 0.00043817688192710944, "loss": 0.0, "step": 44740 }, { "epoch": 0.6118779525674946, "grad_norm": NaN, "learning_rate": 0.00043813759830502695, "loss": 0.0, "step": 44750 }, { "epoch": 0.6120146850708617, "grad_norm": NaN, "learning_rate": 0.0004380983039682499, "loss": 0.0, "step": 44760 }, { "epoch": 0.6121514175742286, "grad_norm": NaN, "learning_rate": 0.00043805899891901604, "loss": 0.0, "step": 44770 }, { "epoch": 0.6122881500775957, "grad_norm": NaN, "learning_rate": 0.00043801968315956396, "loss": 0.0, "step": 44780 }, { "epoch": 0.6124248825809627, "grad_norm": NaN, "learning_rate": 0.0004379803566921327, "loss": 0.0, "step": 44790 }, { "epoch": 0.6125616150843298, "grad_norm": NaN, "learning_rate": 0.00043794101951896193, "loss": 0.0, "step": 44800 }, { "epoch": 0.6126983475876968, "grad_norm": NaN, "learning_rate": 0.000437901671642292, "loss": 0.0, "step": 44810 }, { "epoch": 0.6128350800910638, "grad_norm": NaN, "learning_rate": 0.00043786231306436373, "loss": 0.0, "step": 44820 }, { "epoch": 0.6129718125944309, "grad_norm": NaN, "learning_rate": 0.0004378229437874188, "loss": 0.0, "step": 44830 }, { "epoch": 0.6131085450977979, "grad_norm": NaN, "learning_rate": 0.0004377835638136993, "loss": 0.0, "step": 44840 }, { "epoch": 0.613245277601165, "grad_norm": NaN, "learning_rate": 0.0004377441731454479, "loss": 0.0, "step": 44850 }, { "epoch": 0.613382010104532, "grad_norm": NaN, "learning_rate": 0.000437704771784908, "loss": 0.0, "step": 44860 }, { "epoch": 0.6135187426078991, "grad_norm": NaN, "learning_rate": 0.00043766535973432355, "loss": 0.0, "step": 44870 }, { "epoch": 0.6136554751112661, "grad_norm": NaN, "learning_rate": 0.0004376259369959391, "loss": 0.0, "step": 44880 }, { "epoch": 0.6137922076146332, "grad_norm": NaN, "learning_rate": 0.00043758650357199986, "loss": 0.0, "step": 44890 }, { "epoch": 0.6139289401180001, "grad_norm": NaN, "learning_rate": 0.0004375470594647516, "loss": 0.0, "step": 44900 }, { "epoch": 0.6140656726213672, "grad_norm": NaN, "learning_rate": 0.00043750760467644077, "loss": 0.0, "step": 44910 }, { "epoch": 0.6142024051247342, "grad_norm": NaN, "learning_rate": 0.00043746813920931426, "loss": 0.0, "step": 44920 }, { "epoch": 0.6143391376281012, "grad_norm": NaN, "learning_rate": 0.00043742866306561984, "loss": 0.0, "step": 44930 }, { "epoch": 0.6144758701314683, "grad_norm": NaN, "learning_rate": 0.0004373891762476056, "loss": 0.0, "step": 44940 }, { "epoch": 0.6146126026348353, "grad_norm": NaN, "learning_rate": 0.0004373496787575204, "loss": 0.0, "step": 44950 }, { "epoch": 0.6147493351382024, "grad_norm": NaN, "learning_rate": 0.0004373101705976137, "loss": 0.0, "step": 44960 }, { "epoch": 0.6148860676415694, "grad_norm": NaN, "learning_rate": 0.0004372706517701356, "loss": 0.0, "step": 44970 }, { "epoch": 0.6150228001449365, "grad_norm": NaN, "learning_rate": 0.0004372311222773366, "loss": 0.0, "step": 44980 }, { "epoch": 0.6151595326483035, "grad_norm": NaN, "learning_rate": 0.000437191582121468, "loss": 0.0, "step": 44990 }, { "epoch": 0.6152962651516706, "grad_norm": NaN, "learning_rate": 0.00043715203130478185, "loss": 0.0, "step": 45000 }, { "epoch": 0.6154329976550376, "grad_norm": NaN, "learning_rate": 0.0004371124698295304, "loss": 0.0, "step": 45010 }, { "epoch": 0.6155697301584047, "grad_norm": NaN, "learning_rate": 0.00043707289769796684, "loss": 0.0, "step": 45020 }, { "epoch": 0.6157064626617716, "grad_norm": NaN, "learning_rate": 0.00043703331491234476, "loss": 0.0, "step": 45030 }, { "epoch": 0.6158431951651386, "grad_norm": NaN, "learning_rate": 0.0004369937214749186, "loss": 0.0, "step": 45040 }, { "epoch": 0.6159799276685057, "grad_norm": NaN, "learning_rate": 0.0004369541173879432, "loss": 0.0, "step": 45050 }, { "epoch": 0.6161166601718727, "grad_norm": NaN, "learning_rate": 0.000436914502653674, "loss": 0.0, "step": 45060 }, { "epoch": 0.6162533926752398, "grad_norm": NaN, "learning_rate": 0.0004368748772743672, "loss": 0.0, "step": 45070 }, { "epoch": 0.6163901251786068, "grad_norm": NaN, "learning_rate": 0.0004368352412522795, "loss": 0.0, "step": 45080 }, { "epoch": 0.6165268576819739, "grad_norm": NaN, "learning_rate": 0.00043679559458966816, "loss": 0.0, "step": 45090 }, { "epoch": 0.6166635901853409, "grad_norm": NaN, "learning_rate": 0.00043675593728879117, "loss": 0.0, "step": 45100 }, { "epoch": 0.616800322688708, "grad_norm": NaN, "learning_rate": 0.00043671626935190707, "loss": 0.0, "step": 45110 }, { "epoch": 0.616937055192075, "grad_norm": NaN, "learning_rate": 0.000436676590781275, "loss": 0.0, "step": 45120 }, { "epoch": 0.617073787695442, "grad_norm": NaN, "learning_rate": 0.0004366369015791547, "loss": 0.0, "step": 45130 }, { "epoch": 0.6172105201988091, "grad_norm": NaN, "learning_rate": 0.0004365972017478066, "loss": 0.0, "step": 45140 }, { "epoch": 0.617347252702176, "grad_norm": NaN, "learning_rate": 0.0004365574912894915, "loss": 0.0, "step": 45150 }, { "epoch": 0.6174839852055432, "grad_norm": NaN, "learning_rate": 0.00043651777020647107, "loss": 0.0, "step": 45160 }, { "epoch": 0.6176207177089101, "grad_norm": NaN, "learning_rate": 0.00043647803850100747, "loss": 0.0, "step": 45170 }, { "epoch": 0.6177574502122772, "grad_norm": NaN, "learning_rate": 0.00043643829617536346, "loss": 0.0, "step": 45180 }, { "epoch": 0.6178941827156442, "grad_norm": NaN, "learning_rate": 0.0004363985432318024, "loss": 0.0, "step": 45190 }, { "epoch": 0.6180309152190113, "grad_norm": NaN, "learning_rate": 0.00043635877967258834, "loss": 0.0, "step": 45200 }, { "epoch": 0.6181676477223783, "grad_norm": NaN, "learning_rate": 0.0004363190054999859, "loss": 0.0, "step": 45210 }, { "epoch": 0.6183043802257454, "grad_norm": NaN, "learning_rate": 0.00043627922071626013, "loss": 0.0, "step": 45220 }, { "epoch": 0.6184411127291124, "grad_norm": NaN, "learning_rate": 0.0004362394253236769, "loss": 0.0, "step": 45230 }, { "epoch": 0.6185778452324794, "grad_norm": NaN, "learning_rate": 0.0004361996193245026, "loss": 0.0, "step": 45240 }, { "epoch": 0.6187145777358465, "grad_norm": NaN, "learning_rate": 0.0004361598027210043, "loss": 0.0, "step": 45250 }, { "epoch": 0.6188513102392135, "grad_norm": NaN, "learning_rate": 0.00043611997551544956, "loss": 0.0, "step": 45260 }, { "epoch": 0.6189880427425806, "grad_norm": NaN, "learning_rate": 0.0004360801377101066, "loss": 0.0, "step": 45270 }, { "epoch": 0.6191247752459476, "grad_norm": NaN, "learning_rate": 0.0004360402893072442, "loss": 0.0, "step": 45280 }, { "epoch": 0.6192615077493147, "grad_norm": NaN, "learning_rate": 0.00043600043030913183, "loss": 0.0, "step": 45290 }, { "epoch": 0.6193982402526816, "grad_norm": NaN, "learning_rate": 0.0004359605607180396, "loss": 0.0, "step": 45300 }, { "epoch": 0.6195349727560487, "grad_norm": NaN, "learning_rate": 0.0004359206805362379, "loss": 0.0, "step": 45310 }, { "epoch": 0.6196717052594157, "grad_norm": NaN, "learning_rate": 0.00043588078976599813, "loss": 0.0, "step": 45320 }, { "epoch": 0.6198084377627828, "grad_norm": NaN, "learning_rate": 0.00043584088840959215, "loss": 0.0, "step": 45330 }, { "epoch": 0.6199451702661498, "grad_norm": NaN, "learning_rate": 0.0004358009764692923, "loss": 0.0, "step": 45340 }, { "epoch": 0.6200819027695168, "grad_norm": NaN, "learning_rate": 0.00043576105394737164, "loss": 0.0, "step": 45350 }, { "epoch": 0.6202186352728839, "grad_norm": NaN, "learning_rate": 0.00043572112084610393, "loss": 0.0, "step": 45360 }, { "epoch": 0.6203553677762509, "grad_norm": NaN, "learning_rate": 0.00043568117716776325, "loss": 0.0, "step": 45370 }, { "epoch": 0.620492100279618, "grad_norm": NaN, "learning_rate": 0.0004356412229146245, "loss": 0.0, "step": 45380 }, { "epoch": 0.620628832782985, "grad_norm": NaN, "learning_rate": 0.0004356012580889631, "loss": 0.0, "step": 45390 }, { "epoch": 0.6207655652863521, "grad_norm": NaN, "learning_rate": 0.00043556128269305527, "loss": 0.0, "step": 45400 }, { "epoch": 0.6209022977897191, "grad_norm": NaN, "learning_rate": 0.0004355212967291775, "loss": 0.0, "step": 45410 }, { "epoch": 0.6210390302930862, "grad_norm": NaN, "learning_rate": 0.0004354813001996071, "loss": 0.0, "step": 45420 }, { "epoch": 0.6211757627964531, "grad_norm": NaN, "learning_rate": 0.0004354412931066218, "loss": 0.0, "step": 45430 }, { "epoch": 0.6213124952998202, "grad_norm": NaN, "learning_rate": 0.0004354012754525003, "loss": 0.0, "step": 45440 }, { "epoch": 0.6214492278031872, "grad_norm": NaN, "learning_rate": 0.00043536124723952155, "loss": 0.0, "step": 45450 }, { "epoch": 0.6215859603065542, "grad_norm": NaN, "learning_rate": 0.0004353212084699651, "loss": 0.0, "step": 45460 }, { "epoch": 0.6217226928099213, "grad_norm": NaN, "learning_rate": 0.0004352811591461113, "loss": 0.0, "step": 45470 }, { "epoch": 0.6218594253132883, "grad_norm": NaN, "learning_rate": 0.0004352410992702411, "loss": 0.0, "step": 45480 }, { "epoch": 0.6219961578166554, "grad_norm": NaN, "learning_rate": 0.00043520102884463585, "loss": 0.0, "step": 45490 }, { "epoch": 0.6221328903200224, "grad_norm": NaN, "learning_rate": 0.0004351609478715777, "loss": 0.0, "step": 45500 }, { "epoch": 0.6222696228233895, "grad_norm": NaN, "learning_rate": 0.0004351208563533492, "loss": 0.0, "step": 45510 }, { "epoch": 0.6224063553267565, "grad_norm": NaN, "learning_rate": 0.0004350807542922337, "loss": 0.0, "step": 45520 }, { "epoch": 0.6225430878301236, "grad_norm": NaN, "learning_rate": 0.00043504064169051505, "loss": 0.0, "step": 45530 }, { "epoch": 0.6226798203334906, "grad_norm": NaN, "learning_rate": 0.00043500051855047773, "loss": 0.0, "step": 45540 }, { "epoch": 0.6228165528368577, "grad_norm": NaN, "learning_rate": 0.00043496038487440684, "loss": 0.0, "step": 45550 }, { "epoch": 0.6229532853402246, "grad_norm": NaN, "learning_rate": 0.00043492024066458797, "loss": 0.0, "step": 45560 }, { "epoch": 0.6230900178435916, "grad_norm": NaN, "learning_rate": 0.00043488008592330743, "loss": 0.0, "step": 45570 }, { "epoch": 0.6232267503469587, "grad_norm": NaN, "learning_rate": 0.00043483992065285204, "loss": 0.0, "step": 45580 }, { "epoch": 0.6233634828503257, "grad_norm": NaN, "learning_rate": 0.00043479974485550945, "loss": 0.0, "step": 45590 }, { "epoch": 0.6235002153536928, "grad_norm": NaN, "learning_rate": 0.00043475955853356743, "loss": 0.0, "step": 45600 }, { "epoch": 0.6236369478570598, "grad_norm": NaN, "learning_rate": 0.00043471936168931486, "loss": 0.0, "step": 45610 }, { "epoch": 0.6237736803604269, "grad_norm": NaN, "learning_rate": 0.00043467915432504096, "loss": 0.0, "step": 45620 }, { "epoch": 0.6239104128637939, "grad_norm": NaN, "learning_rate": 0.0004346389364430356, "loss": 0.0, "step": 45630 }, { "epoch": 0.624047145367161, "grad_norm": NaN, "learning_rate": 0.0004345987080455892, "loss": 0.0, "step": 45640 }, { "epoch": 0.624183877870528, "grad_norm": NaN, "learning_rate": 0.0004345584691349929, "loss": 0.0, "step": 45650 }, { "epoch": 0.6243206103738951, "grad_norm": NaN, "learning_rate": 0.0004345182197135382, "loss": 0.0, "step": 45660 }, { "epoch": 0.6244573428772621, "grad_norm": NaN, "learning_rate": 0.00043447795978351754, "loss": 0.0, "step": 45670 }, { "epoch": 0.624594075380629, "grad_norm": NaN, "learning_rate": 0.0004344376893472237, "loss": 0.0, "step": 45680 }, { "epoch": 0.6247308078839962, "grad_norm": NaN, "learning_rate": 0.0004343974084069501, "loss": 0.0, "step": 45690 }, { "epoch": 0.6248675403873631, "grad_norm": NaN, "learning_rate": 0.00043435711696499093, "loss": 0.0, "step": 45700 }, { "epoch": 0.6250042728907302, "grad_norm": NaN, "learning_rate": 0.0004343168150236407, "loss": 0.0, "step": 45710 }, { "epoch": 0.6251410053940972, "grad_norm": NaN, "learning_rate": 0.00043427650258519475, "loss": 0.0, "step": 45720 }, { "epoch": 0.6252777378974643, "grad_norm": NaN, "learning_rate": 0.0004342361796519488, "loss": 0.0, "step": 45730 }, { "epoch": 0.6254144704008313, "grad_norm": NaN, "learning_rate": 0.00043419584622619953, "loss": 0.0, "step": 45740 }, { "epoch": 0.6255512029041984, "grad_norm": NaN, "learning_rate": 0.0004341555023102438, "loss": 0.0, "step": 45750 }, { "epoch": 0.6256879354075654, "grad_norm": NaN, "learning_rate": 0.00043411514790637926, "loss": 0.0, "step": 45760 }, { "epoch": 0.6258246679109325, "grad_norm": NaN, "learning_rate": 0.0004340747830169042, "loss": 0.0, "step": 45770 }, { "epoch": 0.6259614004142995, "grad_norm": NaN, "learning_rate": 0.0004340344076441174, "loss": 0.0, "step": 45780 }, { "epoch": 0.6260981329176665, "grad_norm": NaN, "learning_rate": 0.00043399402179031835, "loss": 0.0, "step": 45790 }, { "epoch": 0.6262348654210336, "grad_norm": NaN, "learning_rate": 0.000433953625457807, "loss": 0.0, "step": 45800 }, { "epoch": 0.6263715979244006, "grad_norm": NaN, "learning_rate": 0.00043391321864888417, "loss": 0.0, "step": 45810 }, { "epoch": 0.6265083304277677, "grad_norm": NaN, "learning_rate": 0.0004338728013658509, "loss": 0.0, "step": 45820 }, { "epoch": 0.6266450629311346, "grad_norm": NaN, "learning_rate": 0.0004338323736110091, "loss": 0.0, "step": 45830 }, { "epoch": 0.6267817954345017, "grad_norm": NaN, "learning_rate": 0.0004337919353866611, "loss": 0.0, "step": 45840 }, { "epoch": 0.6269185279378687, "grad_norm": NaN, "learning_rate": 0.00043375148669510996, "loss": 0.0, "step": 45850 }, { "epoch": 0.6270552604412358, "grad_norm": NaN, "learning_rate": 0.00043371102753865927, "loss": 0.0, "step": 45860 }, { "epoch": 0.6271919929446028, "grad_norm": NaN, "learning_rate": 0.0004336705579196133, "loss": 0.0, "step": 45870 }, { "epoch": 0.6273287254479699, "grad_norm": NaN, "learning_rate": 0.00043363007784027677, "loss": 0.0, "step": 45880 }, { "epoch": 0.6274654579513369, "grad_norm": NaN, "learning_rate": 0.0004335895873029551, "loss": 0.0, "step": 45890 }, { "epoch": 0.6276021904547039, "grad_norm": NaN, "learning_rate": 0.00043354908630995433, "loss": 0.0, "step": 45900 }, { "epoch": 0.627738922958071, "grad_norm": NaN, "learning_rate": 0.000433508574863581, "loss": 0.0, "step": 45910 }, { "epoch": 0.627875655461438, "grad_norm": NaN, "learning_rate": 0.00043346805296614233, "loss": 0.0, "step": 45920 }, { "epoch": 0.6280123879648051, "grad_norm": NaN, "learning_rate": 0.000433427520619946, "loss": 0.0, "step": 45930 }, { "epoch": 0.6281491204681721, "grad_norm": NaN, "learning_rate": 0.0004333869778273005, "loss": 0.0, "step": 45940 }, { "epoch": 0.6282858529715392, "grad_norm": NaN, "learning_rate": 0.00043334642459051464, "loss": 0.0, "step": 45950 }, { "epoch": 0.6284225854749061, "grad_norm": NaN, "learning_rate": 0.0004333058609118983, "loss": 0.0, "step": 45960 }, { "epoch": 0.6285593179782732, "grad_norm": NaN, "learning_rate": 0.00043326528679376127, "loss": 0.0, "step": 45970 }, { "epoch": 0.6286960504816402, "grad_norm": NaN, "learning_rate": 0.00043322470223841446, "loss": 0.0, "step": 45980 }, { "epoch": 0.6288327829850073, "grad_norm": NaN, "learning_rate": 0.0004331841072481693, "loss": 0.0, "step": 45990 }, { "epoch": 0.6289695154883743, "grad_norm": NaN, "learning_rate": 0.0004331435018253376, "loss": 0.0, "step": 46000 }, { "epoch": 0.6291062479917413, "grad_norm": NaN, "learning_rate": 0.000433102885972232, "loss": 0.0, "step": 46010 }, { "epoch": 0.6292429804951084, "grad_norm": NaN, "learning_rate": 0.00043306225969116555, "loss": 0.0, "step": 46020 }, { "epoch": 0.6293797129984754, "grad_norm": NaN, "learning_rate": 0.0004330216229844519, "loss": 0.0, "step": 46030 }, { "epoch": 0.6295164455018425, "grad_norm": NaN, "learning_rate": 0.00043298097585440555, "loss": 0.0, "step": 46040 }, { "epoch": 0.6296531780052095, "grad_norm": NaN, "learning_rate": 0.0004329403183033413, "loss": 0.0, "step": 46050 }, { "epoch": 0.6297899105085766, "grad_norm": NaN, "learning_rate": 0.0004328996503335747, "loss": 0.0, "step": 46060 }, { "epoch": 0.6299266430119436, "grad_norm": NaN, "learning_rate": 0.00043285897194742174, "loss": 0.0, "step": 46070 }, { "epoch": 0.6300633755153107, "grad_norm": NaN, "learning_rate": 0.0004328182831471993, "loss": 0.0, "step": 46080 }, { "epoch": 0.6302001080186777, "grad_norm": NaN, "learning_rate": 0.00043277758393522447, "loss": 0.0, "step": 46090 }, { "epoch": 0.6303368405220448, "grad_norm": NaN, "learning_rate": 0.0004327368743138152, "loss": 0.0, "step": 46100 }, { "epoch": 0.6304735730254117, "grad_norm": NaN, "learning_rate": 0.00043269615428529005, "loss": 0.0, "step": 46110 }, { "epoch": 0.6306103055287787, "grad_norm": NaN, "learning_rate": 0.0004326554238519679, "loss": 0.0, "step": 46120 }, { "epoch": 0.6307470380321458, "grad_norm": NaN, "learning_rate": 0.0004326146830161686, "loss": 0.0, "step": 46130 }, { "epoch": 0.6308837705355128, "grad_norm": NaN, "learning_rate": 0.0004325739317802122, "loss": 0.0, "step": 46140 }, { "epoch": 0.6310205030388799, "grad_norm": NaN, "learning_rate": 0.00043253317014641966, "loss": 0.0, "step": 46150 }, { "epoch": 0.6311572355422469, "grad_norm": NaN, "learning_rate": 0.00043249239811711244, "loss": 0.0, "step": 46160 }, { "epoch": 0.631293968045614, "grad_norm": NaN, "learning_rate": 0.0004324516156946126, "loss": 0.0, "step": 46170 }, { "epoch": 0.631430700548981, "grad_norm": NaN, "learning_rate": 0.00043241082288124246, "loss": 0.0, "step": 46180 }, { "epoch": 0.6315674330523481, "grad_norm": NaN, "learning_rate": 0.00043237001967932556, "loss": 0.0, "step": 46190 }, { "epoch": 0.6317041655557151, "grad_norm": NaN, "learning_rate": 0.0004323292060911855, "loss": 0.0, "step": 46200 }, { "epoch": 0.6318408980590822, "grad_norm": NaN, "learning_rate": 0.00043228838211914684, "loss": 0.0, "step": 46210 }, { "epoch": 0.6319776305624492, "grad_norm": NaN, "learning_rate": 0.00043224754776553445, "loss": 0.0, "step": 46220 }, { "epoch": 0.6321143630658161, "grad_norm": NaN, "learning_rate": 0.0004322067030326738, "loss": 0.0, "step": 46230 }, { "epoch": 0.6322510955691832, "grad_norm": NaN, "learning_rate": 0.0004321658479228913, "loss": 0.0, "step": 46240 }, { "epoch": 0.6323878280725502, "grad_norm": NaN, "learning_rate": 0.0004321249824385135, "loss": 0.0, "step": 46250 }, { "epoch": 0.6325245605759173, "grad_norm": NaN, "learning_rate": 0.00043208410658186785, "loss": 0.0, "step": 46260 }, { "epoch": 0.6326612930792843, "grad_norm": NaN, "learning_rate": 0.00043204322035528226, "loss": 0.0, "step": 46270 }, { "epoch": 0.6327980255826514, "grad_norm": NaN, "learning_rate": 0.00043200232376108523, "loss": 0.0, "step": 46280 }, { "epoch": 0.6329347580860184, "grad_norm": NaN, "learning_rate": 0.00043196141680160594, "loss": 0.0, "step": 46290 }, { "epoch": 0.6330714905893855, "grad_norm": NaN, "learning_rate": 0.000431920499479174, "loss": 0.0, "step": 46300 }, { "epoch": 0.6332082230927525, "grad_norm": NaN, "learning_rate": 0.0004318795717961198, "loss": 0.0, "step": 46310 }, { "epoch": 0.6333449555961196, "grad_norm": NaN, "learning_rate": 0.00043183863375477417, "loss": 0.0, "step": 46320 }, { "epoch": 0.6334816880994866, "grad_norm": NaN, "learning_rate": 0.0004317976853574687, "loss": 0.0, "step": 46330 }, { "epoch": 0.6336184206028536, "grad_norm": NaN, "learning_rate": 0.0004317567266065353, "loss": 0.0, "step": 46340 }, { "epoch": 0.6337551531062207, "grad_norm": NaN, "learning_rate": 0.00043171575750430667, "loss": 0.0, "step": 46350 }, { "epoch": 0.6338918856095876, "grad_norm": NaN, "learning_rate": 0.00043167477805311613, "loss": 0.0, "step": 46360 }, { "epoch": 0.6340286181129547, "grad_norm": NaN, "learning_rate": 0.0004316337882552974, "loss": 0.0, "step": 46370 }, { "epoch": 0.6341653506163217, "grad_norm": NaN, "learning_rate": 0.000431592788113185, "loss": 0.0, "step": 46380 }, { "epoch": 0.6343020831196888, "grad_norm": NaN, "learning_rate": 0.000431551777629114, "loss": 0.0, "step": 46390 }, { "epoch": 0.6344388156230558, "grad_norm": NaN, "learning_rate": 0.00043151075680541984, "loss": 0.0, "step": 46400 }, { "epoch": 0.6345755481264229, "grad_norm": NaN, "learning_rate": 0.0004314697256444388, "loss": 0.0, "step": 46410 }, { "epoch": 0.6347122806297899, "grad_norm": NaN, "learning_rate": 0.0004314286841485077, "loss": 0.0, "step": 46420 }, { "epoch": 0.634849013133157, "grad_norm": NaN, "learning_rate": 0.0004313876323199638, "loss": 0.0, "step": 46430 }, { "epoch": 0.634985745636524, "grad_norm": NaN, "learning_rate": 0.0004313465701611452, "loss": 0.0, "step": 46440 }, { "epoch": 0.635122478139891, "grad_norm": NaN, "learning_rate": 0.0004313054976743903, "loss": 0.0, "step": 46450 }, { "epoch": 0.6352592106432581, "grad_norm": NaN, "learning_rate": 0.0004312644148620384, "loss": 0.0, "step": 46460 }, { "epoch": 0.6353959431466251, "grad_norm": NaN, "learning_rate": 0.000431223321726429, "loss": 0.0, "step": 46470 }, { "epoch": 0.6355326756499922, "grad_norm": NaN, "learning_rate": 0.00043118221826990254, "loss": 0.0, "step": 46480 }, { "epoch": 0.6356694081533592, "grad_norm": NaN, "learning_rate": 0.0004311411044948, "loss": 0.0, "step": 46490 }, { "epoch": 0.6358061406567262, "grad_norm": NaN, "learning_rate": 0.0004310999804034628, "loss": 0.0, "step": 46500 }, { "epoch": 0.6359428731600932, "grad_norm": NaN, "learning_rate": 0.00043105884599823284, "loss": 0.0, "step": 46510 }, { "epoch": 0.6360796056634603, "grad_norm": NaN, "learning_rate": 0.00043101770128145306, "loss": 0.0, "step": 46520 }, { "epoch": 0.6362163381668273, "grad_norm": NaN, "learning_rate": 0.00043097654625546655, "loss": 0.0, "step": 46530 }, { "epoch": 0.6363530706701944, "grad_norm": NaN, "learning_rate": 0.0004309353809226172, "loss": 0.0, "step": 46540 }, { "epoch": 0.6364898031735614, "grad_norm": NaN, "learning_rate": 0.0004308942052852494, "loss": 0.0, "step": 46550 }, { "epoch": 0.6366265356769284, "grad_norm": NaN, "learning_rate": 0.0004308530193457081, "loss": 0.0, "step": 46560 }, { "epoch": 0.6367632681802955, "grad_norm": NaN, "learning_rate": 0.000430811823106339, "loss": 0.0, "step": 46570 }, { "epoch": 0.6369000006836625, "grad_norm": NaN, "learning_rate": 0.00043077061656948836, "loss": 0.0, "step": 46580 }, { "epoch": 0.6370367331870296, "grad_norm": NaN, "learning_rate": 0.00043072939973750273, "loss": 0.0, "step": 46590 }, { "epoch": 0.6371734656903966, "grad_norm": NaN, "learning_rate": 0.00043068817261272965, "loss": 0.0, "step": 46600 }, { "epoch": 0.6373101981937637, "grad_norm": NaN, "learning_rate": 0.00043064693519751684, "loss": 0.0, "step": 46610 }, { "epoch": 0.6374469306971307, "grad_norm": NaN, "learning_rate": 0.0004306056874942131, "loss": 0.0, "step": 46620 }, { "epoch": 0.6375836632004978, "grad_norm": NaN, "learning_rate": 0.0004305644295051674, "loss": 0.0, "step": 46630 }, { "epoch": 0.6377203957038647, "grad_norm": NaN, "learning_rate": 0.0004305231612327294, "loss": 0.0, "step": 46640 }, { "epoch": 0.6378571282072317, "grad_norm": NaN, "learning_rate": 0.0004304818826792495, "loss": 0.0, "step": 46650 }, { "epoch": 0.6379938607105988, "grad_norm": NaN, "learning_rate": 0.0004304405938470785, "loss": 0.0, "step": 46660 }, { "epoch": 0.6381305932139658, "grad_norm": NaN, "learning_rate": 0.00043039929473856774, "loss": 0.0, "step": 46670 }, { "epoch": 0.6382673257173329, "grad_norm": NaN, "learning_rate": 0.0004303579853560695, "loss": 0.0, "step": 46680 }, { "epoch": 0.6384040582206999, "grad_norm": NaN, "learning_rate": 0.00043031666570193626, "loss": 0.0, "step": 46690 }, { "epoch": 0.638540790724067, "grad_norm": NaN, "learning_rate": 0.0004302753357785213, "loss": 0.0, "step": 46700 }, { "epoch": 0.638677523227434, "grad_norm": NaN, "learning_rate": 0.00043023399558817833, "loss": 0.0, "step": 46710 }, { "epoch": 0.6388142557308011, "grad_norm": NaN, "learning_rate": 0.00043019264513326184, "loss": 0.0, "step": 46720 }, { "epoch": 0.6389509882341681, "grad_norm": NaN, "learning_rate": 0.00043015128441612665, "loss": 0.0, "step": 46730 }, { "epoch": 0.6390877207375352, "grad_norm": NaN, "learning_rate": 0.0004301099134391284, "loss": 0.0, "step": 46740 }, { "epoch": 0.6392244532409022, "grad_norm": NaN, "learning_rate": 0.0004300685322046233, "loss": 0.0, "step": 46750 }, { "epoch": 0.6393611857442691, "grad_norm": NaN, "learning_rate": 0.000430027140714968, "loss": 0.0, "step": 46760 }, { "epoch": 0.6394979182476362, "grad_norm": NaN, "learning_rate": 0.0004299857389725197, "loss": 0.0, "step": 46770 }, { "epoch": 0.6396346507510032, "grad_norm": NaN, "learning_rate": 0.0004299443269796364, "loss": 0.0, "step": 46780 }, { "epoch": 0.6397713832543703, "grad_norm": NaN, "learning_rate": 0.0004299029047386766, "loss": 0.0, "step": 46790 }, { "epoch": 0.6399081157577373, "grad_norm": NaN, "learning_rate": 0.00042986147225199914, "loss": 0.0, "step": 46800 }, { "epoch": 0.6400448482611044, "grad_norm": NaN, "learning_rate": 0.0004298200295219639, "loss": 0.0, "step": 46810 }, { "epoch": 0.6401815807644714, "grad_norm": NaN, "learning_rate": 0.00042977857655093104, "loss": 0.0, "step": 46820 }, { "epoch": 0.6403183132678385, "grad_norm": NaN, "learning_rate": 0.0004297371133412613, "loss": 0.0, "step": 46830 }, { "epoch": 0.6404550457712055, "grad_norm": NaN, "learning_rate": 0.0004296956398953161, "loss": 0.0, "step": 46840 }, { "epoch": 0.6405917782745726, "grad_norm": NaN, "learning_rate": 0.0004296541562154575, "loss": 0.0, "step": 46850 }, { "epoch": 0.6407285107779396, "grad_norm": NaN, "learning_rate": 0.00042961266230404793, "loss": 0.0, "step": 46860 }, { "epoch": 0.6408652432813066, "grad_norm": NaN, "learning_rate": 0.0004295711581634505, "loss": 0.0, "step": 46870 }, { "epoch": 0.6410019757846737, "grad_norm": NaN, "learning_rate": 0.00042952964379602913, "loss": 0.0, "step": 46880 }, { "epoch": 0.6411387082880406, "grad_norm": NaN, "learning_rate": 0.0004294881192041478, "loss": 0.0, "step": 46890 }, { "epoch": 0.6412754407914077, "grad_norm": NaN, "learning_rate": 0.0004294465843901717, "loss": 0.0, "step": 46900 }, { "epoch": 0.6414121732947747, "grad_norm": NaN, "learning_rate": 0.0004294050393564661, "loss": 0.0, "step": 46910 }, { "epoch": 0.6415489057981418, "grad_norm": NaN, "learning_rate": 0.00042936348410539714, "loss": 0.0, "step": 46920 }, { "epoch": 0.6416856383015088, "grad_norm": NaN, "learning_rate": 0.00042932191863933147, "loss": 0.0, "step": 46930 }, { "epoch": 0.6418223708048759, "grad_norm": NaN, "learning_rate": 0.00042928034296063616, "loss": 0.0, "step": 46940 }, { "epoch": 0.6419591033082429, "grad_norm": NaN, "learning_rate": 0.00042923875707167914, "loss": 0.0, "step": 46950 }, { "epoch": 0.64209583581161, "grad_norm": NaN, "learning_rate": 0.00042919716097482877, "loss": 0.0, "step": 46960 }, { "epoch": 0.642232568314977, "grad_norm": NaN, "learning_rate": 0.00042915555467245393, "loss": 0.0, "step": 46970 }, { "epoch": 0.642369300818344, "grad_norm": NaN, "learning_rate": 0.00042911393816692426, "loss": 0.0, "step": 46980 }, { "epoch": 0.6425060333217111, "grad_norm": NaN, "learning_rate": 0.0004290723114606097, "loss": 0.0, "step": 46990 }, { "epoch": 0.6426427658250781, "grad_norm": NaN, "learning_rate": 0.0004290306745558812, "loss": 0.0, "step": 47000 }, { "epoch": 0.6427794983284452, "grad_norm": NaN, "learning_rate": 0.0004289890274551098, "loss": 0.0, "step": 47010 }, { "epoch": 0.6429162308318122, "grad_norm": NaN, "learning_rate": 0.00042894737016066747, "loss": 0.0, "step": 47020 }, { "epoch": 0.6430529633351793, "grad_norm": NaN, "learning_rate": 0.0004289057026749267, "loss": 0.0, "step": 47030 }, { "epoch": 0.6431896958385462, "grad_norm": NaN, "learning_rate": 0.0004288640250002604, "loss": 0.0, "step": 47040 }, { "epoch": 0.6433264283419133, "grad_norm": NaN, "learning_rate": 0.0004288223371390423, "loss": 0.0, "step": 47050 }, { "epoch": 0.6434631608452803, "grad_norm": NaN, "learning_rate": 0.0004287806390936464, "loss": 0.0, "step": 47060 }, { "epoch": 0.6435998933486474, "grad_norm": NaN, "learning_rate": 0.0004287389308664477, "loss": 0.0, "step": 47070 }, { "epoch": 0.6437366258520144, "grad_norm": NaN, "learning_rate": 0.0004286972124598213, "loss": 0.0, "step": 47080 }, { "epoch": 0.6438733583553814, "grad_norm": NaN, "learning_rate": 0.00042865548387614326, "loss": 0.0, "step": 47090 }, { "epoch": 0.6440100908587485, "grad_norm": NaN, "learning_rate": 0.00042861374511779006, "loss": 0.0, "step": 47100 }, { "epoch": 0.6441468233621155, "grad_norm": NaN, "learning_rate": 0.00042857199618713873, "loss": 0.0, "step": 47110 }, { "epoch": 0.6442835558654826, "grad_norm": NaN, "learning_rate": 0.000428530237086567, "loss": 0.0, "step": 47120 }, { "epoch": 0.6444202883688496, "grad_norm": NaN, "learning_rate": 0.00042848846781845306, "loss": 0.0, "step": 47130 }, { "epoch": 0.6445570208722167, "grad_norm": NaN, "learning_rate": 0.00042844668838517576, "loss": 0.0, "step": 47140 }, { "epoch": 0.6446937533755837, "grad_norm": NaN, "learning_rate": 0.0004284048987891145, "loss": 0.0, "step": 47150 }, { "epoch": 0.6448304858789508, "grad_norm": NaN, "learning_rate": 0.0004283630990326492, "loss": 0.0, "step": 47160 }, { "epoch": 0.6449672183823177, "grad_norm": NaN, "learning_rate": 0.00042832128911816046, "loss": 0.0, "step": 47170 }, { "epoch": 0.6451039508856848, "grad_norm": NaN, "learning_rate": 0.0004282794690480294, "loss": 0.0, "step": 47180 }, { "epoch": 0.6452406833890518, "grad_norm": NaN, "learning_rate": 0.0004282376388246377, "loss": 0.0, "step": 47190 }, { "epoch": 0.6453774158924188, "grad_norm": NaN, "learning_rate": 0.0004281957984503677, "loss": 0.0, "step": 47200 }, { "epoch": 0.6455141483957859, "grad_norm": NaN, "learning_rate": 0.00042815394792760226, "loss": 0.0, "step": 47210 }, { "epoch": 0.6456508808991529, "grad_norm": NaN, "learning_rate": 0.0004281120872587248, "loss": 0.0, "step": 47220 }, { "epoch": 0.64578761340252, "grad_norm": NaN, "learning_rate": 0.00042807021644611934, "loss": 0.0, "step": 47230 }, { "epoch": 0.645924345905887, "grad_norm": NaN, "learning_rate": 0.00042802833549217045, "loss": 0.0, "step": 47240 }, { "epoch": 0.6460610784092541, "grad_norm": NaN, "learning_rate": 0.0004279864443992634, "loss": 0.0, "step": 47250 }, { "epoch": 0.6461978109126211, "grad_norm": NaN, "learning_rate": 0.0004279445431697839, "loss": 0.0, "step": 47260 }, { "epoch": 0.6463345434159882, "grad_norm": NaN, "learning_rate": 0.00042790263180611835, "loss": 0.0, "step": 47270 }, { "epoch": 0.6464712759193552, "grad_norm": NaN, "learning_rate": 0.0004278607103106534, "loss": 0.0, "step": 47280 }, { "epoch": 0.6466080084227223, "grad_norm": NaN, "learning_rate": 0.0004278187786857769, "loss": 0.0, "step": 47290 }, { "epoch": 0.6467447409260892, "grad_norm": NaN, "learning_rate": 0.0004277768369338767, "loss": 0.0, "step": 47300 }, { "epoch": 0.6468814734294562, "grad_norm": NaN, "learning_rate": 0.0004277348850573414, "loss": 0.0, "step": 47310 }, { "epoch": 0.6470182059328233, "grad_norm": NaN, "learning_rate": 0.00042769292305856033, "loss": 0.0, "step": 47320 }, { "epoch": 0.6471549384361903, "grad_norm": NaN, "learning_rate": 0.0004276509509399233, "loss": 0.0, "step": 47330 }, { "epoch": 0.6472916709395574, "grad_norm": NaN, "learning_rate": 0.0004276089687038206, "loss": 0.0, "step": 47340 }, { "epoch": 0.6474284034429244, "grad_norm": NaN, "learning_rate": 0.0004275669763526432, "loss": 0.0, "step": 47350 }, { "epoch": 0.6475651359462915, "grad_norm": NaN, "learning_rate": 0.0004275249738887826, "loss": 0.0, "step": 47360 }, { "epoch": 0.6477018684496585, "grad_norm": NaN, "learning_rate": 0.00042748296131463095, "loss": 0.0, "step": 47370 }, { "epoch": 0.6478386009530256, "grad_norm": NaN, "learning_rate": 0.00042744093863258085, "loss": 0.0, "step": 47380 }, { "epoch": 0.6479753334563926, "grad_norm": NaN, "learning_rate": 0.00042739890584502573, "loss": 0.0, "step": 47390 }, { "epoch": 0.6481120659597597, "grad_norm": NaN, "learning_rate": 0.00042735686295435917, "loss": 0.0, "step": 47400 }, { "epoch": 0.6482487984631267, "grad_norm": NaN, "learning_rate": 0.0004273148099629757, "loss": 0.0, "step": 47410 }, { "epoch": 0.6483855309664937, "grad_norm": NaN, "learning_rate": 0.00042727274687327034, "loss": 0.0, "step": 47420 }, { "epoch": 0.6485222634698607, "grad_norm": NaN, "learning_rate": 0.00042723067368763846, "loss": 0.0, "step": 47430 }, { "epoch": 0.6486589959732277, "grad_norm": NaN, "learning_rate": 0.0004271885904084764, "loss": 0.0, "step": 47440 }, { "epoch": 0.6487957284765948, "grad_norm": NaN, "learning_rate": 0.00042714649703818073, "loss": 0.0, "step": 47450 }, { "epoch": 0.6489324609799618, "grad_norm": NaN, "learning_rate": 0.00042710439357914875, "loss": 0.0, "step": 47460 }, { "epoch": 0.6490691934833289, "grad_norm": NaN, "learning_rate": 0.00042706228003377836, "loss": 0.0, "step": 47470 }, { "epoch": 0.6492059259866959, "grad_norm": NaN, "learning_rate": 0.0004270201564044679, "loss": 0.0, "step": 47480 }, { "epoch": 0.649342658490063, "grad_norm": NaN, "learning_rate": 0.00042697802269361647, "loss": 0.0, "step": 47490 }, { "epoch": 0.64947939099343, "grad_norm": NaN, "learning_rate": 0.00042693587890362363, "loss": 0.0, "step": 47500 }, { "epoch": 0.6496161234967971, "grad_norm": NaN, "learning_rate": 0.0004268937250368894, "loss": 0.0, "step": 47510 }, { "epoch": 0.6497528560001641, "grad_norm": NaN, "learning_rate": 0.0004268515610958145, "loss": 0.0, "step": 47520 }, { "epoch": 0.6498895885035311, "grad_norm": NaN, "learning_rate": 0.00042680938708280044, "loss": 0.0, "step": 47530 }, { "epoch": 0.6500263210068982, "grad_norm": NaN, "learning_rate": 0.000426767203000249, "loss": 0.0, "step": 47540 }, { "epoch": 0.6501630535102652, "grad_norm": NaN, "learning_rate": 0.0004267250088505624, "loss": 0.0, "step": 47550 }, { "epoch": 0.6502997860136323, "grad_norm": NaN, "learning_rate": 0.00042668280463614396, "loss": 0.0, "step": 47560 }, { "epoch": 0.6504365185169992, "grad_norm": NaN, "learning_rate": 0.0004266405903593971, "loss": 0.0, "step": 47570 }, { "epoch": 0.6505732510203663, "grad_norm": NaN, "learning_rate": 0.00042659836602272606, "loss": 0.0, "step": 47580 }, { "epoch": 0.6507099835237333, "grad_norm": NaN, "learning_rate": 0.00042655613162853546, "loss": 0.0, "step": 47590 }, { "epoch": 0.6508467160271004, "grad_norm": NaN, "learning_rate": 0.0004265138871792308, "loss": 0.0, "step": 47600 }, { "epoch": 0.6509834485304674, "grad_norm": NaN, "learning_rate": 0.00042647163267721773, "loss": 0.0, "step": 47610 }, { "epoch": 0.6511201810338345, "grad_norm": NaN, "learning_rate": 0.00042642936812490283, "loss": 0.0, "step": 47620 }, { "epoch": 0.6512569135372015, "grad_norm": NaN, "learning_rate": 0.00042638709352469317, "loss": 0.0, "step": 47630 }, { "epoch": 0.6513936460405685, "grad_norm": NaN, "learning_rate": 0.0004263448088789962, "loss": 0.0, "step": 47640 }, { "epoch": 0.6515303785439356, "grad_norm": NaN, "learning_rate": 0.0004263025141902203, "loss": 0.0, "step": 47650 }, { "epoch": 0.6516671110473026, "grad_norm": NaN, "learning_rate": 0.00042626020946077395, "loss": 0.0, "step": 47660 }, { "epoch": 0.6518038435506697, "grad_norm": NaN, "learning_rate": 0.0004262178946930667, "loss": 0.0, "step": 47670 }, { "epoch": 0.6519405760540367, "grad_norm": NaN, "learning_rate": 0.00042617556988950824, "loss": 0.0, "step": 47680 }, { "epoch": 0.6520773085574038, "grad_norm": NaN, "learning_rate": 0.0004261332350525091, "loss": 0.0, "step": 47690 }, { "epoch": 0.6522140410607707, "grad_norm": NaN, "learning_rate": 0.00042609089018448044, "loss": 0.0, "step": 47700 }, { "epoch": 0.6523507735641378, "grad_norm": NaN, "learning_rate": 0.00042604853528783364, "loss": 0.0, "step": 47710 }, { "epoch": 0.6524875060675048, "grad_norm": NaN, "learning_rate": 0.000426006170364981, "loss": 0.0, "step": 47720 }, { "epoch": 0.6526242385708719, "grad_norm": NaN, "learning_rate": 0.0004259637954183353, "loss": 0.0, "step": 47730 }, { "epoch": 0.6527609710742389, "grad_norm": NaN, "learning_rate": 0.0004259214104503097, "loss": 0.0, "step": 47740 }, { "epoch": 0.6528977035776059, "grad_norm": NaN, "learning_rate": 0.0004258790154633182, "loss": 0.0, "step": 47750 }, { "epoch": 0.653034436080973, "grad_norm": NaN, "learning_rate": 0.0004258366104597752, "loss": 0.0, "step": 47760 }, { "epoch": 0.65317116858434, "grad_norm": NaN, "learning_rate": 0.00042579419544209573, "loss": 0.0, "step": 47770 }, { "epoch": 0.6533079010877071, "grad_norm": NaN, "learning_rate": 0.00042575177041269543, "loss": 0.0, "step": 47780 }, { "epoch": 0.6534446335910741, "grad_norm": NaN, "learning_rate": 0.00042570933537399043, "loss": 0.0, "step": 47790 }, { "epoch": 0.6535813660944412, "grad_norm": NaN, "learning_rate": 0.00042566689032839743, "loss": 0.0, "step": 47800 }, { "epoch": 0.6537180985978082, "grad_norm": NaN, "learning_rate": 0.00042562443527833377, "loss": 0.0, "step": 47810 }, { "epoch": 0.6538548311011753, "grad_norm": NaN, "learning_rate": 0.00042558197022621736, "loss": 0.0, "step": 47820 }, { "epoch": 0.6539915636045422, "grad_norm": NaN, "learning_rate": 0.00042553949517446654, "loss": 0.0, "step": 47830 }, { "epoch": 0.6541282961079093, "grad_norm": NaN, "learning_rate": 0.00042549701012550037, "loss": 0.0, "step": 47840 }, { "epoch": 0.6542650286112763, "grad_norm": NaN, "learning_rate": 0.0004254545150817385, "loss": 0.0, "step": 47850 }, { "epoch": 0.6544017611146433, "grad_norm": NaN, "learning_rate": 0.00042541201004560103, "loss": 0.0, "step": 47860 }, { "epoch": 0.6545384936180104, "grad_norm": NaN, "learning_rate": 0.00042536949501950866, "loss": 0.0, "step": 47870 }, { "epoch": 0.6546752261213774, "grad_norm": NaN, "learning_rate": 0.0004253269700058827, "loss": 0.0, "step": 47880 }, { "epoch": 0.6548119586247445, "grad_norm": NaN, "learning_rate": 0.000425284435007145, "loss": 0.0, "step": 47890 }, { "epoch": 0.6549486911281115, "grad_norm": NaN, "learning_rate": 0.000425241890025718, "loss": 0.0, "step": 47900 }, { "epoch": 0.6550854236314786, "grad_norm": NaN, "learning_rate": 0.00042519933506402467, "loss": 0.0, "step": 47910 }, { "epoch": 0.6552221561348456, "grad_norm": NaN, "learning_rate": 0.0004251567701244886, "loss": 0.0, "step": 47920 }, { "epoch": 0.6553588886382127, "grad_norm": NaN, "learning_rate": 0.00042511419520953396, "loss": 0.0, "step": 47930 }, { "epoch": 0.6554956211415797, "grad_norm": NaN, "learning_rate": 0.0004250716103215854, "loss": 0.0, "step": 47940 }, { "epoch": 0.6556323536449468, "grad_norm": NaN, "learning_rate": 0.00042502901546306805, "loss": 0.0, "step": 47950 }, { "epoch": 0.6557690861483138, "grad_norm": NaN, "learning_rate": 0.00042498641063640804, "loss": 0.0, "step": 47960 }, { "epoch": 0.6559058186516807, "grad_norm": NaN, "learning_rate": 0.0004249437958440315, "loss": 0.0, "step": 47970 }, { "epoch": 0.6560425511550478, "grad_norm": NaN, "learning_rate": 0.0004249011710883656, "loss": 0.0, "step": 47980 }, { "epoch": 0.6561792836584148, "grad_norm": NaN, "learning_rate": 0.0004248585363718378, "loss": 0.0, "step": 47990 }, { "epoch": 0.6563160161617819, "grad_norm": NaN, "learning_rate": 0.00042481589169687616, "loss": 0.0, "step": 48000 }, { "epoch": 0.6564527486651489, "grad_norm": NaN, "learning_rate": 0.0004247732370659094, "loss": 0.0, "step": 48010 }, { "epoch": 0.656589481168516, "grad_norm": NaN, "learning_rate": 0.00042473057248136674, "loss": 0.0, "step": 48020 }, { "epoch": 0.656726213671883, "grad_norm": NaN, "learning_rate": 0.0004246878979456781, "loss": 0.0, "step": 48030 }, { "epoch": 0.6568629461752501, "grad_norm": NaN, "learning_rate": 0.00042464521346127364, "loss": 0.0, "step": 48040 }, { "epoch": 0.6569996786786171, "grad_norm": NaN, "learning_rate": 0.0004246025190305845, "loss": 0.0, "step": 48050 }, { "epoch": 0.6571364111819841, "grad_norm": NaN, "learning_rate": 0.0004245598146560421, "loss": 0.0, "step": 48060 }, { "epoch": 0.6572731436853512, "grad_norm": NaN, "learning_rate": 0.00042451710034007844, "loss": 0.0, "step": 48070 }, { "epoch": 0.6574098761887182, "grad_norm": NaN, "learning_rate": 0.0004244743760851263, "loss": 0.0, "step": 48080 }, { "epoch": 0.6575466086920853, "grad_norm": NaN, "learning_rate": 0.00042443164189361884, "loss": 0.0, "step": 48090 }, { "epoch": 0.6576833411954522, "grad_norm": NaN, "learning_rate": 0.0004243888977679897, "loss": 0.0, "step": 48100 }, { "epoch": 0.6578200736988193, "grad_norm": NaN, "learning_rate": 0.0004243461437106734, "loss": 0.0, "step": 48110 }, { "epoch": 0.6579568062021863, "grad_norm": NaN, "learning_rate": 0.00042430337972410476, "loss": 0.0, "step": 48120 }, { "epoch": 0.6580935387055534, "grad_norm": NaN, "learning_rate": 0.0004242606058107192, "loss": 0.0, "step": 48130 }, { "epoch": 0.6582302712089204, "grad_norm": NaN, "learning_rate": 0.00042421782197295287, "loss": 0.0, "step": 48140 }, { "epoch": 0.6583670037122875, "grad_norm": NaN, "learning_rate": 0.0004241750282132424, "loss": 0.0, "step": 48150 }, { "epoch": 0.6585037362156545, "grad_norm": NaN, "learning_rate": 0.00042413222453402475, "loss": 0.0, "step": 48160 }, { "epoch": 0.6586404687190215, "grad_norm": NaN, "learning_rate": 0.0004240894109377378, "loss": 0.0, "step": 48170 }, { "epoch": 0.6587772012223886, "grad_norm": NaN, "learning_rate": 0.0004240465874268198, "loss": 0.0, "step": 48180 }, { "epoch": 0.6589139337257556, "grad_norm": NaN, "learning_rate": 0.00042400375400370957, "loss": 0.0, "step": 48190 }, { "epoch": 0.6590506662291227, "grad_norm": NaN, "learning_rate": 0.0004239609106708466, "loss": 0.0, "step": 48200 }, { "epoch": 0.6591873987324897, "grad_norm": NaN, "learning_rate": 0.00042391805743067086, "loss": 0.0, "step": 48210 }, { "epoch": 0.6593241312358568, "grad_norm": NaN, "learning_rate": 0.0004238751942856229, "loss": 0.0, "step": 48220 }, { "epoch": 0.6594608637392237, "grad_norm": NaN, "learning_rate": 0.00042383232123814374, "loss": 0.0, "step": 48230 }, { "epoch": 0.6595975962425908, "grad_norm": NaN, "learning_rate": 0.00042378943829067523, "loss": 0.0, "step": 48240 }, { "epoch": 0.6597343287459578, "grad_norm": NaN, "learning_rate": 0.00042374654544565947, "loss": 0.0, "step": 48250 }, { "epoch": 0.6598710612493249, "grad_norm": NaN, "learning_rate": 0.0004237036427055394, "loss": 0.0, "step": 48260 }, { "epoch": 0.6600077937526919, "grad_norm": NaN, "learning_rate": 0.0004236607300727583, "loss": 0.0, "step": 48270 }, { "epoch": 0.6601445262560589, "grad_norm": NaN, "learning_rate": 0.0004236178075497601, "loss": 0.0, "step": 48280 }, { "epoch": 0.660281258759426, "grad_norm": NaN, "learning_rate": 0.0004235748751389893, "loss": 0.0, "step": 48290 }, { "epoch": 0.660417991262793, "grad_norm": NaN, "learning_rate": 0.0004235319328428909, "loss": 0.0, "step": 48300 }, { "epoch": 0.6605547237661601, "grad_norm": NaN, "learning_rate": 0.0004234889806639107, "loss": 0.0, "step": 48310 }, { "epoch": 0.6606914562695271, "grad_norm": NaN, "learning_rate": 0.0004234460186044948, "loss": 0.0, "step": 48320 }, { "epoch": 0.6608281887728942, "grad_norm": NaN, "learning_rate": 0.0004234030466670899, "loss": 0.0, "step": 48330 }, { "epoch": 0.6609649212762612, "grad_norm": NaN, "learning_rate": 0.00042336006485414334, "loss": 0.0, "step": 48340 }, { "epoch": 0.6611016537796283, "grad_norm": NaN, "learning_rate": 0.00042331707316810296, "loss": 0.0, "step": 48350 }, { "epoch": 0.6612383862829952, "grad_norm": NaN, "learning_rate": 0.0004232740716114173, "loss": 0.0, "step": 48360 }, { "epoch": 0.6613751187863623, "grad_norm": NaN, "learning_rate": 0.0004232310601865352, "loss": 0.0, "step": 48370 }, { "epoch": 0.6615118512897293, "grad_norm": NaN, "learning_rate": 0.0004231880388959063, "loss": 0.0, "step": 48380 }, { "epoch": 0.6616485837930963, "grad_norm": NaN, "learning_rate": 0.0004231450077419808, "loss": 0.0, "step": 48390 }, { "epoch": 0.6617853162964634, "grad_norm": NaN, "learning_rate": 0.0004231019667272093, "loss": 0.0, "step": 48400 }, { "epoch": 0.6619220487998304, "grad_norm": NaN, "learning_rate": 0.000423058915854043, "loss": 0.0, "step": 48410 }, { "epoch": 0.6620587813031975, "grad_norm": NaN, "learning_rate": 0.0004230158551249339, "loss": 0.0, "step": 48420 }, { "epoch": 0.6621955138065645, "grad_norm": NaN, "learning_rate": 0.0004229727845423341, "loss": 0.0, "step": 48430 }, { "epoch": 0.6623322463099316, "grad_norm": NaN, "learning_rate": 0.0004229297041086967, "loss": 0.0, "step": 48440 }, { "epoch": 0.6624689788132986, "grad_norm": NaN, "learning_rate": 0.0004228866138264752, "loss": 0.0, "step": 48450 }, { "epoch": 0.6626057113166657, "grad_norm": NaN, "learning_rate": 0.00042284351369812354, "loss": 0.0, "step": 48460 }, { "epoch": 0.6627424438200327, "grad_norm": NaN, "learning_rate": 0.0004228004037260964, "loss": 0.0, "step": 48470 }, { "epoch": 0.6628791763233998, "grad_norm": NaN, "learning_rate": 0.00042275728391284896, "loss": 0.0, "step": 48480 }, { "epoch": 0.6630159088267668, "grad_norm": NaN, "learning_rate": 0.00042271415426083694, "loss": 0.0, "step": 48490 }, { "epoch": 0.6631526413301337, "grad_norm": NaN, "learning_rate": 0.00042267101477251655, "loss": 0.0, "step": 48500 }, { "epoch": 0.6632893738335008, "grad_norm": NaN, "learning_rate": 0.0004226278654503448, "loss": 0.0, "step": 48510 }, { "epoch": 0.6634261063368678, "grad_norm": NaN, "learning_rate": 0.00042258470629677903, "loss": 0.0, "step": 48520 }, { "epoch": 0.6635628388402349, "grad_norm": NaN, "learning_rate": 0.0004225415373142772, "loss": 0.0, "step": 48530 }, { "epoch": 0.6636995713436019, "grad_norm": NaN, "learning_rate": 0.0004224983585052978, "loss": 0.0, "step": 48540 }, { "epoch": 0.663836303846969, "grad_norm": NaN, "learning_rate": 0.00042245516987230006, "loss": 0.0, "step": 48550 }, { "epoch": 0.663973036350336, "grad_norm": NaN, "learning_rate": 0.0004224119714177435, "loss": 0.0, "step": 48560 }, { "epoch": 0.6641097688537031, "grad_norm": NaN, "learning_rate": 0.00042236876314408835, "loss": 0.0, "step": 48570 }, { "epoch": 0.6642465013570701, "grad_norm": NaN, "learning_rate": 0.0004223255450537955, "loss": 0.0, "step": 48580 }, { "epoch": 0.6643832338604372, "grad_norm": NaN, "learning_rate": 0.00042228231714932607, "loss": 0.0, "step": 48590 }, { "epoch": 0.6645199663638042, "grad_norm": NaN, "learning_rate": 0.0004222390794331422, "loss": 0.0, "step": 48600 }, { "epoch": 0.6646566988671712, "grad_norm": NaN, "learning_rate": 0.00042219583190770607, "loss": 0.0, "step": 48610 }, { "epoch": 0.6647934313705383, "grad_norm": NaN, "learning_rate": 0.0004221525745754809, "loss": 0.0, "step": 48620 }, { "epoch": 0.6649301638739052, "grad_norm": NaN, "learning_rate": 0.00042210930743893016, "loss": 0.0, "step": 48630 }, { "epoch": 0.6650668963772723, "grad_norm": NaN, "learning_rate": 0.00042206603050051793, "loss": 0.0, "step": 48640 }, { "epoch": 0.6652036288806393, "grad_norm": NaN, "learning_rate": 0.00042202274376270906, "loss": 0.0, "step": 48650 }, { "epoch": 0.6653403613840064, "grad_norm": NaN, "learning_rate": 0.00042197944722796853, "loss": 0.0, "step": 48660 }, { "epoch": 0.6654770938873734, "grad_norm": NaN, "learning_rate": 0.0004219361408987624, "loss": 0.0, "step": 48670 }, { "epoch": 0.6656138263907405, "grad_norm": NaN, "learning_rate": 0.0004218928247775569, "loss": 0.0, "step": 48680 }, { "epoch": 0.6657505588941075, "grad_norm": NaN, "learning_rate": 0.00042184949886681897, "loss": 0.0, "step": 48690 }, { "epoch": 0.6658872913974746, "grad_norm": NaN, "learning_rate": 0.000421806163169016, "loss": 0.0, "step": 48700 }, { "epoch": 0.6660240239008416, "grad_norm": NaN, "learning_rate": 0.0004217628176866161, "loss": 0.0, "step": 48710 }, { "epoch": 0.6661607564042086, "grad_norm": NaN, "learning_rate": 0.0004217194624220878, "loss": 0.0, "step": 48720 }, { "epoch": 0.6662974889075757, "grad_norm": NaN, "learning_rate": 0.00042167609737790035, "loss": 0.0, "step": 48730 }, { "epoch": 0.6664342214109427, "grad_norm": NaN, "learning_rate": 0.00042163272255652337, "loss": 0.0, "step": 48740 }, { "epoch": 0.6665709539143098, "grad_norm": NaN, "learning_rate": 0.0004215893379604271, "loss": 0.0, "step": 48750 }, { "epoch": 0.6667076864176767, "grad_norm": NaN, "learning_rate": 0.0004215459435920823, "loss": 0.0, "step": 48760 }, { "epoch": 0.6668444189210438, "grad_norm": NaN, "learning_rate": 0.0004215025394539605, "loss": 0.0, "step": 48770 }, { "epoch": 0.6669811514244108, "grad_norm": NaN, "learning_rate": 0.0004214591255485335, "loss": 0.0, "step": 48780 }, { "epoch": 0.6671178839277779, "grad_norm": NaN, "learning_rate": 0.00042141570187827383, "loss": 0.0, "step": 48790 }, { "epoch": 0.6672546164311449, "grad_norm": NaN, "learning_rate": 0.00042137226844565453, "loss": 0.0, "step": 48800 }, { "epoch": 0.667391348934512, "grad_norm": NaN, "learning_rate": 0.00042132882525314913, "loss": 0.0, "step": 48810 }, { "epoch": 0.667528081437879, "grad_norm": NaN, "learning_rate": 0.0004212853723032319, "loss": 0.0, "step": 48820 }, { "epoch": 0.667664813941246, "grad_norm": NaN, "learning_rate": 0.00042124190959837745, "loss": 0.0, "step": 48830 }, { "epoch": 0.6678015464446131, "grad_norm": NaN, "learning_rate": 0.000421198437141061, "loss": 0.0, "step": 48840 }, { "epoch": 0.6679382789479801, "grad_norm": NaN, "learning_rate": 0.00042115495493375856, "loss": 0.0, "step": 48850 }, { "epoch": 0.6680750114513472, "grad_norm": NaN, "learning_rate": 0.00042111146297894625, "loss": 0.0, "step": 48860 }, { "epoch": 0.6682117439547142, "grad_norm": NaN, "learning_rate": 0.0004210679612791012, "loss": 0.0, "step": 48870 }, { "epoch": 0.6683484764580813, "grad_norm": NaN, "learning_rate": 0.00042102444983670075, "loss": 0.0, "step": 48880 }, { "epoch": 0.6684852089614483, "grad_norm": NaN, "learning_rate": 0.000420980928654223, "loss": 0.0, "step": 48890 }, { "epoch": 0.6686219414648154, "grad_norm": NaN, "learning_rate": 0.00042093739773414664, "loss": 0.0, "step": 48900 }, { "epoch": 0.6687586739681823, "grad_norm": NaN, "learning_rate": 0.00042089385707895055, "loss": 0.0, "step": 48910 }, { "epoch": 0.6688954064715494, "grad_norm": NaN, "learning_rate": 0.00042085030669111474, "loss": 0.0, "step": 48920 }, { "epoch": 0.6690321389749164, "grad_norm": NaN, "learning_rate": 0.0004208067465731192, "loss": 0.0, "step": 48930 }, { "epoch": 0.6691688714782834, "grad_norm": NaN, "learning_rate": 0.0004207631767274449, "loss": 0.0, "step": 48940 }, { "epoch": 0.6693056039816505, "grad_norm": NaN, "learning_rate": 0.0004207195971565733, "loss": 0.0, "step": 48950 }, { "epoch": 0.6694423364850175, "grad_norm": NaN, "learning_rate": 0.000420676007862986, "loss": 0.0, "step": 48960 }, { "epoch": 0.6695790689883846, "grad_norm": NaN, "learning_rate": 0.00042063240884916567, "loss": 0.0, "step": 48970 }, { "epoch": 0.6697158014917516, "grad_norm": NaN, "learning_rate": 0.0004205888001175954, "loss": 0.0, "step": 48980 }, { "epoch": 0.6698525339951187, "grad_norm": NaN, "learning_rate": 0.0004205451816707586, "loss": 0.0, "step": 48990 }, { "epoch": 0.6699892664984857, "grad_norm": NaN, "learning_rate": 0.0004205015535111395, "loss": 0.0, "step": 49000 }, { "epoch": 0.6701259990018528, "grad_norm": NaN, "learning_rate": 0.00042045791564122273, "loss": 0.0, "step": 49010 }, { "epoch": 0.6702627315052198, "grad_norm": NaN, "learning_rate": 0.0004204142680634936, "loss": 0.0, "step": 49020 }, { "epoch": 0.6703994640085869, "grad_norm": NaN, "learning_rate": 0.0004203706107804378, "loss": 0.0, "step": 49030 }, { "epoch": 0.6705361965119538, "grad_norm": NaN, "learning_rate": 0.0004203269437945418, "loss": 0.0, "step": 49040 }, { "epoch": 0.6706729290153208, "grad_norm": NaN, "learning_rate": 0.0004202832671082924, "loss": 0.0, "step": 49050 }, { "epoch": 0.6708096615186879, "grad_norm": NaN, "learning_rate": 0.00042023958072417707, "loss": 0.0, "step": 49060 }, { "epoch": 0.6709463940220549, "grad_norm": NaN, "learning_rate": 0.0004201958846446838, "loss": 0.0, "step": 49070 }, { "epoch": 0.671083126525422, "grad_norm": NaN, "learning_rate": 0.00042015217887230117, "loss": 0.0, "step": 49080 }, { "epoch": 0.671219859028789, "grad_norm": NaN, "learning_rate": 0.00042010846340951826, "loss": 0.0, "step": 49090 }, { "epoch": 0.6713565915321561, "grad_norm": NaN, "learning_rate": 0.00042006473825882473, "loss": 0.0, "step": 49100 }, { "epoch": 0.6714933240355231, "grad_norm": NaN, "learning_rate": 0.00042002100342271076, "loss": 0.0, "step": 49110 }, { "epoch": 0.6716300565388902, "grad_norm": NaN, "learning_rate": 0.0004199772589036672, "loss": 0.0, "step": 49120 }, { "epoch": 0.6717667890422572, "grad_norm": NaN, "learning_rate": 0.00041993350470418523, "loss": 0.0, "step": 49130 }, { "epoch": 0.6719035215456243, "grad_norm": NaN, "learning_rate": 0.0004198897408267568, "loss": 0.0, "step": 49140 }, { "epoch": 0.6720402540489913, "grad_norm": NaN, "learning_rate": 0.00041984596727387435, "loss": 0.0, "step": 49150 }, { "epoch": 0.6721769865523582, "grad_norm": NaN, "learning_rate": 0.0004198021840480308, "loss": 0.0, "step": 49160 }, { "epoch": 0.6723137190557253, "grad_norm": NaN, "learning_rate": 0.00041975839115171963, "loss": 0.0, "step": 49170 }, { "epoch": 0.6724504515590923, "grad_norm": NaN, "learning_rate": 0.00041971458858743494, "loss": 0.0, "step": 49180 }, { "epoch": 0.6725871840624594, "grad_norm": NaN, "learning_rate": 0.00041967077635767137, "loss": 0.0, "step": 49190 }, { "epoch": 0.6727239165658264, "grad_norm": NaN, "learning_rate": 0.00041962695446492407, "loss": 0.0, "step": 49200 }, { "epoch": 0.6728606490691935, "grad_norm": NaN, "learning_rate": 0.00041958312291168865, "loss": 0.0, "step": 49210 }, { "epoch": 0.6729973815725605, "grad_norm": NaN, "learning_rate": 0.00041953928170046154, "loss": 0.0, "step": 49220 }, { "epoch": 0.6731341140759276, "grad_norm": NaN, "learning_rate": 0.0004194954308337395, "loss": 0.0, "step": 49230 }, { "epoch": 0.6732708465792946, "grad_norm": NaN, "learning_rate": 0.00041945157031401985, "loss": 0.0, "step": 49240 }, { "epoch": 0.6734075790826617, "grad_norm": NaN, "learning_rate": 0.0004194077001438006, "loss": 0.0, "step": 49250 }, { "epoch": 0.6735443115860287, "grad_norm": NaN, "learning_rate": 0.00041936382032558016, "loss": 0.0, "step": 49260 }, { "epoch": 0.6736810440893957, "grad_norm": NaN, "learning_rate": 0.0004193199308618575, "loss": 0.0, "step": 49270 }, { "epoch": 0.6738177765927628, "grad_norm": NaN, "learning_rate": 0.00041927603175513227, "loss": 0.0, "step": 49280 }, { "epoch": 0.6739545090961298, "grad_norm": NaN, "learning_rate": 0.0004192321230079046, "loss": 0.0, "step": 49290 }, { "epoch": 0.6740912415994968, "grad_norm": NaN, "learning_rate": 0.00041918820462267506, "loss": 0.0, "step": 49300 }, { "epoch": 0.6742279741028638, "grad_norm": NaN, "learning_rate": 0.0004191442766019449, "loss": 0.0, "step": 49310 }, { "epoch": 0.6743647066062309, "grad_norm": NaN, "learning_rate": 0.0004191003389482158, "loss": 0.0, "step": 49320 }, { "epoch": 0.6745014391095979, "grad_norm": NaN, "learning_rate": 0.00041905639166399026, "loss": 0.0, "step": 49330 }, { "epoch": 0.674638171612965, "grad_norm": NaN, "learning_rate": 0.00041901243475177095, "loss": 0.0, "step": 49340 }, { "epoch": 0.674774904116332, "grad_norm": NaN, "learning_rate": 0.0004189684682140614, "loss": 0.0, "step": 49350 }, { "epoch": 0.6749116366196991, "grad_norm": NaN, "learning_rate": 0.0004189244920533656, "loss": 0.0, "step": 49360 }, { "epoch": 0.6750483691230661, "grad_norm": NaN, "learning_rate": 0.00041888050627218786, "loss": 0.0, "step": 49370 }, { "epoch": 0.6751851016264331, "grad_norm": NaN, "learning_rate": 0.00041883651087303345, "loss": 0.0, "step": 49380 }, { "epoch": 0.6753218341298002, "grad_norm": NaN, "learning_rate": 0.00041879250585840776, "loss": 0.0, "step": 49390 }, { "epoch": 0.6754585666331672, "grad_norm": NaN, "learning_rate": 0.00041874849123081713, "loss": 0.0, "step": 49400 }, { "epoch": 0.6755952991365343, "grad_norm": NaN, "learning_rate": 0.0004187044669927681, "loss": 0.0, "step": 49410 }, { "epoch": 0.6757320316399013, "grad_norm": NaN, "learning_rate": 0.000418660433146768, "loss": 0.0, "step": 49420 }, { "epoch": 0.6758687641432684, "grad_norm": NaN, "learning_rate": 0.0004186163896953247, "loss": 0.0, "step": 49430 }, { "epoch": 0.6760054966466353, "grad_norm": NaN, "learning_rate": 0.00041857233664094627, "loss": 0.0, "step": 49440 }, { "epoch": 0.6761422291500024, "grad_norm": NaN, "learning_rate": 0.00041852827398614187, "loss": 0.0, "step": 49450 }, { "epoch": 0.6762789616533694, "grad_norm": NaN, "learning_rate": 0.0004184842017334207, "loss": 0.0, "step": 49460 }, { "epoch": 0.6764156941567364, "grad_norm": NaN, "learning_rate": 0.00041844011988529295, "loss": 0.0, "step": 49470 }, { "epoch": 0.6765524266601035, "grad_norm": NaN, "learning_rate": 0.00041839602844426894, "loss": 0.0, "step": 49480 }, { "epoch": 0.6766891591634705, "grad_norm": NaN, "learning_rate": 0.00041835192741285985, "loss": 0.0, "step": 49490 }, { "epoch": 0.6768258916668376, "grad_norm": NaN, "learning_rate": 0.00041830781679357736, "loss": 0.0, "step": 49500 }, { "epoch": 0.6769626241702046, "grad_norm": NaN, "learning_rate": 0.0004182636965889335, "loss": 0.0, "step": 49510 }, { "epoch": 0.6770993566735717, "grad_norm": NaN, "learning_rate": 0.000418219566801441, "loss": 0.0, "step": 49520 }, { "epoch": 0.6772360891769387, "grad_norm": NaN, "learning_rate": 0.00041817542743361326, "loss": 0.0, "step": 49530 }, { "epoch": 0.6773728216803058, "grad_norm": NaN, "learning_rate": 0.00041813127848796384, "loss": 0.0, "step": 49540 }, { "epoch": 0.6775095541836728, "grad_norm": NaN, "learning_rate": 0.00041808711996700723, "loss": 0.0, "step": 49550 }, { "epoch": 0.6776462866870399, "grad_norm": NaN, "learning_rate": 0.00041804295187325825, "loss": 0.0, "step": 49560 }, { "epoch": 0.6777830191904068, "grad_norm": NaN, "learning_rate": 0.0004179987742092324, "loss": 0.0, "step": 49570 }, { "epoch": 0.6779197516937738, "grad_norm": NaN, "learning_rate": 0.0004179545869774457, "loss": 0.0, "step": 49580 }, { "epoch": 0.6780564841971409, "grad_norm": NaN, "learning_rate": 0.0004179103901804145, "loss": 0.0, "step": 49590 }, { "epoch": 0.6781932167005079, "grad_norm": NaN, "learning_rate": 0.000417866183820656, "loss": 0.0, "step": 49600 }, { "epoch": 0.678329949203875, "grad_norm": NaN, "learning_rate": 0.00041782196790068784, "loss": 0.0, "step": 49610 }, { "epoch": 0.678466681707242, "grad_norm": NaN, "learning_rate": 0.00041777774242302804, "loss": 0.0, "step": 49620 }, { "epoch": 0.6786034142106091, "grad_norm": NaN, "learning_rate": 0.00041773350739019544, "loss": 0.0, "step": 49630 }, { "epoch": 0.6787401467139761, "grad_norm": NaN, "learning_rate": 0.0004176892628047092, "loss": 0.0, "step": 49640 }, { "epoch": 0.6788768792173432, "grad_norm": NaN, "learning_rate": 0.0004176450086690891, "loss": 0.0, "step": 49650 }, { "epoch": 0.6790136117207102, "grad_norm": NaN, "learning_rate": 0.00041760074498585557, "loss": 0.0, "step": 49660 }, { "epoch": 0.6791503442240773, "grad_norm": NaN, "learning_rate": 0.00041755647175752944, "loss": 0.0, "step": 49670 }, { "epoch": 0.6792870767274443, "grad_norm": NaN, "learning_rate": 0.0004175121889866321, "loss": 0.0, "step": 49680 }, { "epoch": 0.6794238092308112, "grad_norm": NaN, "learning_rate": 0.0004174678966756855, "loss": 0.0, "step": 49690 }, { "epoch": 0.6795605417341783, "grad_norm": NaN, "learning_rate": 0.00041742359482721227, "loss": 0.0, "step": 49700 }, { "epoch": 0.6796972742375453, "grad_norm": NaN, "learning_rate": 0.00041737928344373534, "loss": 0.0, "step": 49710 }, { "epoch": 0.6798340067409124, "grad_norm": NaN, "learning_rate": 0.00041733496252777835, "loss": 0.0, "step": 49720 }, { "epoch": 0.6799707392442794, "grad_norm": NaN, "learning_rate": 0.00041729063208186543, "loss": 0.0, "step": 49730 }, { "epoch": 0.6801074717476465, "grad_norm": NaN, "learning_rate": 0.00041724629210852115, "loss": 0.0, "step": 49740 }, { "epoch": 0.6802442042510135, "grad_norm": NaN, "learning_rate": 0.000417201942610271, "loss": 0.0, "step": 49750 }, { "epoch": 0.6803809367543806, "grad_norm": NaN, "learning_rate": 0.00041715758358964046, "loss": 0.0, "step": 49760 }, { "epoch": 0.6805176692577476, "grad_norm": NaN, "learning_rate": 0.000417113215049156, "loss": 0.0, "step": 49770 }, { "epoch": 0.6806544017611147, "grad_norm": NaN, "learning_rate": 0.00041706883699134444, "loss": 0.0, "step": 49780 }, { "epoch": 0.6807911342644817, "grad_norm": NaN, "learning_rate": 0.0004170244494187332, "loss": 0.0, "step": 49790 }, { "epoch": 0.6809278667678487, "grad_norm": NaN, "learning_rate": 0.0004169800523338501, "loss": 0.0, "step": 49800 }, { "epoch": 0.6810645992712158, "grad_norm": NaN, "learning_rate": 0.0004169356457392237, "loss": 0.0, "step": 49810 }, { "epoch": 0.6812013317745828, "grad_norm": NaN, "learning_rate": 0.000416891229637383, "loss": 0.0, "step": 49820 }, { "epoch": 0.6813380642779499, "grad_norm": NaN, "learning_rate": 0.0004168468040308577, "loss": 0.0, "step": 49830 }, { "epoch": 0.6814747967813168, "grad_norm": NaN, "learning_rate": 0.0004168023689221776, "loss": 0.0, "step": 49840 }, { "epoch": 0.6816115292846839, "grad_norm": NaN, "learning_rate": 0.0004167579243138736, "loss": 0.0, "step": 49850 }, { "epoch": 0.6817482617880509, "grad_norm": NaN, "learning_rate": 0.00041671347020847673, "loss": 0.0, "step": 49860 }, { "epoch": 0.681884994291418, "grad_norm": NaN, "learning_rate": 0.0004166690066085188, "loss": 0.0, "step": 49870 }, { "epoch": 0.682021726794785, "grad_norm": NaN, "learning_rate": 0.000416624533516532, "loss": 0.0, "step": 49880 }, { "epoch": 0.6821584592981521, "grad_norm": NaN, "learning_rate": 0.00041658005093504925, "loss": 0.0, "step": 49890 }, { "epoch": 0.6822951918015191, "grad_norm": NaN, "learning_rate": 0.0004165355588666037, "loss": 0.0, "step": 49900 }, { "epoch": 0.6824319243048861, "grad_norm": NaN, "learning_rate": 0.00041649105731372945, "loss": 0.0, "step": 49910 }, { "epoch": 0.6825686568082532, "grad_norm": NaN, "learning_rate": 0.00041644654627896076, "loss": 0.0, "step": 49920 }, { "epoch": 0.6827053893116202, "grad_norm": NaN, "learning_rate": 0.0004164020257648327, "loss": 0.0, "step": 49930 }, { "epoch": 0.6828421218149873, "grad_norm": NaN, "learning_rate": 0.00041635749577388073, "loss": 0.0, "step": 49940 }, { "epoch": 0.6829788543183543, "grad_norm": NaN, "learning_rate": 0.00041631295630864095, "loss": 0.0, "step": 49950 }, { "epoch": 0.6831155868217214, "grad_norm": NaN, "learning_rate": 0.00041626840737164983, "loss": 0.0, "step": 49960 }, { "epoch": 0.6832523193250883, "grad_norm": NaN, "learning_rate": 0.0004162238489654446, "loss": 0.0, "step": 49970 }, { "epoch": 0.6833890518284554, "grad_norm": NaN, "learning_rate": 0.0004161792810925629, "loss": 0.0, "step": 49980 }, { "epoch": 0.6835257843318224, "grad_norm": NaN, "learning_rate": 0.00041613470375554285, "loss": 0.0, "step": 49990 }, { "epoch": 0.6836625168351895, "grad_norm": NaN, "learning_rate": 0.0004160901169569233, "loss": 0.0, "step": 50000 }, { "epoch": 0.6837992493385565, "grad_norm": NaN, "learning_rate": 0.0004160455206992434, "loss": 0.0, "step": 50010 }, { "epoch": 0.6839359818419235, "grad_norm": NaN, "learning_rate": 0.0004160009149850431, "loss": 0.0, "step": 50020 }, { "epoch": 0.6840727143452906, "grad_norm": NaN, "learning_rate": 0.00041595629981686273, "loss": 0.0, "step": 50030 }, { "epoch": 0.6842094468486576, "grad_norm": NaN, "learning_rate": 0.00041591167519724315, "loss": 0.0, "step": 50040 }, { "epoch": 0.6843461793520247, "grad_norm": NaN, "learning_rate": 0.00041586704112872575, "loss": 0.0, "step": 50050 }, { "epoch": 0.6844829118553917, "grad_norm": NaN, "learning_rate": 0.00041582239761385264, "loss": 0.0, "step": 50060 }, { "epoch": 0.6846196443587588, "grad_norm": NaN, "learning_rate": 0.00041577774465516624, "loss": 0.0, "step": 50070 }, { "epoch": 0.6847563768621258, "grad_norm": NaN, "learning_rate": 0.0004157330822552096, "loss": 0.0, "step": 50080 }, { "epoch": 0.6848931093654929, "grad_norm": NaN, "learning_rate": 0.0004156884104165263, "loss": 0.0, "step": 50090 }, { "epoch": 0.6850298418688598, "grad_norm": NaN, "learning_rate": 0.0004156437291416604, "loss": 0.0, "step": 50100 }, { "epoch": 0.6851665743722269, "grad_norm": NaN, "learning_rate": 0.00041559903843315667, "loss": 0.0, "step": 50110 }, { "epoch": 0.6853033068755939, "grad_norm": NaN, "learning_rate": 0.0004155543382935603, "loss": 0.0, "step": 50120 }, { "epoch": 0.6854400393789609, "grad_norm": NaN, "learning_rate": 0.000415509628725417, "loss": 0.0, "step": 50130 }, { "epoch": 0.685576771882328, "grad_norm": NaN, "learning_rate": 0.000415464909731273, "loss": 0.0, "step": 50140 }, { "epoch": 0.685713504385695, "grad_norm": NaN, "learning_rate": 0.00041542018131367507, "loss": 0.0, "step": 50150 }, { "epoch": 0.6858502368890621, "grad_norm": NaN, "learning_rate": 0.00041537544347517076, "loss": 0.0, "step": 50160 }, { "epoch": 0.6859869693924291, "grad_norm": NaN, "learning_rate": 0.0004153306962183078, "loss": 0.0, "step": 50170 }, { "epoch": 0.6861237018957962, "grad_norm": NaN, "learning_rate": 0.00041528593954563454, "loss": 0.0, "step": 50180 }, { "epoch": 0.6862604343991632, "grad_norm": NaN, "learning_rate": 0.0004152411734597, "loss": 0.0, "step": 50190 }, { "epoch": 0.6863971669025303, "grad_norm": NaN, "learning_rate": 0.00041519639796305384, "loss": 0.0, "step": 50200 }, { "epoch": 0.6865338994058973, "grad_norm": NaN, "learning_rate": 0.0004151516130582459, "loss": 0.0, "step": 50210 }, { "epoch": 0.6866706319092644, "grad_norm": NaN, "learning_rate": 0.0004151068187478267, "loss": 0.0, "step": 50220 }, { "epoch": 0.6868073644126313, "grad_norm": NaN, "learning_rate": 0.0004150620150343474, "loss": 0.0, "step": 50230 }, { "epoch": 0.6869440969159983, "grad_norm": NaN, "learning_rate": 0.0004150172019203597, "loss": 0.0, "step": 50240 }, { "epoch": 0.6870808294193654, "grad_norm": NaN, "learning_rate": 0.00041497237940841576, "loss": 0.0, "step": 50250 }, { "epoch": 0.6872175619227324, "grad_norm": NaN, "learning_rate": 0.0004149275475010682, "loss": 0.0, "step": 50260 }, { "epoch": 0.6873542944260995, "grad_norm": NaN, "learning_rate": 0.00041488270620087034, "loss": 0.0, "step": 50270 }, { "epoch": 0.6874910269294665, "grad_norm": NaN, "learning_rate": 0.0004148378555103758, "loss": 0.0, "step": 50280 }, { "epoch": 0.6876277594328336, "grad_norm": NaN, "learning_rate": 0.0004147929954321391, "loss": 0.0, "step": 50290 }, { "epoch": 0.6877644919362006, "grad_norm": NaN, "learning_rate": 0.0004147481259687149, "loss": 0.0, "step": 50300 }, { "epoch": 0.6879012244395677, "grad_norm": NaN, "learning_rate": 0.0004147032471226587, "loss": 0.0, "step": 50310 }, { "epoch": 0.6880379569429347, "grad_norm": NaN, "learning_rate": 0.0004146583588965264, "loss": 0.0, "step": 50320 }, { "epoch": 0.6881746894463018, "grad_norm": NaN, "learning_rate": 0.00041461346129287443, "loss": 0.0, "step": 50330 }, { "epoch": 0.6883114219496688, "grad_norm": NaN, "learning_rate": 0.00041456855431425973, "loss": 0.0, "step": 50340 }, { "epoch": 0.6884481544530358, "grad_norm": NaN, "learning_rate": 0.00041452363796323983, "loss": 0.0, "step": 50350 }, { "epoch": 0.6885848869564029, "grad_norm": NaN, "learning_rate": 0.00041447871224237274, "loss": 0.0, "step": 50360 }, { "epoch": 0.6887216194597698, "grad_norm": NaN, "learning_rate": 0.00041443377715421725, "loss": 0.0, "step": 50370 }, { "epoch": 0.6888583519631369, "grad_norm": NaN, "learning_rate": 0.0004143888327013322, "loss": 0.0, "step": 50380 }, { "epoch": 0.6889950844665039, "grad_norm": NaN, "learning_rate": 0.00041434387888627746, "loss": 0.0, "step": 50390 }, { "epoch": 0.689131816969871, "grad_norm": NaN, "learning_rate": 0.000414298915711613, "loss": 0.0, "step": 50400 }, { "epoch": 0.689268549473238, "grad_norm": NaN, "learning_rate": 0.0004142539431798997, "loss": 0.0, "step": 50410 }, { "epoch": 0.6894052819766051, "grad_norm": NaN, "learning_rate": 0.00041420896129369867, "loss": 0.0, "step": 50420 }, { "epoch": 0.6895420144799721, "grad_norm": NaN, "learning_rate": 0.00041416397005557184, "loss": 0.0, "step": 50430 }, { "epoch": 0.6896787469833392, "grad_norm": NaN, "learning_rate": 0.0004141189694680815, "loss": 0.0, "step": 50440 }, { "epoch": 0.6898154794867062, "grad_norm": NaN, "learning_rate": 0.00041407395953379036, "loss": 0.0, "step": 50450 }, { "epoch": 0.6899522119900732, "grad_norm": NaN, "learning_rate": 0.0004140289402552619, "loss": 0.0, "step": 50460 }, { "epoch": 0.6900889444934403, "grad_norm": NaN, "learning_rate": 0.00041398391163506, "loss": 0.0, "step": 50470 }, { "epoch": 0.6902256769968073, "grad_norm": NaN, "learning_rate": 0.0004139388736757492, "loss": 0.0, "step": 50480 }, { "epoch": 0.6903624095001744, "grad_norm": NaN, "learning_rate": 0.0004138938263798943, "loss": 0.0, "step": 50490 }, { "epoch": 0.6904991420035413, "grad_norm": NaN, "learning_rate": 0.0004138487697500609, "loss": 0.0, "step": 50500 }, { "epoch": 0.6906358745069084, "grad_norm": NaN, "learning_rate": 0.00041380370378881517, "loss": 0.0, "step": 50510 }, { "epoch": 0.6907726070102754, "grad_norm": NaN, "learning_rate": 0.00041375862849872335, "loss": 0.0, "step": 50520 }, { "epoch": 0.6909093395136425, "grad_norm": NaN, "learning_rate": 0.00041371354388235285, "loss": 0.0, "step": 50530 }, { "epoch": 0.6910460720170095, "grad_norm": NaN, "learning_rate": 0.00041366844994227117, "loss": 0.0, "step": 50540 }, { "epoch": 0.6911828045203766, "grad_norm": NaN, "learning_rate": 0.0004136233466810465, "loss": 0.0, "step": 50550 }, { "epoch": 0.6913195370237436, "grad_norm": NaN, "learning_rate": 0.00041357823410124745, "loss": 0.0, "step": 50560 }, { "epoch": 0.6914562695271106, "grad_norm": NaN, "learning_rate": 0.00041353311220544334, "loss": 0.0, "step": 50570 }, { "epoch": 0.6915930020304777, "grad_norm": NaN, "learning_rate": 0.0004134879809962039, "loss": 0.0, "step": 50580 }, { "epoch": 0.6917297345338447, "grad_norm": NaN, "learning_rate": 0.0004134428404760994, "loss": 0.0, "step": 50590 }, { "epoch": 0.6918664670372118, "grad_norm": NaN, "learning_rate": 0.0004133976906477007, "loss": 0.0, "step": 50600 }, { "epoch": 0.6920031995405788, "grad_norm": NaN, "learning_rate": 0.0004133525315135791, "loss": 0.0, "step": 50610 }, { "epoch": 0.6921399320439459, "grad_norm": NaN, "learning_rate": 0.00041330736307630645, "loss": 0.0, "step": 50620 }, { "epoch": 0.6922766645473128, "grad_norm": NaN, "learning_rate": 0.00041326218533845516, "loss": 0.0, "step": 50630 }, { "epoch": 0.69241339705068, "grad_norm": NaN, "learning_rate": 0.00041321699830259824, "loss": 0.0, "step": 50640 }, { "epoch": 0.6925501295540469, "grad_norm": NaN, "learning_rate": 0.0004131718019713091, "loss": 0.0, "step": 50650 }, { "epoch": 0.692686862057414, "grad_norm": NaN, "learning_rate": 0.00041312659634716174, "loss": 0.0, "step": 50660 }, { "epoch": 0.692823594560781, "grad_norm": NaN, "learning_rate": 0.00041308138143273065, "loss": 0.0, "step": 50670 }, { "epoch": 0.692960327064148, "grad_norm": NaN, "learning_rate": 0.00041303615723059094, "loss": 0.0, "step": 50680 }, { "epoch": 0.6930970595675151, "grad_norm": NaN, "learning_rate": 0.0004129909237433181, "loss": 0.0, "step": 50690 }, { "epoch": 0.6932337920708821, "grad_norm": NaN, "learning_rate": 0.0004129456809734884, "loss": 0.0, "step": 50700 }, { "epoch": 0.6933705245742492, "grad_norm": NaN, "learning_rate": 0.00041290042892367843, "loss": 0.0, "step": 50710 }, { "epoch": 0.6935072570776162, "grad_norm": NaN, "learning_rate": 0.00041285516759646514, "loss": 0.0, "step": 50720 }, { "epoch": 0.6936439895809833, "grad_norm": NaN, "learning_rate": 0.0004128098969944265, "loss": 0.0, "step": 50730 }, { "epoch": 0.6937807220843503, "grad_norm": NaN, "learning_rate": 0.00041276461712014056, "loss": 0.0, "step": 50740 }, { "epoch": 0.6939174545877174, "grad_norm": NaN, "learning_rate": 0.00041271932797618617, "loss": 0.0, "step": 50750 }, { "epoch": 0.6940541870910844, "grad_norm": NaN, "learning_rate": 0.00041267402956514257, "loss": 0.0, "step": 50760 }, { "epoch": 0.6941909195944514, "grad_norm": NaN, "learning_rate": 0.00041262872188958956, "loss": 0.0, "step": 50770 }, { "epoch": 0.6943276520978184, "grad_norm": NaN, "learning_rate": 0.00041258340495210744, "loss": 0.0, "step": 50780 }, { "epoch": 0.6944643846011854, "grad_norm": NaN, "learning_rate": 0.0004125380787552772, "loss": 0.0, "step": 50790 }, { "epoch": 0.6946011171045525, "grad_norm": NaN, "learning_rate": 0.00041249274330168006, "loss": 0.0, "step": 50800 }, { "epoch": 0.6947378496079195, "grad_norm": NaN, "learning_rate": 0.00041244739859389815, "loss": 0.0, "step": 50810 }, { "epoch": 0.6948745821112866, "grad_norm": NaN, "learning_rate": 0.0004124020446345137, "loss": 0.0, "step": 50820 }, { "epoch": 0.6950113146146536, "grad_norm": NaN, "learning_rate": 0.0004123566814261098, "loss": 0.0, "step": 50830 }, { "epoch": 0.6951480471180207, "grad_norm": NaN, "learning_rate": 0.0004123113089712699, "loss": 0.0, "step": 50840 }, { "epoch": 0.6952847796213877, "grad_norm": NaN, "learning_rate": 0.00041226592727257793, "loss": 0.0, "step": 50850 }, { "epoch": 0.6954215121247548, "grad_norm": NaN, "learning_rate": 0.0004122205363326187, "loss": 0.0, "step": 50860 }, { "epoch": 0.6955582446281218, "grad_norm": NaN, "learning_rate": 0.00041217513615397707, "loss": 0.0, "step": 50870 }, { "epoch": 0.6956949771314888, "grad_norm": NaN, "learning_rate": 0.0004121297267392387, "loss": 0.0, "step": 50880 }, { "epoch": 0.6958317096348559, "grad_norm": NaN, "learning_rate": 0.00041208430809098974, "loss": 0.0, "step": 50890 }, { "epoch": 0.6959684421382228, "grad_norm": NaN, "learning_rate": 0.0004120388802118168, "loss": 0.0, "step": 50900 }, { "epoch": 0.6961051746415899, "grad_norm": NaN, "learning_rate": 0.0004119934431043071, "loss": 0.0, "step": 50910 }, { "epoch": 0.6962419071449569, "grad_norm": NaN, "learning_rate": 0.0004119479967710483, "loss": 0.0, "step": 50920 }, { "epoch": 0.696378639648324, "grad_norm": NaN, "learning_rate": 0.0004119025412146288, "loss": 0.0, "step": 50930 }, { "epoch": 0.696515372151691, "grad_norm": NaN, "learning_rate": 0.0004118570764376371, "loss": 0.0, "step": 50940 }, { "epoch": 0.6966521046550581, "grad_norm": NaN, "learning_rate": 0.00041181160244266264, "loss": 0.0, "step": 50950 }, { "epoch": 0.6967888371584251, "grad_norm": NaN, "learning_rate": 0.0004117661192322952, "loss": 0.0, "step": 50960 }, { "epoch": 0.6969255696617922, "grad_norm": NaN, "learning_rate": 0.000411720626809125, "loss": 0.0, "step": 50970 }, { "epoch": 0.6970623021651592, "grad_norm": NaN, "learning_rate": 0.0004116751251757431, "loss": 0.0, "step": 50980 }, { "epoch": 0.6971990346685262, "grad_norm": NaN, "learning_rate": 0.0004116296143347408, "loss": 0.0, "step": 50990 }, { "epoch": 0.6973357671718933, "grad_norm": NaN, "learning_rate": 0.0004115840942887099, "loss": 0.0, "step": 51000 }, { "epoch": 0.6974724996752603, "grad_norm": NaN, "learning_rate": 0.000411538565040243, "loss": 0.0, "step": 51010 }, { "epoch": 0.6976092321786274, "grad_norm": NaN, "learning_rate": 0.0004114930265919329, "loss": 0.0, "step": 51020 }, { "epoch": 0.6977459646819943, "grad_norm": NaN, "learning_rate": 0.00041144747894637313, "loss": 0.0, "step": 51030 }, { "epoch": 0.6978826971853614, "grad_norm": NaN, "learning_rate": 0.00041140192210615775, "loss": 0.0, "step": 51040 }, { "epoch": 0.6980194296887284, "grad_norm": NaN, "learning_rate": 0.00041135635607388125, "loss": 0.0, "step": 51050 }, { "epoch": 0.6981561621920955, "grad_norm": NaN, "learning_rate": 0.00041131078085213855, "loss": 0.0, "step": 51060 }, { "epoch": 0.6982928946954625, "grad_norm": NaN, "learning_rate": 0.00041126519644352547, "loss": 0.0, "step": 51070 }, { "epoch": 0.6984296271988296, "grad_norm": NaN, "learning_rate": 0.00041121960285063784, "loss": 0.0, "step": 51080 }, { "epoch": 0.6985663597021966, "grad_norm": NaN, "learning_rate": 0.00041117400007607255, "loss": 0.0, "step": 51090 }, { "epoch": 0.6987030922055636, "grad_norm": NaN, "learning_rate": 0.0004111283881224265, "loss": 0.0, "step": 51100 }, { "epoch": 0.6988398247089307, "grad_norm": NaN, "learning_rate": 0.00041108276699229753, "loss": 0.0, "step": 51110 }, { "epoch": 0.6989765572122977, "grad_norm": NaN, "learning_rate": 0.0004110371366882837, "loss": 0.0, "step": 51120 }, { "epoch": 0.6991132897156648, "grad_norm": NaN, "learning_rate": 0.0004109914972129837, "loss": 0.0, "step": 51130 }, { "epoch": 0.6992500222190318, "grad_norm": NaN, "learning_rate": 0.000410945848568997, "loss": 0.0, "step": 51140 }, { "epoch": 0.6993867547223989, "grad_norm": NaN, "learning_rate": 0.00041090019075892305, "loss": 0.0, "step": 51150 }, { "epoch": 0.6995234872257658, "grad_norm": NaN, "learning_rate": 0.00041085452378536224, "loss": 0.0, "step": 51160 }, { "epoch": 0.699660219729133, "grad_norm": NaN, "learning_rate": 0.00041080884765091547, "loss": 0.0, "step": 51170 }, { "epoch": 0.6997969522324999, "grad_norm": NaN, "learning_rate": 0.00041076316235818394, "loss": 0.0, "step": 51180 }, { "epoch": 0.699933684735867, "grad_norm": NaN, "learning_rate": 0.0004107174679097695, "loss": 0.0, "step": 51190 }, { "epoch": 0.700070417239234, "grad_norm": NaN, "learning_rate": 0.0004106717643082746, "loss": 0.0, "step": 51200 }, { "epoch": 0.700207149742601, "grad_norm": NaN, "learning_rate": 0.000410626051556302, "loss": 0.0, "step": 51210 }, { "epoch": 0.7003438822459681, "grad_norm": NaN, "learning_rate": 0.0004105803296564552, "loss": 0.0, "step": 51220 }, { "epoch": 0.7004806147493351, "grad_norm": NaN, "learning_rate": 0.000410534598611338, "loss": 0.0, "step": 51230 }, { "epoch": 0.7006173472527022, "grad_norm": NaN, "learning_rate": 0.000410488858423555, "loss": 0.0, "step": 51240 }, { "epoch": 0.7007540797560692, "grad_norm": NaN, "learning_rate": 0.0004104431090957111, "loss": 0.0, "step": 51250 }, { "epoch": 0.7008908122594363, "grad_norm": NaN, "learning_rate": 0.00041039735063041176, "loss": 0.0, "step": 51260 }, { "epoch": 0.7010275447628033, "grad_norm": NaN, "learning_rate": 0.000410351583030263, "loss": 0.0, "step": 51270 }, { "epoch": 0.7011642772661704, "grad_norm": NaN, "learning_rate": 0.0004103058062978714, "loss": 0.0, "step": 51280 }, { "epoch": 0.7013010097695374, "grad_norm": NaN, "learning_rate": 0.000410260020435844, "loss": 0.0, "step": 51290 }, { "epoch": 0.7014377422729045, "grad_norm": NaN, "learning_rate": 0.0004102142254467882, "loss": 0.0, "step": 51300 }, { "epoch": 0.7015744747762714, "grad_norm": NaN, "learning_rate": 0.00041016842133331235, "loss": 0.0, "step": 51310 }, { "epoch": 0.7017112072796384, "grad_norm": NaN, "learning_rate": 0.0004101226080980249, "loss": 0.0, "step": 51320 }, { "epoch": 0.7018479397830055, "grad_norm": NaN, "learning_rate": 0.000410076785743535, "loss": 0.0, "step": 51330 }, { "epoch": 0.7019846722863725, "grad_norm": NaN, "learning_rate": 0.0004100309542724523, "loss": 0.0, "step": 51340 }, { "epoch": 0.7021214047897396, "grad_norm": NaN, "learning_rate": 0.00040998511368738695, "loss": 0.0, "step": 51350 }, { "epoch": 0.7022581372931066, "grad_norm": NaN, "learning_rate": 0.00040993926399094966, "loss": 0.0, "step": 51360 }, { "epoch": 0.7023948697964737, "grad_norm": NaN, "learning_rate": 0.0004098934051857517, "loss": 0.0, "step": 51370 }, { "epoch": 0.7025316022998407, "grad_norm": NaN, "learning_rate": 0.0004098475372744046, "loss": 0.0, "step": 51380 }, { "epoch": 0.7026683348032078, "grad_norm": NaN, "learning_rate": 0.00040980166025952083, "loss": 0.0, "step": 51390 }, { "epoch": 0.7028050673065748, "grad_norm": NaN, "learning_rate": 0.0004097557741437129, "loss": 0.0, "step": 51400 }, { "epoch": 0.7029417998099419, "grad_norm": NaN, "learning_rate": 0.0004097098789295943, "loss": 0.0, "step": 51410 }, { "epoch": 0.7030785323133089, "grad_norm": NaN, "learning_rate": 0.00040966397461977876, "loss": 0.0, "step": 51420 }, { "epoch": 0.7032152648166758, "grad_norm": NaN, "learning_rate": 0.0004096180612168806, "loss": 0.0, "step": 51430 }, { "epoch": 0.7033519973200429, "grad_norm": NaN, "learning_rate": 0.00040957213872351464, "loss": 0.0, "step": 51440 }, { "epoch": 0.7034887298234099, "grad_norm": NaN, "learning_rate": 0.00040952620714229616, "loss": 0.0, "step": 51450 }, { "epoch": 0.703625462326777, "grad_norm": NaN, "learning_rate": 0.0004094802664758411, "loss": 0.0, "step": 51460 }, { "epoch": 0.703762194830144, "grad_norm": NaN, "learning_rate": 0.0004094343167267659, "loss": 0.0, "step": 51470 }, { "epoch": 0.7038989273335111, "grad_norm": NaN, "learning_rate": 0.00040938835789768735, "loss": 0.0, "step": 51480 }, { "epoch": 0.7040356598368781, "grad_norm": NaN, "learning_rate": 0.0004093423899912229, "loss": 0.0, "step": 51490 }, { "epoch": 0.7041723923402452, "grad_norm": NaN, "learning_rate": 0.00040929641300999054, "loss": 0.0, "step": 51500 }, { "epoch": 0.7043091248436122, "grad_norm": NaN, "learning_rate": 0.00040925042695660863, "loss": 0.0, "step": 51510 }, { "epoch": 0.7044458573469793, "grad_norm": NaN, "learning_rate": 0.0004092044318336963, "loss": 0.0, "step": 51520 }, { "epoch": 0.7045825898503463, "grad_norm": NaN, "learning_rate": 0.0004091584276438728, "loss": 0.0, "step": 51530 }, { "epoch": 0.7047193223537133, "grad_norm": NaN, "learning_rate": 0.00040911241438975834, "loss": 0.0, "step": 51540 }, { "epoch": 0.7048560548570804, "grad_norm": NaN, "learning_rate": 0.00040906639207397325, "loss": 0.0, "step": 51550 }, { "epoch": 0.7049927873604473, "grad_norm": NaN, "learning_rate": 0.00040902036069913875, "loss": 0.0, "step": 51560 }, { "epoch": 0.7051295198638144, "grad_norm": NaN, "learning_rate": 0.0004089743202678763, "loss": 0.0, "step": 51570 }, { "epoch": 0.7052662523671814, "grad_norm": NaN, "learning_rate": 0.000408928270782808, "loss": 0.0, "step": 51580 }, { "epoch": 0.7054029848705485, "grad_norm": NaN, "learning_rate": 0.0004088822122465564, "loss": 0.0, "step": 51590 }, { "epoch": 0.7055397173739155, "grad_norm": NaN, "learning_rate": 0.0004088361446617446, "loss": 0.0, "step": 51600 }, { "epoch": 0.7056764498772826, "grad_norm": NaN, "learning_rate": 0.00040879006803099617, "loss": 0.0, "step": 51610 }, { "epoch": 0.7058131823806496, "grad_norm": NaN, "learning_rate": 0.0004087439823569353, "loss": 0.0, "step": 51620 }, { "epoch": 0.7059499148840167, "grad_norm": NaN, "learning_rate": 0.0004086978876421866, "loss": 0.0, "step": 51630 }, { "epoch": 0.7060866473873837, "grad_norm": NaN, "learning_rate": 0.00040865178388937534, "loss": 0.0, "step": 51640 }, { "epoch": 0.7062233798907507, "grad_norm": NaN, "learning_rate": 0.000408605671101127, "loss": 0.0, "step": 51650 }, { "epoch": 0.7063601123941178, "grad_norm": NaN, "learning_rate": 0.00040855954928006795, "loss": 0.0, "step": 51660 }, { "epoch": 0.7064968448974848, "grad_norm": NaN, "learning_rate": 0.00040851341842882473, "loss": 0.0, "step": 51670 }, { "epoch": 0.7066335774008519, "grad_norm": NaN, "learning_rate": 0.00040846727855002466, "loss": 0.0, "step": 51680 }, { "epoch": 0.7067703099042189, "grad_norm": NaN, "learning_rate": 0.0004084211296462954, "loss": 0.0, "step": 51690 }, { "epoch": 0.706907042407586, "grad_norm": NaN, "learning_rate": 0.00040837497172026526, "loss": 0.0, "step": 51700 }, { "epoch": 0.7070437749109529, "grad_norm": NaN, "learning_rate": 0.000408328804774563, "loss": 0.0, "step": 51710 }, { "epoch": 0.70718050741432, "grad_norm": NaN, "learning_rate": 0.00040828262881181777, "loss": 0.0, "step": 51720 }, { "epoch": 0.707317239917687, "grad_norm": NaN, "learning_rate": 0.00040823644383465954, "loss": 0.0, "step": 51730 }, { "epoch": 0.7074539724210541, "grad_norm": NaN, "learning_rate": 0.0004081902498457185, "loss": 0.0, "step": 51740 }, { "epoch": 0.7075907049244211, "grad_norm": NaN, "learning_rate": 0.00040814404684762543, "loss": 0.0, "step": 51750 }, { "epoch": 0.7077274374277881, "grad_norm": NaN, "learning_rate": 0.00040809783484301173, "loss": 0.0, "step": 51760 }, { "epoch": 0.7078641699311552, "grad_norm": NaN, "learning_rate": 0.0004080516138345092, "loss": 0.0, "step": 51770 }, { "epoch": 0.7080009024345222, "grad_norm": NaN, "learning_rate": 0.00040800538382475016, "loss": 0.0, "step": 51780 }, { "epoch": 0.7081376349378893, "grad_norm": NaN, "learning_rate": 0.00040795914481636756, "loss": 0.0, "step": 51790 }, { "epoch": 0.7082743674412563, "grad_norm": NaN, "learning_rate": 0.0004079128968119947, "loss": 0.0, "step": 51800 }, { "epoch": 0.7084110999446234, "grad_norm": NaN, "learning_rate": 0.0004078666398142654, "loss": 0.0, "step": 51810 }, { "epoch": 0.7085478324479904, "grad_norm": NaN, "learning_rate": 0.00040782037382581425, "loss": 0.0, "step": 51820 }, { "epoch": 0.7086845649513575, "grad_norm": NaN, "learning_rate": 0.00040777409884927606, "loss": 0.0, "step": 51830 }, { "epoch": 0.7088212974547244, "grad_norm": NaN, "learning_rate": 0.0004077278148872862, "loss": 0.0, "step": 51840 }, { "epoch": 0.7089580299580915, "grad_norm": NaN, "learning_rate": 0.0004076815219424807, "loss": 0.0, "step": 51850 }, { "epoch": 0.7090947624614585, "grad_norm": NaN, "learning_rate": 0.00040763522001749597, "loss": 0.0, "step": 51860 }, { "epoch": 0.7092314949648255, "grad_norm": NaN, "learning_rate": 0.0004075889091149689, "loss": 0.0, "step": 51870 }, { "epoch": 0.7093682274681926, "grad_norm": NaN, "learning_rate": 0.0004075425892375372, "loss": 0.0, "step": 51880 }, { "epoch": 0.7095049599715596, "grad_norm": NaN, "learning_rate": 0.0004074962603878385, "loss": 0.0, "step": 51890 }, { "epoch": 0.7096416924749267, "grad_norm": NaN, "learning_rate": 0.00040744992256851154, "loss": 0.0, "step": 51900 }, { "epoch": 0.7097784249782937, "grad_norm": NaN, "learning_rate": 0.0004074035757821952, "loss": 0.0, "step": 51910 }, { "epoch": 0.7099151574816608, "grad_norm": NaN, "learning_rate": 0.0004073572200315291, "loss": 0.0, "step": 51920 }, { "epoch": 0.7100518899850278, "grad_norm": NaN, "learning_rate": 0.00040731085531915315, "loss": 0.0, "step": 51930 }, { "epoch": 0.7101886224883949, "grad_norm": NaN, "learning_rate": 0.000407264481647708, "loss": 0.0, "step": 51940 }, { "epoch": 0.7103253549917619, "grad_norm": NaN, "learning_rate": 0.0004072180990198346, "loss": 0.0, "step": 51950 }, { "epoch": 0.710462087495129, "grad_norm": NaN, "learning_rate": 0.0004071717074381745, "loss": 0.0, "step": 51960 }, { "epoch": 0.710598819998496, "grad_norm": NaN, "learning_rate": 0.0004071253069053698, "loss": 0.0, "step": 51970 }, { "epoch": 0.7107355525018629, "grad_norm": NaN, "learning_rate": 0.00040707889742406313, "loss": 0.0, "step": 51980 }, { "epoch": 0.71087228500523, "grad_norm": NaN, "learning_rate": 0.00040703247899689753, "loss": 0.0, "step": 51990 }, { "epoch": 0.711009017508597, "grad_norm": NaN, "learning_rate": 0.00040698605162651656, "loss": 0.0, "step": 52000 }, { "epoch": 0.7111457500119641, "grad_norm": NaN, "learning_rate": 0.00040693961531556433, "loss": 0.0, "step": 52010 }, { "epoch": 0.7112824825153311, "grad_norm": NaN, "learning_rate": 0.0004068931700666855, "loss": 0.0, "step": 52020 }, { "epoch": 0.7114192150186982, "grad_norm": NaN, "learning_rate": 0.0004068467158825251, "loss": 0.0, "step": 52030 }, { "epoch": 0.7115559475220652, "grad_norm": NaN, "learning_rate": 0.0004068002527657289, "loss": 0.0, "step": 52040 }, { "epoch": 0.7116926800254323, "grad_norm": NaN, "learning_rate": 0.0004067537807189429, "loss": 0.0, "step": 52050 }, { "epoch": 0.7118294125287993, "grad_norm": NaN, "learning_rate": 0.0004067072997448138, "loss": 0.0, "step": 52060 }, { "epoch": 0.7119661450321664, "grad_norm": NaN, "learning_rate": 0.0004066608098459888, "loss": 0.0, "step": 52070 }, { "epoch": 0.7121028775355334, "grad_norm": NaN, "learning_rate": 0.00040661431102511546, "loss": 0.0, "step": 52080 }, { "epoch": 0.7122396100389004, "grad_norm": NaN, "learning_rate": 0.0004065678032848421, "loss": 0.0, "step": 52090 }, { "epoch": 0.7123763425422674, "grad_norm": NaN, "learning_rate": 0.0004065212866278172, "loss": 0.0, "step": 52100 }, { "epoch": 0.7125130750456344, "grad_norm": NaN, "learning_rate": 0.00040647476105669015, "loss": 0.0, "step": 52110 }, { "epoch": 0.7126498075490015, "grad_norm": NaN, "learning_rate": 0.0004064282265741105, "loss": 0.0, "step": 52120 }, { "epoch": 0.7127865400523685, "grad_norm": NaN, "learning_rate": 0.0004063816831827286, "loss": 0.0, "step": 52130 }, { "epoch": 0.7129232725557356, "grad_norm": NaN, "learning_rate": 0.00040633513088519495, "loss": 0.0, "step": 52140 }, { "epoch": 0.7130600050591026, "grad_norm": NaN, "learning_rate": 0.00040628856968416094, "loss": 0.0, "step": 52150 }, { "epoch": 0.7131967375624697, "grad_norm": NaN, "learning_rate": 0.0004062419995822783, "loss": 0.0, "step": 52160 }, { "epoch": 0.7133334700658367, "grad_norm": NaN, "learning_rate": 0.00040619542058219915, "loss": 0.0, "step": 52170 }, { "epoch": 0.7134702025692038, "grad_norm": NaN, "learning_rate": 0.0004061488326865762, "loss": 0.0, "step": 52180 }, { "epoch": 0.7136069350725708, "grad_norm": NaN, "learning_rate": 0.0004061022358980629, "loss": 0.0, "step": 52190 }, { "epoch": 0.7137436675759378, "grad_norm": NaN, "learning_rate": 0.00040605563021931277, "loss": 0.0, "step": 52200 }, { "epoch": 0.7138804000793049, "grad_norm": NaN, "learning_rate": 0.0004060090156529803, "loss": 0.0, "step": 52210 }, { "epoch": 0.7140171325826719, "grad_norm": NaN, "learning_rate": 0.00040596239220172005, "loss": 0.0, "step": 52220 }, { "epoch": 0.714153865086039, "grad_norm": NaN, "learning_rate": 0.00040591575986818727, "loss": 0.0, "step": 52230 }, { "epoch": 0.7142905975894059, "grad_norm": NaN, "learning_rate": 0.00040586911865503794, "loss": 0.0, "step": 52240 }, { "epoch": 0.714427330092773, "grad_norm": NaN, "learning_rate": 0.0004058224685649282, "loss": 0.0, "step": 52250 }, { "epoch": 0.71456406259614, "grad_norm": NaN, "learning_rate": 0.000405775809600515, "loss": 0.0, "step": 52260 }, { "epoch": 0.7147007950995071, "grad_norm": NaN, "learning_rate": 0.0004057291417644553, "loss": 0.0, "step": 52270 }, { "epoch": 0.7148375276028741, "grad_norm": NaN, "learning_rate": 0.0004056824650594072, "loss": 0.0, "step": 52280 }, { "epoch": 0.7149742601062412, "grad_norm": NaN, "learning_rate": 0.0004056357794880289, "loss": 0.0, "step": 52290 }, { "epoch": 0.7151109926096082, "grad_norm": NaN, "learning_rate": 0.0004055890850529792, "loss": 0.0, "step": 52300 }, { "epoch": 0.7152477251129752, "grad_norm": NaN, "learning_rate": 0.0004055423817569174, "loss": 0.0, "step": 52310 }, { "epoch": 0.7153844576163423, "grad_norm": NaN, "learning_rate": 0.0004054956696025034, "loss": 0.0, "step": 52320 }, { "epoch": 0.7155211901197093, "grad_norm": NaN, "learning_rate": 0.00040544894859239743, "loss": 0.0, "step": 52330 }, { "epoch": 0.7156579226230764, "grad_norm": NaN, "learning_rate": 0.0004054022187292603, "loss": 0.0, "step": 52340 }, { "epoch": 0.7157946551264434, "grad_norm": NaN, "learning_rate": 0.0004053554800157535, "loss": 0.0, "step": 52350 }, { "epoch": 0.7159313876298105, "grad_norm": NaN, "learning_rate": 0.0004053087324545386, "loss": 0.0, "step": 52360 }, { "epoch": 0.7160681201331774, "grad_norm": NaN, "learning_rate": 0.00040526197604827814, "loss": 0.0, "step": 52370 }, { "epoch": 0.7162048526365445, "grad_norm": NaN, "learning_rate": 0.0004052152107996349, "loss": 0.0, "step": 52380 }, { "epoch": 0.7163415851399115, "grad_norm": NaN, "learning_rate": 0.00040516843671127225, "loss": 0.0, "step": 52390 }, { "epoch": 0.7164783176432785, "grad_norm": NaN, "learning_rate": 0.0004051216537858541, "loss": 0.0, "step": 52400 }, { "epoch": 0.7166150501466456, "grad_norm": NaN, "learning_rate": 0.0004050748620260446, "loss": 0.0, "step": 52410 }, { "epoch": 0.7167517826500126, "grad_norm": NaN, "learning_rate": 0.0004050280614345088, "loss": 0.0, "step": 52420 }, { "epoch": 0.7168885151533797, "grad_norm": NaN, "learning_rate": 0.0004049812520139119, "loss": 0.0, "step": 52430 }, { "epoch": 0.7170252476567467, "grad_norm": NaN, "learning_rate": 0.00040493443376692, "loss": 0.0, "step": 52440 }, { "epoch": 0.7171619801601138, "grad_norm": NaN, "learning_rate": 0.0004048876066961992, "loss": 0.0, "step": 52450 }, { "epoch": 0.7172987126634808, "grad_norm": NaN, "learning_rate": 0.00040484077080441645, "loss": 0.0, "step": 52460 }, { "epoch": 0.7174354451668479, "grad_norm": NaN, "learning_rate": 0.00040479392609423914, "loss": 0.0, "step": 52470 }, { "epoch": 0.7175721776702149, "grad_norm": NaN, "learning_rate": 0.0004047470725683352, "loss": 0.0, "step": 52480 }, { "epoch": 0.717708910173582, "grad_norm": NaN, "learning_rate": 0.000404700210229373, "loss": 0.0, "step": 52490 }, { "epoch": 0.717845642676949, "grad_norm": NaN, "learning_rate": 0.0004046533390800213, "loss": 0.0, "step": 52500 }, { "epoch": 0.7179823751803159, "grad_norm": NaN, "learning_rate": 0.00040460645912294947, "loss": 0.0, "step": 52510 }, { "epoch": 0.718119107683683, "grad_norm": NaN, "learning_rate": 0.00040455957036082755, "loss": 0.0, "step": 52520 }, { "epoch": 0.71825584018705, "grad_norm": NaN, "learning_rate": 0.00040451267279632586, "loss": 0.0, "step": 52530 }, { "epoch": 0.7183925726904171, "grad_norm": NaN, "learning_rate": 0.0004044657664321152, "loss": 0.0, "step": 52540 }, { "epoch": 0.7185293051937841, "grad_norm": NaN, "learning_rate": 0.000404418851270867, "loss": 0.0, "step": 52550 }, { "epoch": 0.7186660376971512, "grad_norm": NaN, "learning_rate": 0.00040437192731525306, "loss": 0.0, "step": 52560 }, { "epoch": 0.7188027702005182, "grad_norm": NaN, "learning_rate": 0.00040432499456794597, "loss": 0.0, "step": 52570 }, { "epoch": 0.7189395027038853, "grad_norm": NaN, "learning_rate": 0.0004042780530316185, "loss": 0.0, "step": 52580 }, { "epoch": 0.7190762352072523, "grad_norm": NaN, "learning_rate": 0.0004042311027089439, "loss": 0.0, "step": 52590 }, { "epoch": 0.7192129677106194, "grad_norm": NaN, "learning_rate": 0.00040418414360259626, "loss": 0.0, "step": 52600 }, { "epoch": 0.7193497002139864, "grad_norm": NaN, "learning_rate": 0.00040413717571524986, "loss": 0.0, "step": 52610 }, { "epoch": 0.7194864327173534, "grad_norm": NaN, "learning_rate": 0.0004040901990495797, "loss": 0.0, "step": 52620 }, { "epoch": 0.7196231652207205, "grad_norm": NaN, "learning_rate": 0.0004040432136082609, "loss": 0.0, "step": 52630 }, { "epoch": 0.7197598977240874, "grad_norm": NaN, "learning_rate": 0.0004039962193939697, "loss": 0.0, "step": 52640 }, { "epoch": 0.7198966302274545, "grad_norm": NaN, "learning_rate": 0.00040394921640938213, "loss": 0.0, "step": 52650 }, { "epoch": 0.7200333627308215, "grad_norm": NaN, "learning_rate": 0.00040390220465717536, "loss": 0.0, "step": 52660 }, { "epoch": 0.7201700952341886, "grad_norm": NaN, "learning_rate": 0.00040385518414002663, "loss": 0.0, "step": 52670 }, { "epoch": 0.7203068277375556, "grad_norm": NaN, "learning_rate": 0.00040380815486061394, "loss": 0.0, "step": 52680 }, { "epoch": 0.7204435602409227, "grad_norm": NaN, "learning_rate": 0.0004037611168216155, "loss": 0.0, "step": 52690 }, { "epoch": 0.7205802927442897, "grad_norm": NaN, "learning_rate": 0.0004037140700257104, "loss": 0.0, "step": 52700 }, { "epoch": 0.7207170252476568, "grad_norm": NaN, "learning_rate": 0.00040366701447557777, "loss": 0.0, "step": 52710 }, { "epoch": 0.7208537577510238, "grad_norm": NaN, "learning_rate": 0.00040361995017389765, "loss": 0.0, "step": 52720 }, { "epoch": 0.7209904902543908, "grad_norm": NaN, "learning_rate": 0.00040357287712335045, "loss": 0.0, "step": 52730 }, { "epoch": 0.7211272227577579, "grad_norm": NaN, "learning_rate": 0.0004035257953266169, "loss": 0.0, "step": 52740 }, { "epoch": 0.7212639552611249, "grad_norm": NaN, "learning_rate": 0.0004034787047863785, "loss": 0.0, "step": 52750 }, { "epoch": 0.721400687764492, "grad_norm": NaN, "learning_rate": 0.0004034316055053171, "loss": 0.0, "step": 52760 }, { "epoch": 0.7215374202678589, "grad_norm": NaN, "learning_rate": 0.000403384497486115, "loss": 0.0, "step": 52770 }, { "epoch": 0.721674152771226, "grad_norm": NaN, "learning_rate": 0.0004033373807314551, "loss": 0.0, "step": 52780 }, { "epoch": 0.721810885274593, "grad_norm": NaN, "learning_rate": 0.0004032902552440208, "loss": 0.0, "step": 52790 }, { "epoch": 0.7219476177779601, "grad_norm": NaN, "learning_rate": 0.0004032431210264959, "loss": 0.0, "step": 52800 }, { "epoch": 0.7220843502813271, "grad_norm": NaN, "learning_rate": 0.0004031959780815648, "loss": 0.0, "step": 52810 }, { "epoch": 0.7222210827846942, "grad_norm": NaN, "learning_rate": 0.0004031488264119124, "loss": 0.0, "step": 52820 }, { "epoch": 0.7223578152880612, "grad_norm": NaN, "learning_rate": 0.000403101666020224, "loss": 0.0, "step": 52830 }, { "epoch": 0.7224945477914282, "grad_norm": NaN, "learning_rate": 0.0004030544969091854, "loss": 0.0, "step": 52840 }, { "epoch": 0.7226312802947953, "grad_norm": NaN, "learning_rate": 0.000403007319081483, "loss": 0.0, "step": 52850 }, { "epoch": 0.7227680127981623, "grad_norm": NaN, "learning_rate": 0.00040296013253980374, "loss": 0.0, "step": 52860 }, { "epoch": 0.7229047453015294, "grad_norm": NaN, "learning_rate": 0.00040291293728683474, "loss": 0.0, "step": 52870 }, { "epoch": 0.7230414778048964, "grad_norm": NaN, "learning_rate": 0.000402865733325264, "loss": 0.0, "step": 52880 }, { "epoch": 0.7231782103082635, "grad_norm": NaN, "learning_rate": 0.0004028185206577798, "loss": 0.0, "step": 52890 }, { "epoch": 0.7233149428116304, "grad_norm": NaN, "learning_rate": 0.000402771299287071, "loss": 0.0, "step": 52900 }, { "epoch": 0.7234516753149975, "grad_norm": NaN, "learning_rate": 0.00040272406921582684, "loss": 0.0, "step": 52910 }, { "epoch": 0.7235884078183645, "grad_norm": NaN, "learning_rate": 0.0004026768304467372, "loss": 0.0, "step": 52920 }, { "epoch": 0.7237251403217316, "grad_norm": NaN, "learning_rate": 0.0004026295829824923, "loss": 0.0, "step": 52930 }, { "epoch": 0.7238618728250986, "grad_norm": NaN, "learning_rate": 0.00040258232682578314, "loss": 0.0, "step": 52940 }, { "epoch": 0.7239986053284656, "grad_norm": NaN, "learning_rate": 0.0004025350619793009, "loss": 0.0, "step": 52950 }, { "epoch": 0.7241353378318327, "grad_norm": NaN, "learning_rate": 0.00040248778844573734, "loss": 0.0, "step": 52960 }, { "epoch": 0.7242720703351997, "grad_norm": NaN, "learning_rate": 0.00040244050622778485, "loss": 0.0, "step": 52970 }, { "epoch": 0.7244088028385668, "grad_norm": NaN, "learning_rate": 0.0004023932153281361, "loss": 0.0, "step": 52980 }, { "epoch": 0.7245455353419338, "grad_norm": NaN, "learning_rate": 0.0004023459157494845, "loss": 0.0, "step": 52990 }, { "epoch": 0.7246822678453009, "grad_norm": NaN, "learning_rate": 0.0004022986074945238, "loss": 0.0, "step": 53000 }, { "epoch": 0.7248190003486679, "grad_norm": NaN, "learning_rate": 0.0004022512905659482, "loss": 0.0, "step": 53010 }, { "epoch": 0.724955732852035, "grad_norm": NaN, "learning_rate": 0.0004022039649664525, "loss": 0.0, "step": 53020 }, { "epoch": 0.725092465355402, "grad_norm": NaN, "learning_rate": 0.000402156630698732, "loss": 0.0, "step": 53030 }, { "epoch": 0.725229197858769, "grad_norm": NaN, "learning_rate": 0.0004021092877654823, "loss": 0.0, "step": 53040 }, { "epoch": 0.725365930362136, "grad_norm": NaN, "learning_rate": 0.0004020619361693998, "loss": 0.0, "step": 53050 }, { "epoch": 0.725502662865503, "grad_norm": NaN, "learning_rate": 0.00040201457591318126, "loss": 0.0, "step": 53060 }, { "epoch": 0.7256393953688701, "grad_norm": NaN, "learning_rate": 0.00040196720699952373, "loss": 0.0, "step": 53070 }, { "epoch": 0.7257761278722371, "grad_norm": NaN, "learning_rate": 0.00040191982943112517, "loss": 0.0, "step": 53080 }, { "epoch": 0.7259128603756042, "grad_norm": NaN, "learning_rate": 0.0004018724432106836, "loss": 0.0, "step": 53090 }, { "epoch": 0.7260495928789712, "grad_norm": NaN, "learning_rate": 0.0004018250483408978, "loss": 0.0, "step": 53100 }, { "epoch": 0.7261863253823383, "grad_norm": NaN, "learning_rate": 0.00040177764482446707, "loss": 0.0, "step": 53110 }, { "epoch": 0.7263230578857053, "grad_norm": NaN, "learning_rate": 0.00040173023266409095, "loss": 0.0, "step": 53120 }, { "epoch": 0.7264597903890724, "grad_norm": NaN, "learning_rate": 0.00040168281186246966, "loss": 0.0, "step": 53130 }, { "epoch": 0.7265965228924394, "grad_norm": NaN, "learning_rate": 0.00040163538242230396, "loss": 0.0, "step": 53140 }, { "epoch": 0.7267332553958065, "grad_norm": NaN, "learning_rate": 0.00040158794434629497, "loss": 0.0, "step": 53150 }, { "epoch": 0.7268699878991735, "grad_norm": NaN, "learning_rate": 0.0004015404976371444, "loss": 0.0, "step": 53160 }, { "epoch": 0.7270067204025404, "grad_norm": NaN, "learning_rate": 0.00040149304229755435, "loss": 0.0, "step": 53170 }, { "epoch": 0.7271434529059075, "grad_norm": NaN, "learning_rate": 0.00040144557833022746, "loss": 0.0, "step": 53180 }, { "epoch": 0.7272801854092745, "grad_norm": NaN, "learning_rate": 0.00040139810573786683, "loss": 0.0, "step": 53190 }, { "epoch": 0.7274169179126416, "grad_norm": NaN, "learning_rate": 0.00040135062452317615, "loss": 0.0, "step": 53200 }, { "epoch": 0.7275536504160086, "grad_norm": NaN, "learning_rate": 0.0004013031346888596, "loss": 0.0, "step": 53210 }, { "epoch": 0.7276903829193757, "grad_norm": NaN, "learning_rate": 0.0004012556362376217, "loss": 0.0, "step": 53220 }, { "epoch": 0.7278271154227427, "grad_norm": NaN, "learning_rate": 0.00040120812917216766, "loss": 0.0, "step": 53230 }, { "epoch": 0.7279638479261098, "grad_norm": NaN, "learning_rate": 0.00040116061349520295, "loss": 0.0, "step": 53240 }, { "epoch": 0.7281005804294768, "grad_norm": NaN, "learning_rate": 0.00040111308920943366, "loss": 0.0, "step": 53250 }, { "epoch": 0.7282373129328439, "grad_norm": NaN, "learning_rate": 0.0004010655563175665, "loss": 0.0, "step": 53260 }, { "epoch": 0.7283740454362109, "grad_norm": NaN, "learning_rate": 0.0004010180148223083, "loss": 0.0, "step": 53270 }, { "epoch": 0.7285107779395779, "grad_norm": NaN, "learning_rate": 0.0004009704647263668, "loss": 0.0, "step": 53280 }, { "epoch": 0.728647510442945, "grad_norm": NaN, "learning_rate": 0.0004009229060324501, "loss": 0.0, "step": 53290 }, { "epoch": 0.7287842429463119, "grad_norm": NaN, "learning_rate": 0.0004008753387432665, "loss": 0.0, "step": 53300 }, { "epoch": 0.728920975449679, "grad_norm": NaN, "learning_rate": 0.0004008277628615251, "loss": 0.0, "step": 53310 }, { "epoch": 0.729057707953046, "grad_norm": NaN, "learning_rate": 0.0004007801783899356, "loss": 0.0, "step": 53320 }, { "epoch": 0.7291944404564131, "grad_norm": NaN, "learning_rate": 0.0004007325853312078, "loss": 0.0, "step": 53330 }, { "epoch": 0.7293311729597801, "grad_norm": NaN, "learning_rate": 0.0004006849836880522, "loss": 0.0, "step": 53340 }, { "epoch": 0.7294679054631472, "grad_norm": NaN, "learning_rate": 0.00040063737346317983, "loss": 0.0, "step": 53350 }, { "epoch": 0.7296046379665142, "grad_norm": NaN, "learning_rate": 0.00040058975465930225, "loss": 0.0, "step": 53360 }, { "epoch": 0.7297413704698813, "grad_norm": NaN, "learning_rate": 0.00040054212727913123, "loss": 0.0, "step": 53370 }, { "epoch": 0.7298781029732483, "grad_norm": NaN, "learning_rate": 0.0004004944913253794, "loss": 0.0, "step": 53380 }, { "epoch": 0.7300148354766153, "grad_norm": NaN, "learning_rate": 0.0004004468468007595, "loss": 0.0, "step": 53390 }, { "epoch": 0.7301515679799824, "grad_norm": NaN, "learning_rate": 0.00040039919370798516, "loss": 0.0, "step": 53400 }, { "epoch": 0.7302883004833494, "grad_norm": NaN, "learning_rate": 0.0004003515320497701, "loss": 0.0, "step": 53410 }, { "epoch": 0.7304250329867165, "grad_norm": NaN, "learning_rate": 0.0004003038618288288, "loss": 0.0, "step": 53420 }, { "epoch": 0.7305617654900834, "grad_norm": NaN, "learning_rate": 0.00040025618304787616, "loss": 0.0, "step": 53430 }, { "epoch": 0.7306984979934505, "grad_norm": NaN, "learning_rate": 0.0004002084957096275, "loss": 0.0, "step": 53440 }, { "epoch": 0.7308352304968175, "grad_norm": NaN, "learning_rate": 0.00040016079981679883, "loss": 0.0, "step": 53450 }, { "epoch": 0.7309719630001846, "grad_norm": NaN, "learning_rate": 0.00040011309537210627, "loss": 0.0, "step": 53460 }, { "epoch": 0.7311086955035516, "grad_norm": NaN, "learning_rate": 0.0004000653823782667, "loss": 0.0, "step": 53470 }, { "epoch": 0.7312454280069187, "grad_norm": NaN, "learning_rate": 0.0004000176608379976, "loss": 0.0, "step": 53480 }, { "epoch": 0.7313821605102857, "grad_norm": NaN, "learning_rate": 0.00039996993075401674, "loss": 0.0, "step": 53490 }, { "epoch": 0.7315188930136527, "grad_norm": NaN, "learning_rate": 0.00039992219212904226, "loss": 0.0, "step": 53500 }, { "epoch": 0.7316556255170198, "grad_norm": NaN, "learning_rate": 0.00039987444496579304, "loss": 0.0, "step": 53510 }, { "epoch": 0.7317923580203868, "grad_norm": NaN, "learning_rate": 0.0003998266892669884, "loss": 0.0, "step": 53520 }, { "epoch": 0.7319290905237539, "grad_norm": NaN, "learning_rate": 0.00039977892503534806, "loss": 0.0, "step": 53530 }, { "epoch": 0.7320658230271209, "grad_norm": NaN, "learning_rate": 0.00039973115227359213, "loss": 0.0, "step": 53540 }, { "epoch": 0.732202555530488, "grad_norm": NaN, "learning_rate": 0.00039968337098444153, "loss": 0.0, "step": 53550 }, { "epoch": 0.732339288033855, "grad_norm": NaN, "learning_rate": 0.0003996355811706173, "loss": 0.0, "step": 53560 }, { "epoch": 0.732476020537222, "grad_norm": NaN, "learning_rate": 0.00039958778283484124, "loss": 0.0, "step": 53570 }, { "epoch": 0.732612753040589, "grad_norm": NaN, "learning_rate": 0.00039953997597983555, "loss": 0.0, "step": 53580 }, { "epoch": 0.7327494855439561, "grad_norm": NaN, "learning_rate": 0.00039949216060832285, "loss": 0.0, "step": 53590 }, { "epoch": 0.7328862180473231, "grad_norm": NaN, "learning_rate": 0.00039944433672302614, "loss": 0.0, "step": 53600 }, { "epoch": 0.7330229505506901, "grad_norm": NaN, "learning_rate": 0.00039939650432666934, "loss": 0.0, "step": 53610 }, { "epoch": 0.7331596830540572, "grad_norm": NaN, "learning_rate": 0.0003993486634219764, "loss": 0.0, "step": 53620 }, { "epoch": 0.7332964155574242, "grad_norm": NaN, "learning_rate": 0.00039930081401167203, "loss": 0.0, "step": 53630 }, { "epoch": 0.7334331480607913, "grad_norm": NaN, "learning_rate": 0.00039925295609848115, "loss": 0.0, "step": 53640 }, { "epoch": 0.7335698805641583, "grad_norm": NaN, "learning_rate": 0.0003992050896851295, "loss": 0.0, "step": 53650 }, { "epoch": 0.7337066130675254, "grad_norm": NaN, "learning_rate": 0.0003991572147743431, "loss": 0.0, "step": 53660 }, { "epoch": 0.7338433455708924, "grad_norm": NaN, "learning_rate": 0.0003991093313688484, "loss": 0.0, "step": 53670 }, { "epoch": 0.7339800780742595, "grad_norm": NaN, "learning_rate": 0.0003990614394713725, "loss": 0.0, "step": 53680 }, { "epoch": 0.7341168105776265, "grad_norm": NaN, "learning_rate": 0.000399013539084643, "loss": 0.0, "step": 53690 }, { "epoch": 0.7342535430809936, "grad_norm": NaN, "learning_rate": 0.00039896563021138773, "loss": 0.0, "step": 53700 }, { "epoch": 0.7343902755843605, "grad_norm": NaN, "learning_rate": 0.00039891771285433523, "loss": 0.0, "step": 53710 }, { "epoch": 0.7345270080877275, "grad_norm": NaN, "learning_rate": 0.0003988697870162144, "loss": 0.0, "step": 53720 }, { "epoch": 0.7346637405910946, "grad_norm": NaN, "learning_rate": 0.00039882185269975477, "loss": 0.0, "step": 53730 }, { "epoch": 0.7348004730944616, "grad_norm": NaN, "learning_rate": 0.0003987739099076863, "loss": 0.0, "step": 53740 }, { "epoch": 0.7349372055978287, "grad_norm": NaN, "learning_rate": 0.00039872595864273925, "loss": 0.0, "step": 53750 }, { "epoch": 0.7350739381011957, "grad_norm": NaN, "learning_rate": 0.00039867799890764465, "loss": 0.0, "step": 53760 }, { "epoch": 0.7352106706045628, "grad_norm": NaN, "learning_rate": 0.0003986300307051338, "loss": 0.0, "step": 53770 }, { "epoch": 0.7353474031079298, "grad_norm": NaN, "learning_rate": 0.0003985820540379386, "loss": 0.0, "step": 53780 }, { "epoch": 0.7354841356112969, "grad_norm": NaN, "learning_rate": 0.0003985340689087913, "loss": 0.0, "step": 53790 }, { "epoch": 0.7356208681146639, "grad_norm": NaN, "learning_rate": 0.0003984860753204248, "loss": 0.0, "step": 53800 }, { "epoch": 0.7357576006180309, "grad_norm": NaN, "learning_rate": 0.00039843807327557244, "loss": 0.0, "step": 53810 }, { "epoch": 0.735894333121398, "grad_norm": NaN, "learning_rate": 0.0003983900627769679, "loss": 0.0, "step": 53820 }, { "epoch": 0.736031065624765, "grad_norm": NaN, "learning_rate": 0.0003983420438273455, "loss": 0.0, "step": 53830 }, { "epoch": 0.736167798128132, "grad_norm": NaN, "learning_rate": 0.00039829401642943994, "loss": 0.0, "step": 53840 }, { "epoch": 0.736304530631499, "grad_norm": NaN, "learning_rate": 0.00039824598058598647, "loss": 0.0, "step": 53850 }, { "epoch": 0.7364412631348661, "grad_norm": NaN, "learning_rate": 0.0003981979362997209, "loss": 0.0, "step": 53860 }, { "epoch": 0.7365779956382331, "grad_norm": NaN, "learning_rate": 0.0003981498835733792, "loss": 0.0, "step": 53870 }, { "epoch": 0.7367147281416002, "grad_norm": NaN, "learning_rate": 0.0003981018224096983, "loss": 0.0, "step": 53880 }, { "epoch": 0.7368514606449672, "grad_norm": NaN, "learning_rate": 0.0003980537528114151, "loss": 0.0, "step": 53890 }, { "epoch": 0.7369881931483343, "grad_norm": NaN, "learning_rate": 0.00039800567478126737, "loss": 0.0, "step": 53900 }, { "epoch": 0.7371249256517013, "grad_norm": NaN, "learning_rate": 0.0003979575883219931, "loss": 0.0, "step": 53910 }, { "epoch": 0.7372616581550683, "grad_norm": NaN, "learning_rate": 0.0003979094934363311, "loss": 0.0, "step": 53920 }, { "epoch": 0.7373983906584354, "grad_norm": NaN, "learning_rate": 0.00039786139012702027, "loss": 0.0, "step": 53930 }, { "epoch": 0.7375351231618024, "grad_norm": NaN, "learning_rate": 0.00039781327839680025, "loss": 0.0, "step": 53940 }, { "epoch": 0.7376718556651695, "grad_norm": NaN, "learning_rate": 0.00039776515824841097, "loss": 0.0, "step": 53950 }, { "epoch": 0.7378085881685364, "grad_norm": NaN, "learning_rate": 0.000397717029684593, "loss": 0.0, "step": 53960 }, { "epoch": 0.7379453206719035, "grad_norm": NaN, "learning_rate": 0.00039766889270808725, "loss": 0.0, "step": 53970 }, { "epoch": 0.7380820531752705, "grad_norm": NaN, "learning_rate": 0.00039762074732163535, "loss": 0.0, "step": 53980 }, { "epoch": 0.7382187856786376, "grad_norm": NaN, "learning_rate": 0.00039757259352797916, "loss": 0.0, "step": 53990 }, { "epoch": 0.7383555181820046, "grad_norm": NaN, "learning_rate": 0.000397524431329861, "loss": 0.0, "step": 54000 }, { "epoch": 0.7384922506853717, "grad_norm": NaN, "learning_rate": 0.00039747626073002385, "loss": 0.0, "step": 54010 }, { "epoch": 0.7386289831887387, "grad_norm": NaN, "learning_rate": 0.0003974280817312111, "loss": 0.0, "step": 54020 }, { "epoch": 0.7387657156921057, "grad_norm": NaN, "learning_rate": 0.0003973798943361667, "loss": 0.0, "step": 54030 }, { "epoch": 0.7389024481954728, "grad_norm": NaN, "learning_rate": 0.00039733169854763495, "loss": 0.0, "step": 54040 }, { "epoch": 0.7390391806988398, "grad_norm": NaN, "learning_rate": 0.00039728349436836055, "loss": 0.0, "step": 54050 }, { "epoch": 0.7391759132022069, "grad_norm": NaN, "learning_rate": 0.00039723528180108893, "loss": 0.0, "step": 54060 }, { "epoch": 0.7393126457055739, "grad_norm": NaN, "learning_rate": 0.00039718706084856573, "loss": 0.0, "step": 54070 }, { "epoch": 0.739449378208941, "grad_norm": NaN, "learning_rate": 0.00039713883151353723, "loss": 0.0, "step": 54080 }, { "epoch": 0.739586110712308, "grad_norm": NaN, "learning_rate": 0.00039709059379875025, "loss": 0.0, "step": 54090 }, { "epoch": 0.739722843215675, "grad_norm": NaN, "learning_rate": 0.00039704234770695195, "loss": 0.0, "step": 54100 }, { "epoch": 0.739859575719042, "grad_norm": NaN, "learning_rate": 0.00039699409324088996, "loss": 0.0, "step": 54110 }, { "epoch": 0.7399963082224091, "grad_norm": NaN, "learning_rate": 0.0003969458304033126, "loss": 0.0, "step": 54120 }, { "epoch": 0.7401330407257761, "grad_norm": NaN, "learning_rate": 0.00039689755919696825, "loss": 0.0, "step": 54130 }, { "epoch": 0.7402697732291431, "grad_norm": NaN, "learning_rate": 0.00039684927962460614, "loss": 0.0, "step": 54140 }, { "epoch": 0.7404065057325102, "grad_norm": NaN, "learning_rate": 0.000396800991688976, "loss": 0.0, "step": 54150 }, { "epoch": 0.7405432382358772, "grad_norm": NaN, "learning_rate": 0.00039675269539282763, "loss": 0.0, "step": 54160 }, { "epoch": 0.7406799707392443, "grad_norm": NaN, "learning_rate": 0.00039670439073891177, "loss": 0.0, "step": 54170 }, { "epoch": 0.7408167032426113, "grad_norm": NaN, "learning_rate": 0.00039665607772997936, "loss": 0.0, "step": 54180 }, { "epoch": 0.7409534357459784, "grad_norm": NaN, "learning_rate": 0.000396607756368782, "loss": 0.0, "step": 54190 }, { "epoch": 0.7410901682493454, "grad_norm": NaN, "learning_rate": 0.00039655942665807144, "loss": 0.0, "step": 54200 }, { "epoch": 0.7412269007527125, "grad_norm": NaN, "learning_rate": 0.0003965110886006003, "loss": 0.0, "step": 54210 }, { "epoch": 0.7413636332560795, "grad_norm": NaN, "learning_rate": 0.0003964627421991215, "loss": 0.0, "step": 54220 }, { "epoch": 0.7415003657594466, "grad_norm": NaN, "learning_rate": 0.0003964143874563883, "loss": 0.0, "step": 54230 }, { "epoch": 0.7416370982628135, "grad_norm": NaN, "learning_rate": 0.00039636602437515467, "loss": 0.0, "step": 54240 }, { "epoch": 0.7417738307661805, "grad_norm": NaN, "learning_rate": 0.0003963176529581749, "loss": 0.0, "step": 54250 }, { "epoch": 0.7419105632695476, "grad_norm": NaN, "learning_rate": 0.00039626927320820393, "loss": 0.0, "step": 54260 }, { "epoch": 0.7420472957729146, "grad_norm": NaN, "learning_rate": 0.0003962208851279969, "loss": 0.0, "step": 54270 }, { "epoch": 0.7421840282762817, "grad_norm": NaN, "learning_rate": 0.00039617248872030964, "loss": 0.0, "step": 54280 }, { "epoch": 0.7423207607796487, "grad_norm": NaN, "learning_rate": 0.00039612408398789843, "loss": 0.0, "step": 54290 }, { "epoch": 0.7424574932830158, "grad_norm": NaN, "learning_rate": 0.00039607567093351986, "loss": 0.0, "step": 54300 }, { "epoch": 0.7425942257863828, "grad_norm": NaN, "learning_rate": 0.0003960272495599313, "loss": 0.0, "step": 54310 }, { "epoch": 0.7427309582897499, "grad_norm": NaN, "learning_rate": 0.0003959788198698903, "loss": 0.0, "step": 54320 }, { "epoch": 0.7428676907931169, "grad_norm": NaN, "learning_rate": 0.00039593038186615503, "loss": 0.0, "step": 54330 }, { "epoch": 0.743004423296484, "grad_norm": NaN, "learning_rate": 0.0003958819355514841, "loss": 0.0, "step": 54340 }, { "epoch": 0.743141155799851, "grad_norm": NaN, "learning_rate": 0.0003958334809286366, "loss": 0.0, "step": 54350 }, { "epoch": 0.743277888303218, "grad_norm": NaN, "learning_rate": 0.00039578501800037203, "loss": 0.0, "step": 54360 }, { "epoch": 0.743414620806585, "grad_norm": NaN, "learning_rate": 0.00039573654676945046, "loss": 0.0, "step": 54370 }, { "epoch": 0.743551353309952, "grad_norm": NaN, "learning_rate": 0.0003956880672386324, "loss": 0.0, "step": 54380 }, { "epoch": 0.7436880858133191, "grad_norm": NaN, "learning_rate": 0.00039563957941067887, "loss": 0.0, "step": 54390 }, { "epoch": 0.7438248183166861, "grad_norm": NaN, "learning_rate": 0.00039559108328835125, "loss": 0.0, "step": 54400 }, { "epoch": 0.7439615508200532, "grad_norm": NaN, "learning_rate": 0.0003955425788744115, "loss": 0.0, "step": 54410 }, { "epoch": 0.7440982833234202, "grad_norm": NaN, "learning_rate": 0.000395494066171622, "loss": 0.0, "step": 54420 }, { "epoch": 0.7442350158267873, "grad_norm": NaN, "learning_rate": 0.00039544554518274554, "loss": 0.0, "step": 54430 }, { "epoch": 0.7443717483301543, "grad_norm": NaN, "learning_rate": 0.00039539701591054553, "loss": 0.0, "step": 54440 }, { "epoch": 0.7445084808335214, "grad_norm": NaN, "learning_rate": 0.00039534847835778585, "loss": 0.0, "step": 54450 }, { "epoch": 0.7446452133368884, "grad_norm": NaN, "learning_rate": 0.0003952999325272306, "loss": 0.0, "step": 54460 }, { "epoch": 0.7447819458402554, "grad_norm": NaN, "learning_rate": 0.00039525137842164477, "loss": 0.0, "step": 54470 }, { "epoch": 0.7449186783436225, "grad_norm": NaN, "learning_rate": 0.0003952028160437934, "loss": 0.0, "step": 54480 }, { "epoch": 0.7450554108469895, "grad_norm": NaN, "learning_rate": 0.00039515424539644225, "loss": 0.0, "step": 54490 }, { "epoch": 0.7451921433503566, "grad_norm": NaN, "learning_rate": 0.00039510566648235745, "loss": 0.0, "step": 54500 }, { "epoch": 0.7453288758537235, "grad_norm": NaN, "learning_rate": 0.0003950570793043056, "loss": 0.0, "step": 54510 }, { "epoch": 0.7454656083570906, "grad_norm": NaN, "learning_rate": 0.00039500848386505395, "loss": 0.0, "step": 54520 }, { "epoch": 0.7456023408604576, "grad_norm": NaN, "learning_rate": 0.00039495988016736993, "loss": 0.0, "step": 54530 }, { "epoch": 0.7457390733638247, "grad_norm": NaN, "learning_rate": 0.0003949112682140217, "loss": 0.0, "step": 54540 }, { "epoch": 0.7458758058671917, "grad_norm": NaN, "learning_rate": 0.0003948626480077777, "loss": 0.0, "step": 54550 }, { "epoch": 0.7460125383705588, "grad_norm": NaN, "learning_rate": 0.00039481401955140694, "loss": 0.0, "step": 54560 }, { "epoch": 0.7461492708739258, "grad_norm": NaN, "learning_rate": 0.00039476538284767884, "loss": 0.0, "step": 54570 }, { "epoch": 0.7462860033772928, "grad_norm": NaN, "learning_rate": 0.0003947167378993634, "loss": 0.0, "step": 54580 }, { "epoch": 0.7464227358806599, "grad_norm": NaN, "learning_rate": 0.000394668084709231, "loss": 0.0, "step": 54590 }, { "epoch": 0.7465594683840269, "grad_norm": NaN, "learning_rate": 0.0003946194232800525, "loss": 0.0, "step": 54600 }, { "epoch": 0.746696200887394, "grad_norm": NaN, "learning_rate": 0.00039457075361459923, "loss": 0.0, "step": 54610 }, { "epoch": 0.746832933390761, "grad_norm": NaN, "learning_rate": 0.0003945220757156429, "loss": 0.0, "step": 54620 }, { "epoch": 0.746969665894128, "grad_norm": NaN, "learning_rate": 0.00039447338958595595, "loss": 0.0, "step": 54630 }, { "epoch": 0.747106398397495, "grad_norm": NaN, "learning_rate": 0.00039442469522831105, "loss": 0.0, "step": 54640 }, { "epoch": 0.7472431309008621, "grad_norm": NaN, "learning_rate": 0.0003943759926454814, "loss": 0.0, "step": 54650 }, { "epoch": 0.7473798634042291, "grad_norm": NaN, "learning_rate": 0.00039432728184024063, "loss": 0.0, "step": 54660 }, { "epoch": 0.7475165959075962, "grad_norm": NaN, "learning_rate": 0.000394278562815363, "loss": 0.0, "step": 54670 }, { "epoch": 0.7476533284109632, "grad_norm": NaN, "learning_rate": 0.000394229835573623, "loss": 0.0, "step": 54680 }, { "epoch": 0.7477900609143302, "grad_norm": NaN, "learning_rate": 0.00039418110011779585, "loss": 0.0, "step": 54690 }, { "epoch": 0.7479267934176973, "grad_norm": NaN, "learning_rate": 0.000394132356450657, "loss": 0.0, "step": 54700 }, { "epoch": 0.7480635259210643, "grad_norm": NaN, "learning_rate": 0.0003940836045749825, "loss": 0.0, "step": 54710 }, { "epoch": 0.7482002584244314, "grad_norm": NaN, "learning_rate": 0.00039403484449354874, "loss": 0.0, "step": 54720 }, { "epoch": 0.7483369909277984, "grad_norm": NaN, "learning_rate": 0.00039398607620913295, "loss": 0.0, "step": 54730 }, { "epoch": 0.7484737234311655, "grad_norm": NaN, "learning_rate": 0.00039393729972451226, "loss": 0.0, "step": 54740 }, { "epoch": 0.7486104559345325, "grad_norm": NaN, "learning_rate": 0.0003938885150424647, "loss": 0.0, "step": 54750 }, { "epoch": 0.7487471884378996, "grad_norm": NaN, "learning_rate": 0.0003938397221657686, "loss": 0.0, "step": 54760 }, { "epoch": 0.7488839209412665, "grad_norm": NaN, "learning_rate": 0.0003937909210972027, "loss": 0.0, "step": 54770 }, { "epoch": 0.7490206534446336, "grad_norm": NaN, "learning_rate": 0.00039374211183954645, "loss": 0.0, "step": 54780 }, { "epoch": 0.7491573859480006, "grad_norm": NaN, "learning_rate": 0.0003936932943955795, "loss": 0.0, "step": 54790 }, { "epoch": 0.7492941184513676, "grad_norm": NaN, "learning_rate": 0.00039364446876808204, "loss": 0.0, "step": 54800 }, { "epoch": 0.7494308509547347, "grad_norm": NaN, "learning_rate": 0.0003935956349598349, "loss": 0.0, "step": 54810 }, { "epoch": 0.7495675834581017, "grad_norm": NaN, "learning_rate": 0.00039354679297361905, "loss": 0.0, "step": 54820 }, { "epoch": 0.7497043159614688, "grad_norm": NaN, "learning_rate": 0.0003934979428122162, "loss": 0.0, "step": 54830 }, { "epoch": 0.7498410484648358, "grad_norm": NaN, "learning_rate": 0.00039344908447840846, "loss": 0.0, "step": 54840 }, { "epoch": 0.7499777809682029, "grad_norm": NaN, "learning_rate": 0.00039340021797497826, "loss": 0.0, "step": 54850 }, { "epoch": 0.7501145134715699, "grad_norm": NaN, "learning_rate": 0.0003933513433047088, "loss": 0.0, "step": 54860 }, { "epoch": 0.750251245974937, "grad_norm": NaN, "learning_rate": 0.00039330246047038344, "loss": 0.0, "step": 54870 }, { "epoch": 0.750387978478304, "grad_norm": NaN, "learning_rate": 0.00039325356947478605, "loss": 0.0, "step": 54880 }, { "epoch": 0.7505247109816711, "grad_norm": NaN, "learning_rate": 0.00039320467032070126, "loss": 0.0, "step": 54890 }, { "epoch": 0.750661443485038, "grad_norm": NaN, "learning_rate": 0.00039315576301091385, "loss": 0.0, "step": 54900 }, { "epoch": 0.750798175988405, "grad_norm": NaN, "learning_rate": 0.000393106847548209, "loss": 0.0, "step": 54910 }, { "epoch": 0.7509349084917721, "grad_norm": NaN, "learning_rate": 0.00039305792393537264, "loss": 0.0, "step": 54920 }, { "epoch": 0.7510716409951391, "grad_norm": NaN, "learning_rate": 0.00039300899217519116, "loss": 0.0, "step": 54930 }, { "epoch": 0.7512083734985062, "grad_norm": NaN, "learning_rate": 0.0003929600522704511, "loss": 0.0, "step": 54940 }, { "epoch": 0.7513451060018732, "grad_norm": NaN, "learning_rate": 0.0003929111042239397, "loss": 0.0, "step": 54950 }, { "epoch": 0.7514818385052403, "grad_norm": NaN, "learning_rate": 0.0003928621480384447, "loss": 0.0, "step": 54960 }, { "epoch": 0.7516185710086073, "grad_norm": NaN, "learning_rate": 0.00039281318371675404, "loss": 0.0, "step": 54970 }, { "epoch": 0.7517553035119744, "grad_norm": NaN, "learning_rate": 0.00039276421126165653, "loss": 0.0, "step": 54980 }, { "epoch": 0.7518920360153414, "grad_norm": NaN, "learning_rate": 0.00039271523067594105, "loss": 0.0, "step": 54990 }, { "epoch": 0.7520287685187085, "grad_norm": NaN, "learning_rate": 0.0003926662419623972, "loss": 0.0, "step": 55000 }, { "epoch": 0.7521655010220755, "grad_norm": NaN, "learning_rate": 0.000392617245123815, "loss": 0.0, "step": 55010 }, { "epoch": 0.7523022335254425, "grad_norm": NaN, "learning_rate": 0.00039256824016298475, "loss": 0.0, "step": 55020 }, { "epoch": 0.7524389660288096, "grad_norm": NaN, "learning_rate": 0.0003925192270826975, "loss": 0.0, "step": 55030 }, { "epoch": 0.7525756985321765, "grad_norm": NaN, "learning_rate": 0.00039247020588574445, "loss": 0.0, "step": 55040 }, { "epoch": 0.7527124310355436, "grad_norm": NaN, "learning_rate": 0.00039242117657491756, "loss": 0.0, "step": 55050 }, { "epoch": 0.7528491635389106, "grad_norm": NaN, "learning_rate": 0.00039237213915300904, "loss": 0.0, "step": 55060 }, { "epoch": 0.7529858960422777, "grad_norm": NaN, "learning_rate": 0.00039232309362281163, "loss": 0.0, "step": 55070 }, { "epoch": 0.7531226285456447, "grad_norm": NaN, "learning_rate": 0.0003922740399871186, "loss": 0.0, "step": 55080 }, { "epoch": 0.7532593610490118, "grad_norm": NaN, "learning_rate": 0.0003922249782487236, "loss": 0.0, "step": 55090 }, { "epoch": 0.7533960935523788, "grad_norm": NaN, "learning_rate": 0.00039217590841042076, "loss": 0.0, "step": 55100 }, { "epoch": 0.7535328260557459, "grad_norm": NaN, "learning_rate": 0.00039212683047500465, "loss": 0.0, "step": 55110 }, { "epoch": 0.7536695585591129, "grad_norm": NaN, "learning_rate": 0.00039207774444527036, "loss": 0.0, "step": 55120 }, { "epoch": 0.7538062910624799, "grad_norm": NaN, "learning_rate": 0.0003920286503240134, "loss": 0.0, "step": 55130 }, { "epoch": 0.753943023565847, "grad_norm": NaN, "learning_rate": 0.00039197954811402973, "loss": 0.0, "step": 55140 }, { "epoch": 0.754079756069214, "grad_norm": NaN, "learning_rate": 0.0003919304378181158, "loss": 0.0, "step": 55150 }, { "epoch": 0.7542164885725811, "grad_norm": NaN, "learning_rate": 0.0003918813194390686, "loss": 0.0, "step": 55160 }, { "epoch": 0.754353221075948, "grad_norm": NaN, "learning_rate": 0.0003918321929796853, "loss": 0.0, "step": 55170 }, { "epoch": 0.7544899535793151, "grad_norm": NaN, "learning_rate": 0.0003917830584427638, "loss": 0.0, "step": 55180 }, { "epoch": 0.7546266860826821, "grad_norm": NaN, "learning_rate": 0.0003917339158311025, "loss": 0.0, "step": 55190 }, { "epoch": 0.7547634185860492, "grad_norm": NaN, "learning_rate": 0.00039168476514749994, "loss": 0.0, "step": 55200 }, { "epoch": 0.7549001510894162, "grad_norm": NaN, "learning_rate": 0.0003916356063947554, "loss": 0.0, "step": 55210 }, { "epoch": 0.7550368835927832, "grad_norm": NaN, "learning_rate": 0.00039158643957566864, "loss": 0.0, "step": 55220 }, { "epoch": 0.7551736160961503, "grad_norm": NaN, "learning_rate": 0.00039153726469303964, "loss": 0.0, "step": 55230 }, { "epoch": 0.7553103485995173, "grad_norm": NaN, "learning_rate": 0.0003914880817496691, "loss": 0.0, "step": 55240 }, { "epoch": 0.7554470811028844, "grad_norm": NaN, "learning_rate": 0.0003914388907483579, "loss": 0.0, "step": 55250 }, { "epoch": 0.7555838136062514, "grad_norm": NaN, "learning_rate": 0.0003913896916919076, "loss": 0.0, "step": 55260 }, { "epoch": 0.7557205461096185, "grad_norm": NaN, "learning_rate": 0.0003913404845831202, "loss": 0.0, "step": 55270 }, { "epoch": 0.7558572786129855, "grad_norm": NaN, "learning_rate": 0.0003912912694247981, "loss": 0.0, "step": 55280 }, { "epoch": 0.7559940111163526, "grad_norm": NaN, "learning_rate": 0.0003912420462197442, "loss": 0.0, "step": 55290 }, { "epoch": 0.7561307436197195, "grad_norm": NaN, "learning_rate": 0.0003911928149707618, "loss": 0.0, "step": 55300 }, { "epoch": 0.7562674761230866, "grad_norm": NaN, "learning_rate": 0.0003911435756806546, "loss": 0.0, "step": 55310 }, { "epoch": 0.7564042086264536, "grad_norm": NaN, "learning_rate": 0.0003910943283522269, "loss": 0.0, "step": 55320 }, { "epoch": 0.7565409411298206, "grad_norm": NaN, "learning_rate": 0.0003910450729882835, "loss": 0.0, "step": 55330 }, { "epoch": 0.7566776736331877, "grad_norm": NaN, "learning_rate": 0.0003909958095916294, "loss": 0.0, "step": 55340 }, { "epoch": 0.7568144061365547, "grad_norm": NaN, "learning_rate": 0.0003909465381650704, "loss": 0.0, "step": 55350 }, { "epoch": 0.7569511386399218, "grad_norm": NaN, "learning_rate": 0.0003908972587114124, "loss": 0.0, "step": 55360 }, { "epoch": 0.7570878711432888, "grad_norm": NaN, "learning_rate": 0.00039084797123346203, "loss": 0.0, "step": 55370 }, { "epoch": 0.7572246036466559, "grad_norm": NaN, "learning_rate": 0.0003907986757340263, "loss": 0.0, "step": 55380 }, { "epoch": 0.7573613361500229, "grad_norm": NaN, "learning_rate": 0.0003907493722159126, "loss": 0.0, "step": 55390 }, { "epoch": 0.75749806865339, "grad_norm": NaN, "learning_rate": 0.0003907000606819288, "loss": 0.0, "step": 55400 }, { "epoch": 0.757634801156757, "grad_norm": NaN, "learning_rate": 0.0003906507411348833, "loss": 0.0, "step": 55410 }, { "epoch": 0.7577715336601241, "grad_norm": NaN, "learning_rate": 0.00039060141357758496, "loss": 0.0, "step": 55420 }, { "epoch": 0.757908266163491, "grad_norm": NaN, "learning_rate": 0.000390552078012843, "loss": 0.0, "step": 55430 }, { "epoch": 0.758044998666858, "grad_norm": NaN, "learning_rate": 0.0003905027344434672, "loss": 0.0, "step": 55440 }, { "epoch": 0.7581817311702251, "grad_norm": NaN, "learning_rate": 0.0003904533828722676, "loss": 0.0, "step": 55450 }, { "epoch": 0.7583184636735921, "grad_norm": NaN, "learning_rate": 0.00039040402330205507, "loss": 0.0, "step": 55460 }, { "epoch": 0.7584551961769592, "grad_norm": NaN, "learning_rate": 0.0003903546557356406, "loss": 0.0, "step": 55470 }, { "epoch": 0.7585919286803262, "grad_norm": NaN, "learning_rate": 0.00039030528017583563, "loss": 0.0, "step": 55480 }, { "epoch": 0.7587286611836933, "grad_norm": NaN, "learning_rate": 0.00039025589662545227, "loss": 0.0, "step": 55490 }, { "epoch": 0.7588653936870603, "grad_norm": NaN, "learning_rate": 0.00039020650508730297, "loss": 0.0, "step": 55500 }, { "epoch": 0.7590021261904274, "grad_norm": NaN, "learning_rate": 0.0003901571055642007, "loss": 0.0, "step": 55510 }, { "epoch": 0.7591388586937944, "grad_norm": NaN, "learning_rate": 0.0003901076980589587, "loss": 0.0, "step": 55520 }, { "epoch": 0.7592755911971615, "grad_norm": NaN, "learning_rate": 0.0003900582825743909, "loss": 0.0, "step": 55530 }, { "epoch": 0.7594123237005285, "grad_norm": NaN, "learning_rate": 0.0003900088591133116, "loss": 0.0, "step": 55540 }, { "epoch": 0.7595490562038955, "grad_norm": NaN, "learning_rate": 0.00038995942767853543, "loss": 0.0, "step": 55550 }, { "epoch": 0.7596857887072626, "grad_norm": NaN, "learning_rate": 0.0003899099882728777, "loss": 0.0, "step": 55560 }, { "epoch": 0.7598225212106295, "grad_norm": NaN, "learning_rate": 0.00038986054089915393, "loss": 0.0, "step": 55570 }, { "epoch": 0.7599592537139966, "grad_norm": NaN, "learning_rate": 0.00038981108556018025, "loss": 0.0, "step": 55580 }, { "epoch": 0.7600959862173636, "grad_norm": NaN, "learning_rate": 0.0003897616222587733, "loss": 0.0, "step": 55590 }, { "epoch": 0.7602327187207307, "grad_norm": NaN, "learning_rate": 0.0003897121509977499, "loss": 0.0, "step": 55600 }, { "epoch": 0.7603694512240977, "grad_norm": NaN, "learning_rate": 0.0003896626717799278, "loss": 0.0, "step": 55610 }, { "epoch": 0.7605061837274648, "grad_norm": NaN, "learning_rate": 0.00038961318460812455, "loss": 0.0, "step": 55620 }, { "epoch": 0.7606429162308318, "grad_norm": NaN, "learning_rate": 0.0003895636894851588, "loss": 0.0, "step": 55630 }, { "epoch": 0.7607796487341989, "grad_norm": NaN, "learning_rate": 0.0003895141864138493, "loss": 0.0, "step": 55640 }, { "epoch": 0.7609163812375659, "grad_norm": NaN, "learning_rate": 0.0003894646753970151, "loss": 0.0, "step": 55650 }, { "epoch": 0.7610531137409329, "grad_norm": NaN, "learning_rate": 0.00038941515643747625, "loss": 0.0, "step": 55660 }, { "epoch": 0.7611898462443, "grad_norm": NaN, "learning_rate": 0.0003893656295380527, "loss": 0.0, "step": 55670 }, { "epoch": 0.761326578747667, "grad_norm": NaN, "learning_rate": 0.0003893160947015651, "loss": 0.0, "step": 55680 }, { "epoch": 0.7614633112510341, "grad_norm": NaN, "learning_rate": 0.00038926655193083473, "loss": 0.0, "step": 55690 }, { "epoch": 0.761600043754401, "grad_norm": NaN, "learning_rate": 0.00038921700122868287, "loss": 0.0, "step": 55700 }, { "epoch": 0.7617367762577681, "grad_norm": NaN, "learning_rate": 0.00038916744259793155, "loss": 0.0, "step": 55710 }, { "epoch": 0.7618735087611351, "grad_norm": NaN, "learning_rate": 0.00038911787604140323, "loss": 0.0, "step": 55720 }, { "epoch": 0.7620102412645022, "grad_norm": NaN, "learning_rate": 0.0003890683015619209, "loss": 0.0, "step": 55730 }, { "epoch": 0.7621469737678692, "grad_norm": NaN, "learning_rate": 0.00038901871916230776, "loss": 0.0, "step": 55740 }, { "epoch": 0.7622837062712363, "grad_norm": NaN, "learning_rate": 0.0003889691288453876, "loss": 0.0, "step": 55750 }, { "epoch": 0.7624204387746033, "grad_norm": NaN, "learning_rate": 0.0003889195306139847, "loss": 0.0, "step": 55760 }, { "epoch": 0.7625571712779703, "grad_norm": NaN, "learning_rate": 0.00038886992447092377, "loss": 0.0, "step": 55770 }, { "epoch": 0.7626939037813374, "grad_norm": NaN, "learning_rate": 0.00038882031041902995, "loss": 0.0, "step": 55780 }, { "epoch": 0.7628306362847044, "grad_norm": NaN, "learning_rate": 0.00038877068846112874, "loss": 0.0, "step": 55790 }, { "epoch": 0.7629673687880715, "grad_norm": NaN, "learning_rate": 0.0003887210586000463, "loss": 0.0, "step": 55800 }, { "epoch": 0.7631041012914385, "grad_norm": NaN, "learning_rate": 0.0003886714208386089, "loss": 0.0, "step": 55810 }, { "epoch": 0.7632408337948056, "grad_norm": NaN, "learning_rate": 0.00038862177517964375, "loss": 0.0, "step": 55820 }, { "epoch": 0.7633775662981725, "grad_norm": NaN, "learning_rate": 0.0003885721216259781, "loss": 0.0, "step": 55830 }, { "epoch": 0.7635142988015396, "grad_norm": NaN, "learning_rate": 0.0003885224601804398, "loss": 0.0, "step": 55840 }, { "epoch": 0.7636510313049066, "grad_norm": NaN, "learning_rate": 0.0003884727908458572, "loss": 0.0, "step": 55850 }, { "epoch": 0.7637877638082737, "grad_norm": NaN, "learning_rate": 0.0003884231136250589, "loss": 0.0, "step": 55860 }, { "epoch": 0.7639244963116407, "grad_norm": NaN, "learning_rate": 0.0003883734285208742, "loss": 0.0, "step": 55870 }, { "epoch": 0.7640612288150077, "grad_norm": NaN, "learning_rate": 0.00038832373553613275, "loss": 0.0, "step": 55880 }, { "epoch": 0.7641979613183748, "grad_norm": NaN, "learning_rate": 0.0003882740346736645, "loss": 0.0, "step": 55890 }, { "epoch": 0.7643346938217418, "grad_norm": NaN, "learning_rate": 0.0003882243259363002, "loss": 0.0, "step": 55900 }, { "epoch": 0.7644714263251089, "grad_norm": NaN, "learning_rate": 0.00038817460932687067, "loss": 0.0, "step": 55910 }, { "epoch": 0.7646081588284759, "grad_norm": NaN, "learning_rate": 0.0003881248848482073, "loss": 0.0, "step": 55920 }, { "epoch": 0.764744891331843, "grad_norm": NaN, "learning_rate": 0.000388075152503142, "loss": 0.0, "step": 55930 }, { "epoch": 0.76488162383521, "grad_norm": NaN, "learning_rate": 0.0003880254122945072, "loss": 0.0, "step": 55940 }, { "epoch": 0.7650183563385771, "grad_norm": NaN, "learning_rate": 0.0003879756642251356, "loss": 0.0, "step": 55950 }, { "epoch": 0.765155088841944, "grad_norm": NaN, "learning_rate": 0.00038792590829786044, "loss": 0.0, "step": 55960 }, { "epoch": 0.7652918213453112, "grad_norm": NaN, "learning_rate": 0.00038787614451551533, "loss": 0.0, "step": 55970 }, { "epoch": 0.7654285538486781, "grad_norm": NaN, "learning_rate": 0.0003878263728809345, "loss": 0.0, "step": 55980 }, { "epoch": 0.7655652863520451, "grad_norm": NaN, "learning_rate": 0.0003877765933969524, "loss": 0.0, "step": 55990 }, { "epoch": 0.7657020188554122, "grad_norm": NaN, "learning_rate": 0.0003877268060664041, "loss": 0.0, "step": 56000 }, { "epoch": 0.7658387513587792, "grad_norm": NaN, "learning_rate": 0.00038767701089212505, "loss": 0.0, "step": 56010 }, { "epoch": 0.7659754838621463, "grad_norm": NaN, "learning_rate": 0.00038762720787695115, "loss": 0.0, "step": 56020 }, { "epoch": 0.7661122163655133, "grad_norm": NaN, "learning_rate": 0.0003875773970237187, "loss": 0.0, "step": 56030 }, { "epoch": 0.7662489488688804, "grad_norm": NaN, "learning_rate": 0.00038752757833526453, "loss": 0.0, "step": 56040 }, { "epoch": 0.7663856813722474, "grad_norm": NaN, "learning_rate": 0.000387477751814426, "loss": 0.0, "step": 56050 }, { "epoch": 0.7665224138756145, "grad_norm": NaN, "learning_rate": 0.0003874279174640406, "loss": 0.0, "step": 56060 }, { "epoch": 0.7666591463789815, "grad_norm": NaN, "learning_rate": 0.00038737807528694656, "loss": 0.0, "step": 56070 }, { "epoch": 0.7667958788823486, "grad_norm": NaN, "learning_rate": 0.00038732822528598246, "loss": 0.0, "step": 56080 }, { "epoch": 0.7669326113857156, "grad_norm": NaN, "learning_rate": 0.0003872783674639873, "loss": 0.0, "step": 56090 }, { "epoch": 0.7670693438890825, "grad_norm": NaN, "learning_rate": 0.00038722850182380065, "loss": 0.0, "step": 56100 }, { "epoch": 0.7672060763924496, "grad_norm": NaN, "learning_rate": 0.00038717862836826233, "loss": 0.0, "step": 56110 }, { "epoch": 0.7673428088958166, "grad_norm": NaN, "learning_rate": 0.0003871287471002127, "loss": 0.0, "step": 56120 }, { "epoch": 0.7674795413991837, "grad_norm": NaN, "learning_rate": 0.0003870788580224926, "loss": 0.0, "step": 56130 }, { "epoch": 0.7676162739025507, "grad_norm": NaN, "learning_rate": 0.0003870289611379433, "loss": 0.0, "step": 56140 }, { "epoch": 0.7677530064059178, "grad_norm": NaN, "learning_rate": 0.0003869790564494065, "loss": 0.0, "step": 56150 }, { "epoch": 0.7678897389092848, "grad_norm": NaN, "learning_rate": 0.0003869291439597242, "loss": 0.0, "step": 56160 }, { "epoch": 0.7680264714126519, "grad_norm": NaN, "learning_rate": 0.00038687922367173914, "loss": 0.0, "step": 56170 }, { "epoch": 0.7681632039160189, "grad_norm": NaN, "learning_rate": 0.0003868292955882943, "loss": 0.0, "step": 56180 }, { "epoch": 0.768299936419386, "grad_norm": NaN, "learning_rate": 0.00038677935971223316, "loss": 0.0, "step": 56190 }, { "epoch": 0.768436668922753, "grad_norm": NaN, "learning_rate": 0.00038672941604639963, "loss": 0.0, "step": 56200 }, { "epoch": 0.76857340142612, "grad_norm": NaN, "learning_rate": 0.00038667946459363805, "loss": 0.0, "step": 56210 }, { "epoch": 0.7687101339294871, "grad_norm": NaN, "learning_rate": 0.00038662950535679324, "loss": 0.0, "step": 56220 }, { "epoch": 0.768846866432854, "grad_norm": NaN, "learning_rate": 0.00038657953833871054, "loss": 0.0, "step": 56230 }, { "epoch": 0.7689835989362211, "grad_norm": NaN, "learning_rate": 0.00038652956354223543, "loss": 0.0, "step": 56240 }, { "epoch": 0.7691203314395881, "grad_norm": NaN, "learning_rate": 0.0003864795809702143, "loss": 0.0, "step": 56250 }, { "epoch": 0.7692570639429552, "grad_norm": NaN, "learning_rate": 0.0003864295906254935, "loss": 0.0, "step": 56260 }, { "epoch": 0.7693937964463222, "grad_norm": NaN, "learning_rate": 0.00038637959251092016, "loss": 0.0, "step": 56270 }, { "epoch": 0.7695305289496893, "grad_norm": NaN, "learning_rate": 0.00038632958662934176, "loss": 0.0, "step": 56280 }, { "epoch": 0.7696672614530563, "grad_norm": NaN, "learning_rate": 0.0003862795729836061, "loss": 0.0, "step": 56290 }, { "epoch": 0.7698039939564234, "grad_norm": NaN, "learning_rate": 0.00038622955157656164, "loss": 0.0, "step": 56300 }, { "epoch": 0.7699407264597904, "grad_norm": NaN, "learning_rate": 0.0003861795224110571, "loss": 0.0, "step": 56310 }, { "epoch": 0.7700774589631574, "grad_norm": NaN, "learning_rate": 0.00038612948548994173, "loss": 0.0, "step": 56320 }, { "epoch": 0.7702141914665245, "grad_norm": NaN, "learning_rate": 0.0003860794408160653, "loss": 0.0, "step": 56330 }, { "epoch": 0.7703509239698915, "grad_norm": NaN, "learning_rate": 0.0003860293883922777, "loss": 0.0, "step": 56340 }, { "epoch": 0.7704876564732586, "grad_norm": NaN, "learning_rate": 0.0003859793282214297, "loss": 0.0, "step": 56350 }, { "epoch": 0.7706243889766256, "grad_norm": NaN, "learning_rate": 0.0003859292603063721, "loss": 0.0, "step": 56360 }, { "epoch": 0.7707611214799927, "grad_norm": NaN, "learning_rate": 0.00038587918464995645, "loss": 0.0, "step": 56370 }, { "epoch": 0.7708978539833596, "grad_norm": NaN, "learning_rate": 0.00038582910125503465, "loss": 0.0, "step": 56380 }, { "epoch": 0.7710345864867267, "grad_norm": NaN, "learning_rate": 0.00038577901012445904, "loss": 0.0, "step": 56390 }, { "epoch": 0.7711713189900937, "grad_norm": NaN, "learning_rate": 0.0003857289112610823, "loss": 0.0, "step": 56400 }, { "epoch": 0.7713080514934608, "grad_norm": NaN, "learning_rate": 0.00038567880466775764, "loss": 0.0, "step": 56410 }, { "epoch": 0.7714447839968278, "grad_norm": NaN, "learning_rate": 0.0003856286903473387, "loss": 0.0, "step": 56420 }, { "epoch": 0.7715815165001948, "grad_norm": NaN, "learning_rate": 0.0003855785683026796, "loss": 0.0, "step": 56430 }, { "epoch": 0.7717182490035619, "grad_norm": NaN, "learning_rate": 0.0003855284385366348, "loss": 0.0, "step": 56440 }, { "epoch": 0.7718549815069289, "grad_norm": NaN, "learning_rate": 0.0003854783010520593, "loss": 0.0, "step": 56450 }, { "epoch": 0.771991714010296, "grad_norm": NaN, "learning_rate": 0.00038542815585180845, "loss": 0.0, "step": 56460 }, { "epoch": 0.772128446513663, "grad_norm": NaN, "learning_rate": 0.0003853780029387382, "loss": 0.0, "step": 56470 }, { "epoch": 0.7722651790170301, "grad_norm": NaN, "learning_rate": 0.00038532784231570474, "loss": 0.0, "step": 56480 }, { "epoch": 0.772401911520397, "grad_norm": NaN, "learning_rate": 0.0003852776739855648, "loss": 0.0, "step": 56490 }, { "epoch": 0.7725386440237642, "grad_norm": NaN, "learning_rate": 0.0003852274979511755, "loss": 0.0, "step": 56500 }, { "epoch": 0.7726753765271311, "grad_norm": NaN, "learning_rate": 0.0003851773142153945, "loss": 0.0, "step": 56510 }, { "epoch": 0.7728121090304982, "grad_norm": NaN, "learning_rate": 0.0003851271227810798, "loss": 0.0, "step": 56520 }, { "epoch": 0.7729488415338652, "grad_norm": NaN, "learning_rate": 0.0003850769236510899, "loss": 0.0, "step": 56530 }, { "epoch": 0.7730855740372322, "grad_norm": NaN, "learning_rate": 0.0003850267168282836, "loss": 0.0, "step": 56540 }, { "epoch": 0.7732223065405993, "grad_norm": NaN, "learning_rate": 0.0003849765023155204, "loss": 0.0, "step": 56550 }, { "epoch": 0.7733590390439663, "grad_norm": NaN, "learning_rate": 0.00038492628011566, "loss": 0.0, "step": 56560 }, { "epoch": 0.7734957715473334, "grad_norm": NaN, "learning_rate": 0.0003848760502315627, "loss": 0.0, "step": 56570 }, { "epoch": 0.7736325040507004, "grad_norm": NaN, "learning_rate": 0.00038482581266608903, "loss": 0.0, "step": 56580 }, { "epoch": 0.7737692365540675, "grad_norm": NaN, "learning_rate": 0.0003847755674221002, "loss": 0.0, "step": 56590 }, { "epoch": 0.7739059690574345, "grad_norm": NaN, "learning_rate": 0.00038472531450245765, "loss": 0.0, "step": 56600 }, { "epoch": 0.7740427015608016, "grad_norm": NaN, "learning_rate": 0.00038467505391002343, "loss": 0.0, "step": 56610 }, { "epoch": 0.7741794340641686, "grad_norm": NaN, "learning_rate": 0.0003846247856476599, "loss": 0.0, "step": 56620 }, { "epoch": 0.7743161665675355, "grad_norm": NaN, "learning_rate": 0.00038457450971823, "loss": 0.0, "step": 56630 }, { "epoch": 0.7744528990709026, "grad_norm": NaN, "learning_rate": 0.00038452422612459693, "loss": 0.0, "step": 56640 }, { "epoch": 0.7745896315742696, "grad_norm": NaN, "learning_rate": 0.00038447393486962434, "loss": 0.0, "step": 56650 }, { "epoch": 0.7747263640776367, "grad_norm": NaN, "learning_rate": 0.00038442363595617656, "loss": 0.0, "step": 56660 }, { "epoch": 0.7748630965810037, "grad_norm": NaN, "learning_rate": 0.0003843733293871181, "loss": 0.0, "step": 56670 }, { "epoch": 0.7749998290843708, "grad_norm": NaN, "learning_rate": 0.000384323015165314, "loss": 0.0, "step": 56680 }, { "epoch": 0.7751365615877378, "grad_norm": NaN, "learning_rate": 0.0003842726932936297, "loss": 0.0, "step": 56690 }, { "epoch": 0.7752732940911049, "grad_norm": NaN, "learning_rate": 0.000384222363774931, "loss": 0.0, "step": 56700 }, { "epoch": 0.7754100265944719, "grad_norm": NaN, "learning_rate": 0.0003841720266120845, "loss": 0.0, "step": 56710 }, { "epoch": 0.775546759097839, "grad_norm": NaN, "learning_rate": 0.0003841216818079567, "loss": 0.0, "step": 56720 }, { "epoch": 0.775683491601206, "grad_norm": NaN, "learning_rate": 0.000384071329365415, "loss": 0.0, "step": 56730 }, { "epoch": 0.775820224104573, "grad_norm": NaN, "learning_rate": 0.0003840209692873269, "loss": 0.0, "step": 56740 }, { "epoch": 0.7759569566079401, "grad_norm": NaN, "learning_rate": 0.0003839706015765605, "loss": 0.0, "step": 56750 }, { "epoch": 0.776093689111307, "grad_norm": NaN, "learning_rate": 0.0003839202262359844, "loss": 0.0, "step": 56760 }, { "epoch": 0.7762304216146741, "grad_norm": NaN, "learning_rate": 0.00038386984326846754, "loss": 0.0, "step": 56770 }, { "epoch": 0.7763671541180411, "grad_norm": NaN, "learning_rate": 0.0003838194526768792, "loss": 0.0, "step": 56780 }, { "epoch": 0.7765038866214082, "grad_norm": NaN, "learning_rate": 0.00038376905446408917, "loss": 0.0, "step": 56790 }, { "epoch": 0.7766406191247752, "grad_norm": NaN, "learning_rate": 0.0003837186486329679, "loss": 0.0, "step": 56800 }, { "epoch": 0.7767773516281423, "grad_norm": NaN, "learning_rate": 0.0003836682351863858, "loss": 0.0, "step": 56810 }, { "epoch": 0.7769140841315093, "grad_norm": NaN, "learning_rate": 0.00038361781412721426, "loss": 0.0, "step": 56820 }, { "epoch": 0.7770508166348764, "grad_norm": NaN, "learning_rate": 0.0003835673854583246, "loss": 0.0, "step": 56830 }, { "epoch": 0.7771875491382434, "grad_norm": NaN, "learning_rate": 0.00038351694918258895, "loss": 0.0, "step": 56840 }, { "epoch": 0.7773242816416104, "grad_norm": NaN, "learning_rate": 0.00038346650530287973, "loss": 0.0, "step": 56850 }, { "epoch": 0.7774610141449775, "grad_norm": NaN, "learning_rate": 0.00038341605382206955, "loss": 0.0, "step": 56860 }, { "epoch": 0.7775977466483445, "grad_norm": NaN, "learning_rate": 0.00038336559474303204, "loss": 0.0, "step": 56870 }, { "epoch": 0.7777344791517116, "grad_norm": NaN, "learning_rate": 0.0003833151280686407, "loss": 0.0, "step": 56880 }, { "epoch": 0.7778712116550786, "grad_norm": NaN, "learning_rate": 0.00038326465380176974, "loss": 0.0, "step": 56890 }, { "epoch": 0.7780079441584457, "grad_norm": NaN, "learning_rate": 0.00038321417194529366, "loss": 0.0, "step": 56900 }, { "epoch": 0.7781446766618126, "grad_norm": NaN, "learning_rate": 0.00038316368250208756, "loss": 0.0, "step": 56910 }, { "epoch": 0.7782814091651797, "grad_norm": NaN, "learning_rate": 0.0003831131854750268, "loss": 0.0, "step": 56920 }, { "epoch": 0.7784181416685467, "grad_norm": NaN, "learning_rate": 0.0003830626808669874, "loss": 0.0, "step": 56930 }, { "epoch": 0.7785548741719138, "grad_norm": NaN, "learning_rate": 0.00038301216868084556, "loss": 0.0, "step": 56940 }, { "epoch": 0.7786916066752808, "grad_norm": NaN, "learning_rate": 0.00038296164891947807, "loss": 0.0, "step": 56950 }, { "epoch": 0.7788283391786478, "grad_norm": NaN, "learning_rate": 0.000382911121585762, "loss": 0.0, "step": 56960 }, { "epoch": 0.7789650716820149, "grad_norm": NaN, "learning_rate": 0.000382860586682575, "loss": 0.0, "step": 56970 }, { "epoch": 0.7791018041853819, "grad_norm": NaN, "learning_rate": 0.0003828100442127952, "loss": 0.0, "step": 56980 }, { "epoch": 0.779238536688749, "grad_norm": NaN, "learning_rate": 0.0003827594941793009, "loss": 0.0, "step": 56990 }, { "epoch": 0.779375269192116, "grad_norm": NaN, "learning_rate": 0.0003827089365849711, "loss": 0.0, "step": 57000 }, { "epoch": 0.7795120016954831, "grad_norm": NaN, "learning_rate": 0.00038265837143268514, "loss": 0.0, "step": 57010 }, { "epoch": 0.7796487341988501, "grad_norm": NaN, "learning_rate": 0.0003826077987253226, "loss": 0.0, "step": 57020 }, { "epoch": 0.7797854667022172, "grad_norm": NaN, "learning_rate": 0.000382557218465764, "loss": 0.0, "step": 57030 }, { "epoch": 0.7799221992055841, "grad_norm": NaN, "learning_rate": 0.0003825066306568896, "loss": 0.0, "step": 57040 }, { "epoch": 0.7800589317089512, "grad_norm": NaN, "learning_rate": 0.0003824560353015806, "loss": 0.0, "step": 57050 }, { "epoch": 0.7801956642123182, "grad_norm": NaN, "learning_rate": 0.0003824054324027185, "loss": 0.0, "step": 57060 }, { "epoch": 0.7803323967156852, "grad_norm": NaN, "learning_rate": 0.0003823548219631853, "loss": 0.0, "step": 57070 }, { "epoch": 0.7804691292190523, "grad_norm": NaN, "learning_rate": 0.00038230420398586306, "loss": 0.0, "step": 57080 }, { "epoch": 0.7806058617224193, "grad_norm": NaN, "learning_rate": 0.0003822535784736348, "loss": 0.0, "step": 57090 }, { "epoch": 0.7807425942257864, "grad_norm": NaN, "learning_rate": 0.00038220294542938354, "loss": 0.0, "step": 57100 }, { "epoch": 0.7808793267291534, "grad_norm": NaN, "learning_rate": 0.000382152304855993, "loss": 0.0, "step": 57110 }, { "epoch": 0.7810160592325205, "grad_norm": NaN, "learning_rate": 0.0003821016567563472, "loss": 0.0, "step": 57120 }, { "epoch": 0.7811527917358875, "grad_norm": NaN, "learning_rate": 0.00038205100113333063, "loss": 0.0, "step": 57130 }, { "epoch": 0.7812895242392546, "grad_norm": NaN, "learning_rate": 0.0003820003379898282, "loss": 0.0, "step": 57140 }, { "epoch": 0.7814262567426216, "grad_norm": NaN, "learning_rate": 0.0003819496673287252, "loss": 0.0, "step": 57150 }, { "epoch": 0.7815629892459887, "grad_norm": NaN, "learning_rate": 0.00038189898915290746, "loss": 0.0, "step": 57160 }, { "epoch": 0.7816997217493556, "grad_norm": NaN, "learning_rate": 0.0003818483034652611, "loss": 0.0, "step": 57170 }, { "epoch": 0.7818364542527226, "grad_norm": NaN, "learning_rate": 0.00038179761026867274, "loss": 0.0, "step": 57180 }, { "epoch": 0.7819731867560897, "grad_norm": NaN, "learning_rate": 0.0003817469095660295, "loss": 0.0, "step": 57190 }, { "epoch": 0.7821099192594567, "grad_norm": NaN, "learning_rate": 0.00038169620136021886, "loss": 0.0, "step": 57200 }, { "epoch": 0.7822466517628238, "grad_norm": NaN, "learning_rate": 0.0003816454856541286, "loss": 0.0, "step": 57210 }, { "epoch": 0.7823833842661908, "grad_norm": NaN, "learning_rate": 0.00038159476245064723, "loss": 0.0, "step": 57220 }, { "epoch": 0.7825201167695579, "grad_norm": NaN, "learning_rate": 0.0003815440317526633, "loss": 0.0, "step": 57230 }, { "epoch": 0.7826568492729249, "grad_norm": NaN, "learning_rate": 0.0003814932935630662, "loss": 0.0, "step": 57240 }, { "epoch": 0.782793581776292, "grad_norm": NaN, "learning_rate": 0.0003814425478847454, "loss": 0.0, "step": 57250 }, { "epoch": 0.782930314279659, "grad_norm": NaN, "learning_rate": 0.000381391794720591, "loss": 0.0, "step": 57260 }, { "epoch": 0.7830670467830261, "grad_norm": NaN, "learning_rate": 0.0003813410340734934, "loss": 0.0, "step": 57270 }, { "epoch": 0.7832037792863931, "grad_norm": NaN, "learning_rate": 0.0003812902659463435, "loss": 0.0, "step": 57280 }, { "epoch": 0.78334051178976, "grad_norm": NaN, "learning_rate": 0.0003812394903420328, "loss": 0.0, "step": 57290 }, { "epoch": 0.7834772442931272, "grad_norm": NaN, "learning_rate": 0.00038118870726345274, "loss": 0.0, "step": 57300 }, { "epoch": 0.7836139767964941, "grad_norm": NaN, "learning_rate": 0.0003811379167134956, "loss": 0.0, "step": 57310 }, { "epoch": 0.7837507092998612, "grad_norm": NaN, "learning_rate": 0.00038108711869505406, "loss": 0.0, "step": 57320 }, { "epoch": 0.7838874418032282, "grad_norm": NaN, "learning_rate": 0.00038103631321102105, "loss": 0.0, "step": 57330 }, { "epoch": 0.7840241743065953, "grad_norm": NaN, "learning_rate": 0.00038098550026429003, "loss": 0.0, "step": 57340 }, { "epoch": 0.7841609068099623, "grad_norm": NaN, "learning_rate": 0.00038093467985775487, "loss": 0.0, "step": 57350 }, { "epoch": 0.7842976393133294, "grad_norm": NaN, "learning_rate": 0.0003808838519943099, "loss": 0.0, "step": 57360 }, { "epoch": 0.7844343718166964, "grad_norm": NaN, "learning_rate": 0.0003808330166768498, "loss": 0.0, "step": 57370 }, { "epoch": 0.7845711043200635, "grad_norm": NaN, "learning_rate": 0.00038078217390826976, "loss": 0.0, "step": 57380 }, { "epoch": 0.7847078368234305, "grad_norm": NaN, "learning_rate": 0.0003807313236914652, "loss": 0.0, "step": 57390 }, { "epoch": 0.7848445693267975, "grad_norm": NaN, "learning_rate": 0.00038068046602933225, "loss": 0.0, "step": 57400 }, { "epoch": 0.7849813018301646, "grad_norm": NaN, "learning_rate": 0.0003806296009247673, "loss": 0.0, "step": 57410 }, { "epoch": 0.7851180343335316, "grad_norm": NaN, "learning_rate": 0.0003805787283806672, "loss": 0.0, "step": 57420 }, { "epoch": 0.7852547668368987, "grad_norm": NaN, "learning_rate": 0.00038052784839992916, "loss": 0.0, "step": 57430 }, { "epoch": 0.7853914993402656, "grad_norm": NaN, "learning_rate": 0.0003804769609854508, "loss": 0.0, "step": 57440 }, { "epoch": 0.7855282318436327, "grad_norm": NaN, "learning_rate": 0.00038042606614013043, "loss": 0.0, "step": 57450 }, { "epoch": 0.7856649643469997, "grad_norm": NaN, "learning_rate": 0.00038037516386686644, "loss": 0.0, "step": 57460 }, { "epoch": 0.7858016968503668, "grad_norm": NaN, "learning_rate": 0.00038032425416855776, "loss": 0.0, "step": 57470 }, { "epoch": 0.7859384293537338, "grad_norm": NaN, "learning_rate": 0.0003802733370481039, "loss": 0.0, "step": 57480 }, { "epoch": 0.7860751618571009, "grad_norm": NaN, "learning_rate": 0.0003802224125084045, "loss": 0.0, "step": 57490 }, { "epoch": 0.7862118943604679, "grad_norm": NaN, "learning_rate": 0.0003801714805523599, "loss": 0.0, "step": 57500 }, { "epoch": 0.7863486268638349, "grad_norm": NaN, "learning_rate": 0.0003801205411828707, "loss": 0.0, "step": 57510 }, { "epoch": 0.786485359367202, "grad_norm": NaN, "learning_rate": 0.00038006959440283795, "loss": 0.0, "step": 57520 }, { "epoch": 0.786622091870569, "grad_norm": NaN, "learning_rate": 0.0003800186402151632, "loss": 0.0, "step": 57530 }, { "epoch": 0.7867588243739361, "grad_norm": NaN, "learning_rate": 0.0003799676786227483, "loss": 0.0, "step": 57540 }, { "epoch": 0.7868955568773031, "grad_norm": NaN, "learning_rate": 0.0003799167096284956, "loss": 0.0, "step": 57550 }, { "epoch": 0.7870322893806702, "grad_norm": NaN, "learning_rate": 0.00037986573323530793, "loss": 0.0, "step": 57560 }, { "epoch": 0.7871690218840371, "grad_norm": NaN, "learning_rate": 0.00037981474944608827, "loss": 0.0, "step": 57570 }, { "epoch": 0.7873057543874042, "grad_norm": NaN, "learning_rate": 0.00037976375826374046, "loss": 0.0, "step": 57580 }, { "epoch": 0.7874424868907712, "grad_norm": NaN, "learning_rate": 0.00037971275969116836, "loss": 0.0, "step": 57590 }, { "epoch": 0.7875792193941383, "grad_norm": NaN, "learning_rate": 0.00037966175373127634, "loss": 0.0, "step": 57600 }, { "epoch": 0.7877159518975053, "grad_norm": NaN, "learning_rate": 0.0003796107403869695, "loss": 0.0, "step": 57610 }, { "epoch": 0.7878526844008723, "grad_norm": NaN, "learning_rate": 0.000379559719661153, "loss": 0.0, "step": 57620 }, { "epoch": 0.7879894169042394, "grad_norm": NaN, "learning_rate": 0.0003795086915567324, "loss": 0.0, "step": 57630 }, { "epoch": 0.7881261494076064, "grad_norm": NaN, "learning_rate": 0.0003794576560766141, "loss": 0.0, "step": 57640 }, { "epoch": 0.7882628819109735, "grad_norm": NaN, "learning_rate": 0.0003794066132237044, "loss": 0.0, "step": 57650 }, { "epoch": 0.7883996144143405, "grad_norm": NaN, "learning_rate": 0.0003793555630009103, "loss": 0.0, "step": 57660 }, { "epoch": 0.7885363469177076, "grad_norm": NaN, "learning_rate": 0.0003793045054111394, "loss": 0.0, "step": 57670 }, { "epoch": 0.7886730794210746, "grad_norm": NaN, "learning_rate": 0.00037925344045729913, "loss": 0.0, "step": 57680 }, { "epoch": 0.7888098119244417, "grad_norm": NaN, "learning_rate": 0.0003792023681422981, "loss": 0.0, "step": 57690 }, { "epoch": 0.7889465444278086, "grad_norm": NaN, "learning_rate": 0.0003791512884690446, "loss": 0.0, "step": 57700 }, { "epoch": 0.7890832769311757, "grad_norm": NaN, "learning_rate": 0.000379100201440448, "loss": 0.0, "step": 57710 }, { "epoch": 0.7892200094345427, "grad_norm": NaN, "learning_rate": 0.00037904910705941755, "loss": 0.0, "step": 57720 }, { "epoch": 0.7893567419379097, "grad_norm": NaN, "learning_rate": 0.00037899800532886313, "loss": 0.0, "step": 57730 }, { "epoch": 0.7894934744412768, "grad_norm": NaN, "learning_rate": 0.0003789468962516952, "loss": 0.0, "step": 57740 }, { "epoch": 0.7896302069446438, "grad_norm": NaN, "learning_rate": 0.00037889577983082445, "loss": 0.0, "step": 57750 }, { "epoch": 0.7897669394480109, "grad_norm": NaN, "learning_rate": 0.000378844656069162, "loss": 0.0, "step": 57760 }, { "epoch": 0.7899036719513779, "grad_norm": NaN, "learning_rate": 0.0003787935249696194, "loss": 0.0, "step": 57770 }, { "epoch": 0.790040404454745, "grad_norm": NaN, "learning_rate": 0.0003787423865351087, "loss": 0.0, "step": 57780 }, { "epoch": 0.790177136958112, "grad_norm": NaN, "learning_rate": 0.0003786912407685423, "loss": 0.0, "step": 57790 }, { "epoch": 0.7903138694614791, "grad_norm": NaN, "learning_rate": 0.00037864008767283304, "loss": 0.0, "step": 57800 }, { "epoch": 0.7904506019648461, "grad_norm": NaN, "learning_rate": 0.000378588927250894, "loss": 0.0, "step": 57810 }, { "epoch": 0.7905873344682132, "grad_norm": NaN, "learning_rate": 0.00037853775950563907, "loss": 0.0, "step": 57820 }, { "epoch": 0.7907240669715802, "grad_norm": NaN, "learning_rate": 0.0003784865844399821, "loss": 0.0, "step": 57830 }, { "epoch": 0.7908607994749471, "grad_norm": NaN, "learning_rate": 0.0003784354020568377, "loss": 0.0, "step": 57840 }, { "epoch": 0.7909975319783142, "grad_norm": NaN, "learning_rate": 0.0003783842123591208, "loss": 0.0, "step": 57850 }, { "epoch": 0.7911342644816812, "grad_norm": NaN, "learning_rate": 0.0003783330153497466, "loss": 0.0, "step": 57860 }, { "epoch": 0.7912709969850483, "grad_norm": NaN, "learning_rate": 0.0003782818110316311, "loss": 0.0, "step": 57870 }, { "epoch": 0.7914077294884153, "grad_norm": NaN, "learning_rate": 0.0003782305994076901, "loss": 0.0, "step": 57880 }, { "epoch": 0.7915444619917824, "grad_norm": NaN, "learning_rate": 0.00037817938048084033, "loss": 0.0, "step": 57890 }, { "epoch": 0.7916811944951494, "grad_norm": NaN, "learning_rate": 0.00037812815425399893, "loss": 0.0, "step": 57900 }, { "epoch": 0.7918179269985165, "grad_norm": NaN, "learning_rate": 0.00037807692073008314, "loss": 0.0, "step": 57910 }, { "epoch": 0.7919546595018835, "grad_norm": NaN, "learning_rate": 0.00037802567991201074, "loss": 0.0, "step": 57920 }, { "epoch": 0.7920913920052506, "grad_norm": NaN, "learning_rate": 0.0003779744318027002, "loss": 0.0, "step": 57930 }, { "epoch": 0.7922281245086176, "grad_norm": NaN, "learning_rate": 0.0003779231764050699, "loss": 0.0, "step": 57940 }, { "epoch": 0.7923648570119846, "grad_norm": NaN, "learning_rate": 0.000377871913722039, "loss": 0.0, "step": 57950 }, { "epoch": 0.7925015895153517, "grad_norm": NaN, "learning_rate": 0.00037782064375652705, "loss": 0.0, "step": 57960 }, { "epoch": 0.7926383220187186, "grad_norm": NaN, "learning_rate": 0.0003777693665114538, "loss": 0.0, "step": 57970 }, { "epoch": 0.7927750545220857, "grad_norm": NaN, "learning_rate": 0.0003777180819897398, "loss": 0.0, "step": 57980 }, { "epoch": 0.7929117870254527, "grad_norm": NaN, "learning_rate": 0.0003776667901943055, "loss": 0.0, "step": 57990 }, { "epoch": 0.7930485195288198, "grad_norm": NaN, "learning_rate": 0.0003776154911280722, "loss": 0.0, "step": 58000 }, { "epoch": 0.7931852520321868, "grad_norm": NaN, "learning_rate": 0.0003775641847939614, "loss": 0.0, "step": 58010 }, { "epoch": 0.7933219845355539, "grad_norm": NaN, "learning_rate": 0.000377512871194895, "loss": 0.0, "step": 58020 }, { "epoch": 0.7934587170389209, "grad_norm": NaN, "learning_rate": 0.00037746155033379557, "loss": 0.0, "step": 58030 }, { "epoch": 0.7935954495422879, "grad_norm": NaN, "learning_rate": 0.0003774102222135858, "loss": 0.0, "step": 58040 }, { "epoch": 0.793732182045655, "grad_norm": NaN, "learning_rate": 0.00037735888683718886, "loss": 0.0, "step": 58050 }, { "epoch": 0.793868914549022, "grad_norm": NaN, "learning_rate": 0.00037730754420752844, "loss": 0.0, "step": 58060 }, { "epoch": 0.7940056470523891, "grad_norm": NaN, "learning_rate": 0.00037725619432752843, "loss": 0.0, "step": 58070 }, { "epoch": 0.7941423795557561, "grad_norm": NaN, "learning_rate": 0.00037720483720011345, "loss": 0.0, "step": 58080 }, { "epoch": 0.7942791120591232, "grad_norm": NaN, "learning_rate": 0.0003771534728282083, "loss": 0.0, "step": 58090 }, { "epoch": 0.7944158445624901, "grad_norm": NaN, "learning_rate": 0.0003771021012147382, "loss": 0.0, "step": 58100 }, { "epoch": 0.7945525770658572, "grad_norm": NaN, "learning_rate": 0.00037705072236262895, "loss": 0.0, "step": 58110 }, { "epoch": 0.7946893095692242, "grad_norm": NaN, "learning_rate": 0.0003769993362748065, "loss": 0.0, "step": 58120 }, { "epoch": 0.7948260420725913, "grad_norm": NaN, "learning_rate": 0.00037694794295419745, "loss": 0.0, "step": 58130 }, { "epoch": 0.7949627745759583, "grad_norm": NaN, "learning_rate": 0.00037689654240372875, "loss": 0.0, "step": 58140 }, { "epoch": 0.7950995070793253, "grad_norm": NaN, "learning_rate": 0.0003768451346263275, "loss": 0.0, "step": 58150 }, { "epoch": 0.7952362395826924, "grad_norm": NaN, "learning_rate": 0.0003767937196249218, "loss": 0.0, "step": 58160 }, { "epoch": 0.7953729720860594, "grad_norm": NaN, "learning_rate": 0.0003767422974024396, "loss": 0.0, "step": 58170 }, { "epoch": 0.7955097045894265, "grad_norm": NaN, "learning_rate": 0.0003766908679618094, "loss": 0.0, "step": 58180 }, { "epoch": 0.7956464370927935, "grad_norm": NaN, "learning_rate": 0.0003766394313059604, "loss": 0.0, "step": 58190 }, { "epoch": 0.7957831695961606, "grad_norm": NaN, "learning_rate": 0.00037658798743782183, "loss": 0.0, "step": 58200 }, { "epoch": 0.7959199020995276, "grad_norm": NaN, "learning_rate": 0.00037653653636032347, "loss": 0.0, "step": 58210 }, { "epoch": 0.7960566346028947, "grad_norm": NaN, "learning_rate": 0.00037648507807639563, "loss": 0.0, "step": 58220 }, { "epoch": 0.7961933671062617, "grad_norm": NaN, "learning_rate": 0.0003764336125889688, "loss": 0.0, "step": 58230 }, { "epoch": 0.7963300996096287, "grad_norm": NaN, "learning_rate": 0.0003763821399009742, "loss": 0.0, "step": 58240 }, { "epoch": 0.7964668321129957, "grad_norm": NaN, "learning_rate": 0.00037633066001534313, "loss": 0.0, "step": 58250 }, { "epoch": 0.7966035646163627, "grad_norm": NaN, "learning_rate": 0.0003762791729350074, "loss": 0.0, "step": 58260 }, { "epoch": 0.7967402971197298, "grad_norm": NaN, "learning_rate": 0.0003762276786628994, "loss": 0.0, "step": 58270 }, { "epoch": 0.7968770296230968, "grad_norm": NaN, "learning_rate": 0.0003761761772019517, "loss": 0.0, "step": 58280 }, { "epoch": 0.7970137621264639, "grad_norm": NaN, "learning_rate": 0.00037612466855509744, "loss": 0.0, "step": 58290 }, { "epoch": 0.7971504946298309, "grad_norm": NaN, "learning_rate": 0.0003760731527252701, "loss": 0.0, "step": 58300 }, { "epoch": 0.797287227133198, "grad_norm": NaN, "learning_rate": 0.00037602162971540356, "loss": 0.0, "step": 58310 }, { "epoch": 0.797423959636565, "grad_norm": NaN, "learning_rate": 0.0003759700995284321, "loss": 0.0, "step": 58320 }, { "epoch": 0.7975606921399321, "grad_norm": NaN, "learning_rate": 0.00037591856216729046, "loss": 0.0, "step": 58330 }, { "epoch": 0.7976974246432991, "grad_norm": NaN, "learning_rate": 0.0003758670176349138, "loss": 0.0, "step": 58340 }, { "epoch": 0.7978341571466662, "grad_norm": NaN, "learning_rate": 0.00037581546593423764, "loss": 0.0, "step": 58350 }, { "epoch": 0.7979708896500332, "grad_norm": NaN, "learning_rate": 0.00037576390706819783, "loss": 0.0, "step": 58360 }, { "epoch": 0.7981076221534001, "grad_norm": NaN, "learning_rate": 0.0003757123410397309, "loss": 0.0, "step": 58370 }, { "epoch": 0.7982443546567672, "grad_norm": NaN, "learning_rate": 0.0003756607678517734, "loss": 0.0, "step": 58380 }, { "epoch": 0.7983810871601342, "grad_norm": NaN, "learning_rate": 0.00037560918750726265, "loss": 0.0, "step": 58390 }, { "epoch": 0.7985178196635013, "grad_norm": NaN, "learning_rate": 0.0003755576000091361, "loss": 0.0, "step": 58400 }, { "epoch": 0.7986545521668683, "grad_norm": NaN, "learning_rate": 0.00037550600536033187, "loss": 0.0, "step": 58410 }, { "epoch": 0.7987912846702354, "grad_norm": NaN, "learning_rate": 0.00037545440356378824, "loss": 0.0, "step": 58420 }, { "epoch": 0.7989280171736024, "grad_norm": NaN, "learning_rate": 0.00037540279462244396, "loss": 0.0, "step": 58430 }, { "epoch": 0.7990647496769695, "grad_norm": NaN, "learning_rate": 0.0003753511785392383, "loss": 0.0, "step": 58440 }, { "epoch": 0.7992014821803365, "grad_norm": NaN, "learning_rate": 0.0003752995553171109, "loss": 0.0, "step": 58450 }, { "epoch": 0.7993382146837036, "grad_norm": NaN, "learning_rate": 0.0003752479249590018, "loss": 0.0, "step": 58460 }, { "epoch": 0.7994749471870706, "grad_norm": NaN, "learning_rate": 0.0003751962874678513, "loss": 0.0, "step": 58470 }, { "epoch": 0.7996116796904376, "grad_norm": NaN, "learning_rate": 0.0003751446428466003, "loss": 0.0, "step": 58480 }, { "epoch": 0.7997484121938047, "grad_norm": NaN, "learning_rate": 0.00037509299109818995, "loss": 0.0, "step": 58490 }, { "epoch": 0.7998851446971716, "grad_norm": NaN, "learning_rate": 0.000375041332225562, "loss": 0.0, "step": 58500 }, { "epoch": 0.8000218772005387, "grad_norm": NaN, "learning_rate": 0.00037498966623165854, "loss": 0.0, "step": 58510 }, { "epoch": 0.8001586097039057, "grad_norm": NaN, "learning_rate": 0.00037493799311942176, "loss": 0.0, "step": 58520 }, { "epoch": 0.8002953422072728, "grad_norm": NaN, "learning_rate": 0.00037488631289179483, "loss": 0.0, "step": 58530 }, { "epoch": 0.8004320747106398, "grad_norm": NaN, "learning_rate": 0.0003748346255517208, "loss": 0.0, "step": 58540 }, { "epoch": 0.8005688072140069, "grad_norm": NaN, "learning_rate": 0.0003747829311021434, "loss": 0.0, "step": 58550 }, { "epoch": 0.8007055397173739, "grad_norm": NaN, "learning_rate": 0.00037473122954600666, "loss": 0.0, "step": 58560 }, { "epoch": 0.800842272220741, "grad_norm": NaN, "learning_rate": 0.00037467952088625513, "loss": 0.0, "step": 58570 }, { "epoch": 0.800979004724108, "grad_norm": NaN, "learning_rate": 0.0003746278051258336, "loss": 0.0, "step": 58580 }, { "epoch": 0.801115737227475, "grad_norm": NaN, "learning_rate": 0.00037457608226768745, "loss": 0.0, "step": 58590 }, { "epoch": 0.8012524697308421, "grad_norm": NaN, "learning_rate": 0.0003745243523147623, "loss": 0.0, "step": 58600 }, { "epoch": 0.8013892022342091, "grad_norm": NaN, "learning_rate": 0.00037447261527000437, "loss": 0.0, "step": 58610 }, { "epoch": 0.8015259347375762, "grad_norm": NaN, "learning_rate": 0.00037442087113635997, "loss": 0.0, "step": 58620 }, { "epoch": 0.8016626672409431, "grad_norm": NaN, "learning_rate": 0.00037436911991677605, "loss": 0.0, "step": 58630 }, { "epoch": 0.8017993997443102, "grad_norm": NaN, "learning_rate": 0.0003743173616142, "loss": 0.0, "step": 58640 }, { "epoch": 0.8019361322476772, "grad_norm": NaN, "learning_rate": 0.0003742655962315794, "loss": 0.0, "step": 58650 }, { "epoch": 0.8020728647510443, "grad_norm": NaN, "learning_rate": 0.00037421382377186255, "loss": 0.0, "step": 58660 }, { "epoch": 0.8022095972544113, "grad_norm": NaN, "learning_rate": 0.0003741620442379977, "loss": 0.0, "step": 58670 }, { "epoch": 0.8023463297577784, "grad_norm": NaN, "learning_rate": 0.000374110257632934, "loss": 0.0, "step": 58680 }, { "epoch": 0.8024830622611454, "grad_norm": NaN, "learning_rate": 0.00037405846395962064, "loss": 0.0, "step": 58690 }, { "epoch": 0.8026197947645124, "grad_norm": NaN, "learning_rate": 0.0003740066632210073, "loss": 0.0, "step": 58700 }, { "epoch": 0.8027565272678795, "grad_norm": NaN, "learning_rate": 0.0003739548554200443, "loss": 0.0, "step": 58710 }, { "epoch": 0.8028932597712465, "grad_norm": NaN, "learning_rate": 0.000373903040559682, "loss": 0.0, "step": 58720 }, { "epoch": 0.8030299922746136, "grad_norm": NaN, "learning_rate": 0.00037385121864287135, "loss": 0.0, "step": 58730 }, { "epoch": 0.8031667247779806, "grad_norm": NaN, "learning_rate": 0.00037379938967256376, "loss": 0.0, "step": 58740 }, { "epoch": 0.8033034572813477, "grad_norm": NaN, "learning_rate": 0.00037374755365171086, "loss": 0.0, "step": 58750 }, { "epoch": 0.8034401897847147, "grad_norm": NaN, "learning_rate": 0.0003736957105832648, "loss": 0.0, "step": 58760 }, { "epoch": 0.8035769222880818, "grad_norm": NaN, "learning_rate": 0.0003736438604701782, "loss": 0.0, "step": 58770 }, { "epoch": 0.8037136547914487, "grad_norm": NaN, "learning_rate": 0.0003735920033154039, "loss": 0.0, "step": 58780 }, { "epoch": 0.8038503872948158, "grad_norm": NaN, "learning_rate": 0.00037354013912189527, "loss": 0.0, "step": 58790 }, { "epoch": 0.8039871197981828, "grad_norm": NaN, "learning_rate": 0.0003734882678926061, "loss": 0.0, "step": 58800 }, { "epoch": 0.8041238523015498, "grad_norm": NaN, "learning_rate": 0.0003734363896304904, "loss": 0.0, "step": 58810 }, { "epoch": 0.8042605848049169, "grad_norm": NaN, "learning_rate": 0.0003733845043385028, "loss": 0.0, "step": 58820 }, { "epoch": 0.8043973173082839, "grad_norm": NaN, "learning_rate": 0.00037333261201959823, "loss": 0.0, "step": 58830 }, { "epoch": 0.804534049811651, "grad_norm": NaN, "learning_rate": 0.000373280712676732, "loss": 0.0, "step": 58840 }, { "epoch": 0.804670782315018, "grad_norm": NaN, "learning_rate": 0.00037322880631285993, "loss": 0.0, "step": 58850 }, { "epoch": 0.8048075148183851, "grad_norm": NaN, "learning_rate": 0.00037317689293093803, "loss": 0.0, "step": 58860 }, { "epoch": 0.8049442473217521, "grad_norm": NaN, "learning_rate": 0.000373124972533923, "loss": 0.0, "step": 58870 }, { "epoch": 0.8050809798251192, "grad_norm": NaN, "learning_rate": 0.00037307304512477157, "loss": 0.0, "step": 58880 }, { "epoch": 0.8052177123284862, "grad_norm": NaN, "learning_rate": 0.0003730211107064412, "loss": 0.0, "step": 58890 }, { "epoch": 0.8053544448318533, "grad_norm": NaN, "learning_rate": 0.00037296916928188973, "loss": 0.0, "step": 58900 }, { "epoch": 0.8054911773352202, "grad_norm": NaN, "learning_rate": 0.00037291722085407516, "loss": 0.0, "step": 58910 }, { "epoch": 0.8056279098385872, "grad_norm": NaN, "learning_rate": 0.00037286526542595596, "loss": 0.0, "step": 58920 }, { "epoch": 0.8057646423419543, "grad_norm": NaN, "learning_rate": 0.00037281330300049123, "loss": 0.0, "step": 58930 }, { "epoch": 0.8059013748453213, "grad_norm": NaN, "learning_rate": 0.00037276133358064015, "loss": 0.0, "step": 58940 }, { "epoch": 0.8060381073486884, "grad_norm": NaN, "learning_rate": 0.0003727093571693626, "loss": 0.0, "step": 58950 }, { "epoch": 0.8061748398520554, "grad_norm": NaN, "learning_rate": 0.0003726573737696186, "loss": 0.0, "step": 58960 }, { "epoch": 0.8063115723554225, "grad_norm": NaN, "learning_rate": 0.00037260538338436866, "loss": 0.0, "step": 58970 }, { "epoch": 0.8064483048587895, "grad_norm": NaN, "learning_rate": 0.0003725533860165738, "loss": 0.0, "step": 58980 }, { "epoch": 0.8065850373621566, "grad_norm": NaN, "learning_rate": 0.0003725013816691953, "loss": 0.0, "step": 58990 }, { "epoch": 0.8067217698655236, "grad_norm": NaN, "learning_rate": 0.00037244937034519484, "loss": 0.0, "step": 59000 }, { "epoch": 0.8068585023688907, "grad_norm": NaN, "learning_rate": 0.0003723973520475346, "loss": 0.0, "step": 59010 }, { "epoch": 0.8069952348722577, "grad_norm": NaN, "learning_rate": 0.00037234532677917696, "loss": 0.0, "step": 59020 }, { "epoch": 0.8071319673756246, "grad_norm": NaN, "learning_rate": 0.00037229329454308507, "loss": 0.0, "step": 59030 }, { "epoch": 0.8072686998789917, "grad_norm": NaN, "learning_rate": 0.0003722412553422221, "loss": 0.0, "step": 59040 }, { "epoch": 0.8074054323823587, "grad_norm": NaN, "learning_rate": 0.00037218920917955165, "loss": 0.0, "step": 59050 }, { "epoch": 0.8075421648857258, "grad_norm": NaN, "learning_rate": 0.00037213715605803794, "loss": 0.0, "step": 59060 }, { "epoch": 0.8076788973890928, "grad_norm": NaN, "learning_rate": 0.00037208509598064544, "loss": 0.0, "step": 59070 }, { "epoch": 0.8078156298924599, "grad_norm": NaN, "learning_rate": 0.00037203302895033916, "loss": 0.0, "step": 59080 }, { "epoch": 0.8079523623958269, "grad_norm": NaN, "learning_rate": 0.00037198095497008424, "loss": 0.0, "step": 59090 }, { "epoch": 0.808089094899194, "grad_norm": NaN, "learning_rate": 0.0003719288740428463, "loss": 0.0, "step": 59100 }, { "epoch": 0.808225827402561, "grad_norm": NaN, "learning_rate": 0.0003718767861715916, "loss": 0.0, "step": 59110 }, { "epoch": 0.8083625599059281, "grad_norm": NaN, "learning_rate": 0.00037182469135928653, "loss": 0.0, "step": 59120 }, { "epoch": 0.8084992924092951, "grad_norm": NaN, "learning_rate": 0.000371772589608898, "loss": 0.0, "step": 59130 }, { "epoch": 0.8086360249126621, "grad_norm": NaN, "learning_rate": 0.00037172048092339326, "loss": 0.0, "step": 59140 }, { "epoch": 0.8087727574160292, "grad_norm": NaN, "learning_rate": 0.0003716683653057399, "loss": 0.0, "step": 59150 }, { "epoch": 0.8089094899193962, "grad_norm": NaN, "learning_rate": 0.0003716162427589061, "loss": 0.0, "step": 59160 }, { "epoch": 0.8090462224227633, "grad_norm": NaN, "learning_rate": 0.00037156411328586025, "loss": 0.0, "step": 59170 }, { "epoch": 0.8091829549261302, "grad_norm": NaN, "learning_rate": 0.0003715119768895712, "loss": 0.0, "step": 59180 }, { "epoch": 0.8093196874294973, "grad_norm": NaN, "learning_rate": 0.0003714598335730081, "loss": 0.0, "step": 59190 }, { "epoch": 0.8094564199328643, "grad_norm": NaN, "learning_rate": 0.00037140768333914073, "loss": 0.0, "step": 59200 }, { "epoch": 0.8095931524362314, "grad_norm": NaN, "learning_rate": 0.00037135552619093904, "loss": 0.0, "step": 59210 }, { "epoch": 0.8097298849395984, "grad_norm": NaN, "learning_rate": 0.00037130336213137346, "loss": 0.0, "step": 59220 }, { "epoch": 0.8098666174429655, "grad_norm": NaN, "learning_rate": 0.00037125119116341476, "loss": 0.0, "step": 59230 }, { "epoch": 0.8100033499463325, "grad_norm": NaN, "learning_rate": 0.0003711990132900343, "loss": 0.0, "step": 59240 }, { "epoch": 0.8101400824496995, "grad_norm": NaN, "learning_rate": 0.0003711468285142035, "loss": 0.0, "step": 59250 }, { "epoch": 0.8102768149530666, "grad_norm": NaN, "learning_rate": 0.0003710946368388944, "loss": 0.0, "step": 59260 }, { "epoch": 0.8104135474564336, "grad_norm": NaN, "learning_rate": 0.0003710424382670795, "loss": 0.0, "step": 59270 }, { "epoch": 0.8105502799598007, "grad_norm": NaN, "learning_rate": 0.0003709902328017315, "loss": 0.0, "step": 59280 }, { "epoch": 0.8106870124631677, "grad_norm": NaN, "learning_rate": 0.0003709380204458236, "loss": 0.0, "step": 59290 }, { "epoch": 0.8108237449665348, "grad_norm": NaN, "learning_rate": 0.0003708858012023293, "loss": 0.0, "step": 59300 }, { "epoch": 0.8109604774699017, "grad_norm": NaN, "learning_rate": 0.00037083357507422255, "loss": 0.0, "step": 59310 }, { "epoch": 0.8110972099732688, "grad_norm": NaN, "learning_rate": 0.0003707813420644778, "loss": 0.0, "step": 59320 }, { "epoch": 0.8112339424766358, "grad_norm": NaN, "learning_rate": 0.0003707291021760698, "loss": 0.0, "step": 59330 }, { "epoch": 0.8113706749800029, "grad_norm": NaN, "learning_rate": 0.0003706768554119735, "loss": 0.0, "step": 59340 }, { "epoch": 0.8115074074833699, "grad_norm": NaN, "learning_rate": 0.0003706246017751646, "loss": 0.0, "step": 59350 }, { "epoch": 0.8116441399867369, "grad_norm": NaN, "learning_rate": 0.00037057234126861894, "loss": 0.0, "step": 59360 }, { "epoch": 0.811780872490104, "grad_norm": NaN, "learning_rate": 0.0003705200738953129, "loss": 0.0, "step": 59370 }, { "epoch": 0.811917604993471, "grad_norm": NaN, "learning_rate": 0.0003704677996582231, "loss": 0.0, "step": 59380 }, { "epoch": 0.8120543374968381, "grad_norm": NaN, "learning_rate": 0.00037041551856032665, "loss": 0.0, "step": 59390 }, { "epoch": 0.8121910700002051, "grad_norm": NaN, "learning_rate": 0.000370363230604601, "loss": 0.0, "step": 59400 }, { "epoch": 0.8123278025035722, "grad_norm": NaN, "learning_rate": 0.00037031093579402414, "loss": 0.0, "step": 59410 }, { "epoch": 0.8124645350069392, "grad_norm": NaN, "learning_rate": 0.00037025863413157423, "loss": 0.0, "step": 59420 }, { "epoch": 0.8126012675103063, "grad_norm": NaN, "learning_rate": 0.00037020632562022993, "loss": 0.0, "step": 59430 }, { "epoch": 0.8127380000136732, "grad_norm": NaN, "learning_rate": 0.0003701540102629703, "loss": 0.0, "step": 59440 }, { "epoch": 0.8128747325170403, "grad_norm": NaN, "learning_rate": 0.0003701016880627748, "loss": 0.0, "step": 59450 }, { "epoch": 0.8130114650204073, "grad_norm": NaN, "learning_rate": 0.00037004935902262316, "loss": 0.0, "step": 59460 }, { "epoch": 0.8131481975237743, "grad_norm": NaN, "learning_rate": 0.00036999702314549567, "loss": 0.0, "step": 59470 }, { "epoch": 0.8132849300271414, "grad_norm": NaN, "learning_rate": 0.0003699446804343729, "loss": 0.0, "step": 59480 }, { "epoch": 0.8134216625305084, "grad_norm": NaN, "learning_rate": 0.00036989233089223584, "loss": 0.0, "step": 59490 }, { "epoch": 0.8135583950338755, "grad_norm": NaN, "learning_rate": 0.00036983997452206597, "loss": 0.0, "step": 59500 }, { "epoch": 0.8136951275372425, "grad_norm": NaN, "learning_rate": 0.0003697876113268449, "loss": 0.0, "step": 59510 }, { "epoch": 0.8138318600406096, "grad_norm": NaN, "learning_rate": 0.00036973524130955475, "loss": 0.0, "step": 59520 }, { "epoch": 0.8139685925439766, "grad_norm": NaN, "learning_rate": 0.00036968286447317824, "loss": 0.0, "step": 59530 }, { "epoch": 0.8141053250473437, "grad_norm": NaN, "learning_rate": 0.0003696304808206983, "loss": 0.0, "step": 59540 }, { "epoch": 0.8142420575507107, "grad_norm": NaN, "learning_rate": 0.00036957809035509804, "loss": 0.0, "step": 59550 }, { "epoch": 0.8143787900540777, "grad_norm": NaN, "learning_rate": 0.0003695256930793615, "loss": 0.0, "step": 59560 }, { "epoch": 0.8145155225574447, "grad_norm": NaN, "learning_rate": 0.0003694732889964724, "loss": 0.0, "step": 59570 }, { "epoch": 0.8146522550608117, "grad_norm": NaN, "learning_rate": 0.00036942087810941545, "loss": 0.0, "step": 59580 }, { "epoch": 0.8147889875641788, "grad_norm": NaN, "learning_rate": 0.00036936846042117554, "loss": 0.0, "step": 59590 }, { "epoch": 0.8149257200675458, "grad_norm": NaN, "learning_rate": 0.0003693160359347378, "loss": 0.0, "step": 59600 }, { "epoch": 0.8150624525709129, "grad_norm": NaN, "learning_rate": 0.000369263604653088, "loss": 0.0, "step": 59610 }, { "epoch": 0.8151991850742799, "grad_norm": NaN, "learning_rate": 0.0003692111665792121, "loss": 0.0, "step": 59620 }, { "epoch": 0.815335917577647, "grad_norm": NaN, "learning_rate": 0.00036915872171609646, "loss": 0.0, "step": 59630 }, { "epoch": 0.815472650081014, "grad_norm": NaN, "learning_rate": 0.0003691062700667281, "loss": 0.0, "step": 59640 }, { "epoch": 0.8156093825843811, "grad_norm": NaN, "learning_rate": 0.000369053811634094, "loss": 0.0, "step": 59650 }, { "epoch": 0.8157461150877481, "grad_norm": NaN, "learning_rate": 0.00036900134642118177, "loss": 0.0, "step": 59660 }, { "epoch": 0.8158828475911151, "grad_norm": NaN, "learning_rate": 0.0003689488744309796, "loss": 0.0, "step": 59670 }, { "epoch": 0.8160195800944822, "grad_norm": NaN, "learning_rate": 0.0003688963956664755, "loss": 0.0, "step": 59680 }, { "epoch": 0.8161563125978492, "grad_norm": NaN, "learning_rate": 0.00036884391013065844, "loss": 0.0, "step": 59690 }, { "epoch": 0.8162930451012163, "grad_norm": NaN, "learning_rate": 0.00036879141782651746, "loss": 0.0, "step": 59700 }, { "epoch": 0.8164297776045832, "grad_norm": NaN, "learning_rate": 0.00036873891875704214, "loss": 0.0, "step": 59710 }, { "epoch": 0.8165665101079503, "grad_norm": NaN, "learning_rate": 0.00036868641292522224, "loss": 0.0, "step": 59720 }, { "epoch": 0.8167032426113173, "grad_norm": NaN, "learning_rate": 0.00036863390033404814, "loss": 0.0, "step": 59730 }, { "epoch": 0.8168399751146844, "grad_norm": NaN, "learning_rate": 0.00036858138098651056, "loss": 0.0, "step": 59740 }, { "epoch": 0.8169767076180514, "grad_norm": NaN, "learning_rate": 0.00036852885488560037, "loss": 0.0, "step": 59750 }, { "epoch": 0.8171134401214185, "grad_norm": NaN, "learning_rate": 0.00036847632203430917, "loss": 0.0, "step": 59760 }, { "epoch": 0.8172501726247855, "grad_norm": NaN, "learning_rate": 0.00036842378243562876, "loss": 0.0, "step": 59770 }, { "epoch": 0.8173869051281525, "grad_norm": NaN, "learning_rate": 0.0003683712360925512, "loss": 0.0, "step": 59780 }, { "epoch": 0.8175236376315196, "grad_norm": NaN, "learning_rate": 0.0003683186830080692, "loss": 0.0, "step": 59790 }, { "epoch": 0.8176603701348866, "grad_norm": NaN, "learning_rate": 0.0003682661231851757, "loss": 0.0, "step": 59800 }, { "epoch": 0.8177971026382537, "grad_norm": NaN, "learning_rate": 0.00036821355662686406, "loss": 0.0, "step": 59810 }, { "epoch": 0.8179338351416207, "grad_norm": NaN, "learning_rate": 0.00036816098333612804, "loss": 0.0, "step": 59820 }, { "epoch": 0.8180705676449878, "grad_norm": NaN, "learning_rate": 0.00036810840331596176, "loss": 0.0, "step": 59830 }, { "epoch": 0.8182073001483547, "grad_norm": NaN, "learning_rate": 0.00036805581656935965, "loss": 0.0, "step": 59840 }, { "epoch": 0.8183440326517218, "grad_norm": NaN, "learning_rate": 0.0003680032230993167, "loss": 0.0, "step": 59850 }, { "epoch": 0.8184807651550888, "grad_norm": NaN, "learning_rate": 0.00036795062290882806, "loss": 0.0, "step": 59860 }, { "epoch": 0.8186174976584559, "grad_norm": NaN, "learning_rate": 0.0003678980160008895, "loss": 0.0, "step": 59870 }, { "epoch": 0.8187542301618229, "grad_norm": NaN, "learning_rate": 0.000367845402378497, "loss": 0.0, "step": 59880 }, { "epoch": 0.8188909626651899, "grad_norm": NaN, "learning_rate": 0.00036779278204464695, "loss": 0.0, "step": 59890 }, { "epoch": 0.819027695168557, "grad_norm": NaN, "learning_rate": 0.00036774015500233624, "loss": 0.0, "step": 59900 }, { "epoch": 0.819164427671924, "grad_norm": NaN, "learning_rate": 0.0003676875212545619, "loss": 0.0, "step": 59910 }, { "epoch": 0.8193011601752911, "grad_norm": NaN, "learning_rate": 0.00036763488080432177, "loss": 0.0, "step": 59920 }, { "epoch": 0.8194378926786581, "grad_norm": NaN, "learning_rate": 0.00036758223365461354, "loss": 0.0, "step": 59930 }, { "epoch": 0.8195746251820252, "grad_norm": NaN, "learning_rate": 0.0003675295798084356, "loss": 0.0, "step": 59940 }, { "epoch": 0.8197113576853922, "grad_norm": NaN, "learning_rate": 0.0003674769192687866, "loss": 0.0, "step": 59950 }, { "epoch": 0.8198480901887593, "grad_norm": NaN, "learning_rate": 0.0003674242520386658, "loss": 0.0, "step": 59960 }, { "epoch": 0.8199848226921262, "grad_norm": NaN, "learning_rate": 0.0003673715781210725, "loss": 0.0, "step": 59970 }, { "epoch": 0.8201215551954933, "grad_norm": NaN, "learning_rate": 0.0003673188975190067, "loss": 0.0, "step": 59980 }, { "epoch": 0.8202582876988603, "grad_norm": NaN, "learning_rate": 0.00036726621023546856, "loss": 0.0, "step": 59990 }, { "epoch": 0.8203950202022273, "grad_norm": NaN, "learning_rate": 0.0003672135162734587, "loss": 0.0, "step": 60000 }, { "epoch": 0.8205317527055944, "grad_norm": NaN, "learning_rate": 0.0003671608156359781, "loss": 0.0, "step": 60010 }, { "epoch": 0.8206684852089614, "grad_norm": NaN, "learning_rate": 0.0003671081083260281, "loss": 0.0, "step": 60020 }, { "epoch": 0.8208052177123285, "grad_norm": NaN, "learning_rate": 0.0003670553943466105, "loss": 0.0, "step": 60030 }, { "epoch": 0.8209419502156955, "grad_norm": NaN, "learning_rate": 0.0003670026737007275, "loss": 0.0, "step": 60040 }, { "epoch": 0.8210786827190626, "grad_norm": NaN, "learning_rate": 0.0003669499463913815, "loss": 0.0, "step": 60050 }, { "epoch": 0.8212154152224296, "grad_norm": NaN, "learning_rate": 0.00036689721242157543, "loss": 0.0, "step": 60060 }, { "epoch": 0.8213521477257967, "grad_norm": NaN, "learning_rate": 0.0003668444717943125, "loss": 0.0, "step": 60070 }, { "epoch": 0.8214888802291637, "grad_norm": NaN, "learning_rate": 0.00036679172451259645, "loss": 0.0, "step": 60080 }, { "epoch": 0.8216256127325308, "grad_norm": NaN, "learning_rate": 0.0003667389705794314, "loss": 0.0, "step": 60090 }, { "epoch": 0.8217623452358978, "grad_norm": NaN, "learning_rate": 0.00036668620999782156, "loss": 0.0, "step": 60100 }, { "epoch": 0.8218990777392647, "grad_norm": NaN, "learning_rate": 0.0003666334427707718, "loss": 0.0, "step": 60110 }, { "epoch": 0.8220358102426318, "grad_norm": NaN, "learning_rate": 0.0003665806689012873, "loss": 0.0, "step": 60120 }, { "epoch": 0.8221725427459988, "grad_norm": NaN, "learning_rate": 0.0003665278883923736, "loss": 0.0, "step": 60130 }, { "epoch": 0.8223092752493659, "grad_norm": NaN, "learning_rate": 0.00036647510124703664, "loss": 0.0, "step": 60140 }, { "epoch": 0.8224460077527329, "grad_norm": NaN, "learning_rate": 0.00036642230746828264, "loss": 0.0, "step": 60150 }, { "epoch": 0.8225827402561, "grad_norm": NaN, "learning_rate": 0.00036636950705911845, "loss": 0.0, "step": 60160 }, { "epoch": 0.822719472759467, "grad_norm": NaN, "learning_rate": 0.00036631670002255093, "loss": 0.0, "step": 60170 }, { "epoch": 0.8228562052628341, "grad_norm": NaN, "learning_rate": 0.0003662638863615875, "loss": 0.0, "step": 60180 }, { "epoch": 0.8229929377662011, "grad_norm": NaN, "learning_rate": 0.00036621106607923623, "loss": 0.0, "step": 60190 }, { "epoch": 0.8231296702695682, "grad_norm": NaN, "learning_rate": 0.00036615823917850507, "loss": 0.0, "step": 60200 }, { "epoch": 0.8232664027729352, "grad_norm": NaN, "learning_rate": 0.0003661054056624026, "loss": 0.0, "step": 60210 }, { "epoch": 0.8234031352763022, "grad_norm": NaN, "learning_rate": 0.0003660525655339379, "loss": 0.0, "step": 60220 }, { "epoch": 0.8235398677796693, "grad_norm": NaN, "learning_rate": 0.00036599971879612016, "loss": 0.0, "step": 60230 }, { "epoch": 0.8236766002830362, "grad_norm": NaN, "learning_rate": 0.0003659468654519592, "loss": 0.0, "step": 60240 }, { "epoch": 0.8238133327864033, "grad_norm": NaN, "learning_rate": 0.00036589400550446496, "loss": 0.0, "step": 60250 }, { "epoch": 0.8239500652897703, "grad_norm": NaN, "learning_rate": 0.00036584113895664794, "loss": 0.0, "step": 60260 }, { "epoch": 0.8240867977931374, "grad_norm": NaN, "learning_rate": 0.000365788265811519, "loss": 0.0, "step": 60270 }, { "epoch": 0.8242235302965044, "grad_norm": NaN, "learning_rate": 0.00036573538607208924, "loss": 0.0, "step": 60280 }, { "epoch": 0.8243602627998715, "grad_norm": NaN, "learning_rate": 0.0003656824997413703, "loss": 0.0, "step": 60290 }, { "epoch": 0.8244969953032385, "grad_norm": NaN, "learning_rate": 0.0003656296068223741, "loss": 0.0, "step": 60300 }, { "epoch": 0.8246337278066056, "grad_norm": NaN, "learning_rate": 0.00036557670731811304, "loss": 0.0, "step": 60310 }, { "epoch": 0.8247704603099726, "grad_norm": NaN, "learning_rate": 0.00036552380123159976, "loss": 0.0, "step": 60320 }, { "epoch": 0.8249071928133396, "grad_norm": NaN, "learning_rate": 0.0003654708885658473, "loss": 0.0, "step": 60330 }, { "epoch": 0.8250439253167067, "grad_norm": NaN, "learning_rate": 0.00036541796932386915, "loss": 0.0, "step": 60340 }, { "epoch": 0.8251806578200737, "grad_norm": NaN, "learning_rate": 0.00036536504350867917, "loss": 0.0, "step": 60350 }, { "epoch": 0.8253173903234408, "grad_norm": NaN, "learning_rate": 0.0003653121111232916, "loss": 0.0, "step": 60360 }, { "epoch": 0.8254541228268077, "grad_norm": NaN, "learning_rate": 0.00036525917217072076, "loss": 0.0, "step": 60370 }, { "epoch": 0.8255908553301748, "grad_norm": NaN, "learning_rate": 0.00036520622665398185, "loss": 0.0, "step": 60380 }, { "epoch": 0.8257275878335418, "grad_norm": NaN, "learning_rate": 0.00036515327457609006, "loss": 0.0, "step": 60390 }, { "epoch": 0.8258643203369089, "grad_norm": NaN, "learning_rate": 0.0003651003159400612, "loss": 0.0, "step": 60400 }, { "epoch": 0.8260010528402759, "grad_norm": NaN, "learning_rate": 0.00036504735074891127, "loss": 0.0, "step": 60410 }, { "epoch": 0.826137785343643, "grad_norm": NaN, "learning_rate": 0.0003649943790056566, "loss": 0.0, "step": 60420 }, { "epoch": 0.82627451784701, "grad_norm": NaN, "learning_rate": 0.0003649414007133143, "loss": 0.0, "step": 60430 }, { "epoch": 0.826411250350377, "grad_norm": NaN, "learning_rate": 0.0003648884158749012, "loss": 0.0, "step": 60440 }, { "epoch": 0.8265479828537441, "grad_norm": NaN, "learning_rate": 0.00036483542449343507, "loss": 0.0, "step": 60450 }, { "epoch": 0.8266847153571111, "grad_norm": NaN, "learning_rate": 0.0003647824265719339, "loss": 0.0, "step": 60460 }, { "epoch": 0.8268214478604782, "grad_norm": NaN, "learning_rate": 0.0003647294221134158, "loss": 0.0, "step": 60470 }, { "epoch": 0.8269581803638452, "grad_norm": NaN, "learning_rate": 0.00036467641112089956, "loss": 0.0, "step": 60480 }, { "epoch": 0.8270949128672123, "grad_norm": NaN, "learning_rate": 0.0003646233935974042, "loss": 0.0, "step": 60490 }, { "epoch": 0.8272316453705792, "grad_norm": NaN, "learning_rate": 0.0003645703695459491, "loss": 0.0, "step": 60500 }, { "epoch": 0.8273683778739463, "grad_norm": NaN, "learning_rate": 0.0003645173389695542, "loss": 0.0, "step": 60510 }, { "epoch": 0.8275051103773133, "grad_norm": NaN, "learning_rate": 0.00036446430187123954, "loss": 0.0, "step": 60520 }, { "epoch": 0.8276418428806804, "grad_norm": NaN, "learning_rate": 0.00036441125825402567, "loss": 0.0, "step": 60530 }, { "epoch": 0.8277785753840474, "grad_norm": NaN, "learning_rate": 0.0003643582081209336, "loss": 0.0, "step": 60540 }, { "epoch": 0.8279153078874144, "grad_norm": NaN, "learning_rate": 0.00036430515147498435, "loss": 0.0, "step": 60550 }, { "epoch": 0.8280520403907815, "grad_norm": NaN, "learning_rate": 0.00036425208831919985, "loss": 0.0, "step": 60560 }, { "epoch": 0.8281887728941485, "grad_norm": NaN, "learning_rate": 0.0003641990186566019, "loss": 0.0, "step": 60570 }, { "epoch": 0.8283255053975156, "grad_norm": NaN, "learning_rate": 0.00036414594249021317, "loss": 0.0, "step": 60580 }, { "epoch": 0.8284622379008826, "grad_norm": NaN, "learning_rate": 0.0003640928598230562, "loss": 0.0, "step": 60590 }, { "epoch": 0.8285989704042497, "grad_norm": NaN, "learning_rate": 0.0003640397706581541, "loss": 0.0, "step": 60600 }, { "epoch": 0.8287357029076167, "grad_norm": NaN, "learning_rate": 0.00036398667499853044, "loss": 0.0, "step": 60610 }, { "epoch": 0.8288724354109838, "grad_norm": NaN, "learning_rate": 0.00036393357284720905, "loss": 0.0, "step": 60620 }, { "epoch": 0.8290091679143508, "grad_norm": NaN, "learning_rate": 0.00036388046420721425, "loss": 0.0, "step": 60630 }, { "epoch": 0.8291459004177179, "grad_norm": NaN, "learning_rate": 0.00036382734908157067, "loss": 0.0, "step": 60640 }, { "epoch": 0.8292826329210848, "grad_norm": NaN, "learning_rate": 0.00036377422747330325, "loss": 0.0, "step": 60650 }, { "epoch": 0.8294193654244518, "grad_norm": NaN, "learning_rate": 0.00036372109938543717, "loss": 0.0, "step": 60660 }, { "epoch": 0.8295560979278189, "grad_norm": NaN, "learning_rate": 0.00036366796482099846, "loss": 0.0, "step": 60670 }, { "epoch": 0.8296928304311859, "grad_norm": NaN, "learning_rate": 0.0003636148237830129, "loss": 0.0, "step": 60680 }, { "epoch": 0.829829562934553, "grad_norm": NaN, "learning_rate": 0.0003635616762745072, "loss": 0.0, "step": 60690 }, { "epoch": 0.82996629543792, "grad_norm": NaN, "learning_rate": 0.00036350852229850806, "loss": 0.0, "step": 60700 }, { "epoch": 0.8301030279412871, "grad_norm": NaN, "learning_rate": 0.0003634553618580426, "loss": 0.0, "step": 60710 }, { "epoch": 0.8302397604446541, "grad_norm": NaN, "learning_rate": 0.0003634021949561385, "loss": 0.0, "step": 60720 }, { "epoch": 0.8303764929480212, "grad_norm": NaN, "learning_rate": 0.00036334902159582366, "loss": 0.0, "step": 60730 }, { "epoch": 0.8305132254513882, "grad_norm": NaN, "learning_rate": 0.0003632958417801264, "loss": 0.0, "step": 60740 }, { "epoch": 0.8306499579547553, "grad_norm": NaN, "learning_rate": 0.00036324265551207534, "loss": 0.0, "step": 60750 }, { "epoch": 0.8307866904581223, "grad_norm": NaN, "learning_rate": 0.00036318946279469946, "loss": 0.0, "step": 60760 }, { "epoch": 0.8309234229614892, "grad_norm": NaN, "learning_rate": 0.00036313626363102826, "loss": 0.0, "step": 60770 }, { "epoch": 0.8310601554648563, "grad_norm": NaN, "learning_rate": 0.0003630830580240915, "loss": 0.0, "step": 60780 }, { "epoch": 0.8311968879682233, "grad_norm": NaN, "learning_rate": 0.0003630298459769192, "loss": 0.0, "step": 60790 }, { "epoch": 0.8313336204715904, "grad_norm": NaN, "learning_rate": 0.000362976627492542, "loss": 0.0, "step": 60800 }, { "epoch": 0.8314703529749574, "grad_norm": NaN, "learning_rate": 0.00036292340257399066, "loss": 0.0, "step": 60810 }, { "epoch": 0.8316070854783245, "grad_norm": NaN, "learning_rate": 0.0003628701712242965, "loss": 0.0, "step": 60820 }, { "epoch": 0.8317438179816915, "grad_norm": NaN, "learning_rate": 0.00036281693344649107, "loss": 0.0, "step": 60830 }, { "epoch": 0.8318805504850586, "grad_norm": NaN, "learning_rate": 0.0003627636892436063, "loss": 0.0, "step": 60840 }, { "epoch": 0.8320172829884256, "grad_norm": NaN, "learning_rate": 0.0003627104386186746, "loss": 0.0, "step": 60850 }, { "epoch": 0.8321540154917927, "grad_norm": NaN, "learning_rate": 0.0003626571815747286, "loss": 0.0, "step": 60860 }, { "epoch": 0.8322907479951597, "grad_norm": NaN, "learning_rate": 0.0003626039181148014, "loss": 0.0, "step": 60870 }, { "epoch": 0.8324274804985267, "grad_norm": NaN, "learning_rate": 0.0003625506482419264, "loss": 0.0, "step": 60880 }, { "epoch": 0.8325642130018938, "grad_norm": NaN, "learning_rate": 0.0003624973719591374, "loss": 0.0, "step": 60890 }, { "epoch": 0.8327009455052607, "grad_norm": NaN, "learning_rate": 0.0003624440892694685, "loss": 0.0, "step": 60900 }, { "epoch": 0.8328376780086278, "grad_norm": NaN, "learning_rate": 0.0003623908001759544, "loss": 0.0, "step": 60910 }, { "epoch": 0.8329744105119948, "grad_norm": NaN, "learning_rate": 0.0003623375046816298, "loss": 0.0, "step": 60920 }, { "epoch": 0.8331111430153619, "grad_norm": NaN, "learning_rate": 0.0003622842027895301, "loss": 0.0, "step": 60930 }, { "epoch": 0.8332478755187289, "grad_norm": NaN, "learning_rate": 0.00036223089450269073, "loss": 0.0, "step": 60940 }, { "epoch": 0.833384608022096, "grad_norm": NaN, "learning_rate": 0.0003621775798241479, "loss": 0.0, "step": 60950 }, { "epoch": 0.833521340525463, "grad_norm": NaN, "learning_rate": 0.00036212425875693777, "loss": 0.0, "step": 60960 }, { "epoch": 0.83365807302883, "grad_norm": NaN, "learning_rate": 0.00036207093130409713, "loss": 0.0, "step": 60970 }, { "epoch": 0.8337948055321971, "grad_norm": NaN, "learning_rate": 0.00036201759746866305, "loss": 0.0, "step": 60980 }, { "epoch": 0.8339315380355641, "grad_norm": NaN, "learning_rate": 0.00036196425725367295, "loss": 0.0, "step": 60990 }, { "epoch": 0.8340682705389312, "grad_norm": NaN, "learning_rate": 0.0003619109106621645, "loss": 0.0, "step": 61000 }, { "epoch": 0.8342050030422982, "grad_norm": NaN, "learning_rate": 0.0003618575576971762, "loss": 0.0, "step": 61010 }, { "epoch": 0.8343417355456653, "grad_norm": NaN, "learning_rate": 0.0003618041983617462, "loss": 0.0, "step": 61020 }, { "epoch": 0.8344784680490323, "grad_norm": NaN, "learning_rate": 0.00036175083265891365, "loss": 0.0, "step": 61030 }, { "epoch": 0.8346152005523994, "grad_norm": NaN, "learning_rate": 0.0003616974605917176, "loss": 0.0, "step": 61040 }, { "epoch": 0.8347519330557663, "grad_norm": NaN, "learning_rate": 0.00036164408216319784, "loss": 0.0, "step": 61050 }, { "epoch": 0.8348886655591334, "grad_norm": NaN, "learning_rate": 0.0003615906973763943, "loss": 0.0, "step": 61060 }, { "epoch": 0.8350253980625004, "grad_norm": NaN, "learning_rate": 0.0003615373062343472, "loss": 0.0, "step": 61070 }, { "epoch": 0.8351621305658674, "grad_norm": NaN, "learning_rate": 0.00036148390874009734, "loss": 0.0, "step": 61080 }, { "epoch": 0.8352988630692345, "grad_norm": NaN, "learning_rate": 0.0003614305048966858, "loss": 0.0, "step": 61090 }, { "epoch": 0.8354355955726015, "grad_norm": NaN, "learning_rate": 0.000361377094707154, "loss": 0.0, "step": 61100 }, { "epoch": 0.8355723280759686, "grad_norm": NaN, "learning_rate": 0.00036132367817454363, "loss": 0.0, "step": 61110 }, { "epoch": 0.8357090605793356, "grad_norm": NaN, "learning_rate": 0.00036127025530189693, "loss": 0.0, "step": 61120 }, { "epoch": 0.8358457930827027, "grad_norm": NaN, "learning_rate": 0.0003612168260922564, "loss": 0.0, "step": 61130 }, { "epoch": 0.8359825255860697, "grad_norm": NaN, "learning_rate": 0.0003611633905486648, "loss": 0.0, "step": 61140 }, { "epoch": 0.8361192580894368, "grad_norm": NaN, "learning_rate": 0.00036110994867416546, "loss": 0.0, "step": 61150 }, { "epoch": 0.8362559905928038, "grad_norm": NaN, "learning_rate": 0.000361056500471802, "loss": 0.0, "step": 61160 }, { "epoch": 0.8363927230961709, "grad_norm": NaN, "learning_rate": 0.0003610030459446183, "loss": 0.0, "step": 61170 }, { "epoch": 0.8365294555995378, "grad_norm": NaN, "learning_rate": 0.00036094958509565856, "loss": 0.0, "step": 61180 }, { "epoch": 0.8366661881029048, "grad_norm": NaN, "learning_rate": 0.00036089611792796767, "loss": 0.0, "step": 61190 }, { "epoch": 0.8368029206062719, "grad_norm": NaN, "learning_rate": 0.00036084264444459057, "loss": 0.0, "step": 61200 }, { "epoch": 0.8369396531096389, "grad_norm": NaN, "learning_rate": 0.00036078916464857264, "loss": 0.0, "step": 61210 }, { "epoch": 0.837076385613006, "grad_norm": NaN, "learning_rate": 0.0003607356785429596, "loss": 0.0, "step": 61220 }, { "epoch": 0.837213118116373, "grad_norm": NaN, "learning_rate": 0.0003606821861307976, "loss": 0.0, "step": 61230 }, { "epoch": 0.8373498506197401, "grad_norm": NaN, "learning_rate": 0.0003606286874151331, "loss": 0.0, "step": 61240 }, { "epoch": 0.8374865831231071, "grad_norm": NaN, "learning_rate": 0.00036057518239901285, "loss": 0.0, "step": 61250 }, { "epoch": 0.8376233156264742, "grad_norm": NaN, "learning_rate": 0.00036052167108548415, "loss": 0.0, "step": 61260 }, { "epoch": 0.8377600481298412, "grad_norm": NaN, "learning_rate": 0.00036046815347759453, "loss": 0.0, "step": 61270 }, { "epoch": 0.8378967806332083, "grad_norm": NaN, "learning_rate": 0.0003604146295783918, "loss": 0.0, "step": 61280 }, { "epoch": 0.8380335131365753, "grad_norm": NaN, "learning_rate": 0.0003603610993909243, "loss": 0.0, "step": 61290 }, { "epoch": 0.8381702456399422, "grad_norm": NaN, "learning_rate": 0.00036030756291824064, "loss": 0.0, "step": 61300 }, { "epoch": 0.8383069781433093, "grad_norm": NaN, "learning_rate": 0.0003602540201633897, "loss": 0.0, "step": 61310 }, { "epoch": 0.8384437106466763, "grad_norm": NaN, "learning_rate": 0.0003602004711294209, "loss": 0.0, "step": 61320 }, { "epoch": 0.8385804431500434, "grad_norm": NaN, "learning_rate": 0.00036014691581938397, "loss": 0.0, "step": 61330 }, { "epoch": 0.8387171756534104, "grad_norm": NaN, "learning_rate": 0.0003600933542363288, "loss": 0.0, "step": 61340 }, { "epoch": 0.8388539081567775, "grad_norm": NaN, "learning_rate": 0.000360039786383306, "loss": 0.0, "step": 61350 }, { "epoch": 0.8389906406601445, "grad_norm": NaN, "learning_rate": 0.0003599862122633663, "loss": 0.0, "step": 61360 }, { "epoch": 0.8391273731635116, "grad_norm": NaN, "learning_rate": 0.00035993263187956065, "loss": 0.0, "step": 61370 }, { "epoch": 0.8392641056668786, "grad_norm": NaN, "learning_rate": 0.0003598790452349407, "loss": 0.0, "step": 61380 }, { "epoch": 0.8394008381702457, "grad_norm": NaN, "learning_rate": 0.00035982545233255816, "loss": 0.0, "step": 61390 }, { "epoch": 0.8395375706736127, "grad_norm": NaN, "learning_rate": 0.00035977185317546533, "loss": 0.0, "step": 61400 }, { "epoch": 0.8396743031769797, "grad_norm": NaN, "learning_rate": 0.00035971824776671467, "loss": 0.0, "step": 61410 }, { "epoch": 0.8398110356803468, "grad_norm": NaN, "learning_rate": 0.00035966463610935913, "loss": 0.0, "step": 61420 }, { "epoch": 0.8399477681837137, "grad_norm": NaN, "learning_rate": 0.00035961101820645194, "loss": 0.0, "step": 61430 }, { "epoch": 0.8400845006870808, "grad_norm": NaN, "learning_rate": 0.0003595573940610467, "loss": 0.0, "step": 61440 }, { "epoch": 0.8402212331904478, "grad_norm": NaN, "learning_rate": 0.0003595037636761974, "loss": 0.0, "step": 61450 }, { "epoch": 0.8403579656938149, "grad_norm": NaN, "learning_rate": 0.0003594501270549584, "loss": 0.0, "step": 61460 }, { "epoch": 0.8404946981971819, "grad_norm": NaN, "learning_rate": 0.00035939648420038427, "loss": 0.0, "step": 61470 }, { "epoch": 0.840631430700549, "grad_norm": NaN, "learning_rate": 0.00035934283511553024, "loss": 0.0, "step": 61480 }, { "epoch": 0.840768163203916, "grad_norm": NaN, "learning_rate": 0.00035928917980345153, "loss": 0.0, "step": 61490 }, { "epoch": 0.8409048957072831, "grad_norm": NaN, "learning_rate": 0.0003592355182672039, "loss": 0.0, "step": 61500 }, { "epoch": 0.8410416282106501, "grad_norm": NaN, "learning_rate": 0.0003591818505098435, "loss": 0.0, "step": 61510 }, { "epoch": 0.8411783607140171, "grad_norm": NaN, "learning_rate": 0.00035912817653442673, "loss": 0.0, "step": 61520 }, { "epoch": 0.8413150932173842, "grad_norm": NaN, "learning_rate": 0.00035907449634401057, "loss": 0.0, "step": 61530 }, { "epoch": 0.8414518257207512, "grad_norm": NaN, "learning_rate": 0.00035902080994165193, "loss": 0.0, "step": 61540 }, { "epoch": 0.8415885582241183, "grad_norm": NaN, "learning_rate": 0.00035896711733040843, "loss": 0.0, "step": 61550 }, { "epoch": 0.8417252907274853, "grad_norm": NaN, "learning_rate": 0.00035891341851333796, "loss": 0.0, "step": 61560 }, { "epoch": 0.8418620232308524, "grad_norm": NaN, "learning_rate": 0.0003588597134934987, "loss": 0.0, "step": 61570 }, { "epoch": 0.8419987557342193, "grad_norm": NaN, "learning_rate": 0.00035880600227394935, "loss": 0.0, "step": 61580 }, { "epoch": 0.8421354882375864, "grad_norm": NaN, "learning_rate": 0.0003587522848577486, "loss": 0.0, "step": 61590 }, { "epoch": 0.8422722207409534, "grad_norm": NaN, "learning_rate": 0.00035869856124795584, "loss": 0.0, "step": 61600 }, { "epoch": 0.8424089532443205, "grad_norm": NaN, "learning_rate": 0.0003586448314476309, "loss": 0.0, "step": 61610 }, { "epoch": 0.8425456857476875, "grad_norm": NaN, "learning_rate": 0.00035859109545983346, "loss": 0.0, "step": 61620 }, { "epoch": 0.8426824182510545, "grad_norm": NaN, "learning_rate": 0.00035853735328762406, "loss": 0.0, "step": 61630 }, { "epoch": 0.8428191507544216, "grad_norm": NaN, "learning_rate": 0.00035848360493406333, "loss": 0.0, "step": 61640 }, { "epoch": 0.8429558832577886, "grad_norm": NaN, "learning_rate": 0.00035842985040221225, "loss": 0.0, "step": 61650 }, { "epoch": 0.8430926157611557, "grad_norm": NaN, "learning_rate": 0.0003583760896951324, "loss": 0.0, "step": 61660 }, { "epoch": 0.8432293482645227, "grad_norm": NaN, "learning_rate": 0.0003583223228158853, "loss": 0.0, "step": 61670 }, { "epoch": 0.8433660807678898, "grad_norm": NaN, "learning_rate": 0.00035826854976753317, "loss": 0.0, "step": 61680 }, { "epoch": 0.8435028132712568, "grad_norm": NaN, "learning_rate": 0.00035821477055313837, "loss": 0.0, "step": 61690 }, { "epoch": 0.8436395457746239, "grad_norm": NaN, "learning_rate": 0.0003581609851757639, "loss": 0.0, "step": 61700 }, { "epoch": 0.8437762782779908, "grad_norm": NaN, "learning_rate": 0.00035810719363847264, "loss": 0.0, "step": 61710 }, { "epoch": 0.8439130107813579, "grad_norm": NaN, "learning_rate": 0.00035805339594432826, "loss": 0.0, "step": 61720 }, { "epoch": 0.8440497432847249, "grad_norm": NaN, "learning_rate": 0.00035799959209639466, "loss": 0.0, "step": 61730 }, { "epoch": 0.8441864757880919, "grad_norm": NaN, "learning_rate": 0.0003579457820977359, "loss": 0.0, "step": 61740 }, { "epoch": 0.844323208291459, "grad_norm": NaN, "learning_rate": 0.0003578919659514166, "loss": 0.0, "step": 61750 }, { "epoch": 0.844459940794826, "grad_norm": NaN, "learning_rate": 0.0003578381436605016, "loss": 0.0, "step": 61760 }, { "epoch": 0.8445966732981931, "grad_norm": NaN, "learning_rate": 0.0003577843152280563, "loss": 0.0, "step": 61770 }, { "epoch": 0.8447334058015601, "grad_norm": NaN, "learning_rate": 0.0003577304806571462, "loss": 0.0, "step": 61780 }, { "epoch": 0.8448701383049272, "grad_norm": NaN, "learning_rate": 0.00035767663995083725, "loss": 0.0, "step": 61790 }, { "epoch": 0.8450068708082942, "grad_norm": NaN, "learning_rate": 0.0003576227931121958, "loss": 0.0, "step": 61800 }, { "epoch": 0.8451436033116613, "grad_norm": NaN, "learning_rate": 0.0003575689401442885, "loss": 0.0, "step": 61810 }, { "epoch": 0.8452803358150283, "grad_norm": NaN, "learning_rate": 0.0003575150810501823, "loss": 0.0, "step": 61820 }, { "epoch": 0.8454170683183954, "grad_norm": NaN, "learning_rate": 0.00035746121583294456, "loss": 0.0, "step": 61830 }, { "epoch": 0.8455538008217623, "grad_norm": NaN, "learning_rate": 0.00035740734449564297, "loss": 0.0, "step": 61840 }, { "epoch": 0.8456905333251293, "grad_norm": NaN, "learning_rate": 0.0003573534670413456, "loss": 0.0, "step": 61850 }, { "epoch": 0.8458272658284964, "grad_norm": NaN, "learning_rate": 0.00035729958347312083, "loss": 0.0, "step": 61860 }, { "epoch": 0.8459639983318634, "grad_norm": NaN, "learning_rate": 0.0003572456937940374, "loss": 0.0, "step": 61870 }, { "epoch": 0.8461007308352305, "grad_norm": NaN, "learning_rate": 0.0003571917980071645, "loss": 0.0, "step": 61880 }, { "epoch": 0.8462374633385975, "grad_norm": NaN, "learning_rate": 0.00035713789611557144, "loss": 0.0, "step": 61890 }, { "epoch": 0.8463741958419646, "grad_norm": NaN, "learning_rate": 0.0003570839881223281, "loss": 0.0, "step": 61900 }, { "epoch": 0.8465109283453316, "grad_norm": NaN, "learning_rate": 0.0003570300740305046, "loss": 0.0, "step": 61910 }, { "epoch": 0.8466476608486987, "grad_norm": NaN, "learning_rate": 0.0003569761538431713, "loss": 0.0, "step": 61920 }, { "epoch": 0.8467843933520657, "grad_norm": NaN, "learning_rate": 0.0003569222275633991, "loss": 0.0, "step": 61930 }, { "epoch": 0.8469211258554328, "grad_norm": NaN, "learning_rate": 0.0003568682951942592, "loss": 0.0, "step": 61940 }, { "epoch": 0.8470578583587998, "grad_norm": NaN, "learning_rate": 0.00035681435673882316, "loss": 0.0, "step": 61950 }, { "epoch": 0.8471945908621668, "grad_norm": NaN, "learning_rate": 0.00035676041220016286, "loss": 0.0, "step": 61960 }, { "epoch": 0.8473313233655339, "grad_norm": NaN, "learning_rate": 0.0003567064615813503, "loss": 0.0, "step": 61970 }, { "epoch": 0.8474680558689008, "grad_norm": NaN, "learning_rate": 0.00035665250488545837, "loss": 0.0, "step": 61980 }, { "epoch": 0.8476047883722679, "grad_norm": NaN, "learning_rate": 0.0003565985421155597, "loss": 0.0, "step": 61990 }, { "epoch": 0.8477415208756349, "grad_norm": NaN, "learning_rate": 0.0003565445732747277, "loss": 0.0, "step": 62000 }, { "epoch": 0.847878253379002, "grad_norm": NaN, "learning_rate": 0.0003564905983660359, "loss": 0.0, "step": 62010 }, { "epoch": 0.848014985882369, "grad_norm": NaN, "learning_rate": 0.0003564366173925583, "loss": 0.0, "step": 62020 }, { "epoch": 0.8481517183857361, "grad_norm": NaN, "learning_rate": 0.00035638263035736914, "loss": 0.0, "step": 62030 }, { "epoch": 0.8482884508891031, "grad_norm": NaN, "learning_rate": 0.0003563286372635431, "loss": 0.0, "step": 62040 }, { "epoch": 0.8484251833924702, "grad_norm": NaN, "learning_rate": 0.00035627463811415506, "loss": 0.0, "step": 62050 }, { "epoch": 0.8485619158958372, "grad_norm": NaN, "learning_rate": 0.0003562206329122805, "loss": 0.0, "step": 62060 }, { "epoch": 0.8486986483992042, "grad_norm": NaN, "learning_rate": 0.0003561666216609951, "loss": 0.0, "step": 62070 }, { "epoch": 0.8488353809025713, "grad_norm": NaN, "learning_rate": 0.0003561126043633746, "loss": 0.0, "step": 62080 }, { "epoch": 0.8489721134059383, "grad_norm": NaN, "learning_rate": 0.00035605858102249566, "loss": 0.0, "step": 62090 }, { "epoch": 0.8491088459093054, "grad_norm": NaN, "learning_rate": 0.0003560045516414348, "loss": 0.0, "step": 62100 }, { "epoch": 0.8492455784126723, "grad_norm": NaN, "learning_rate": 0.00035595051622326916, "loss": 0.0, "step": 62110 }, { "epoch": 0.8493823109160394, "grad_norm": NaN, "learning_rate": 0.00035589647477107615, "loss": 0.0, "step": 62120 }, { "epoch": 0.8495190434194064, "grad_norm": NaN, "learning_rate": 0.00035584242728793343, "loss": 0.0, "step": 62130 }, { "epoch": 0.8496557759227735, "grad_norm": NaN, "learning_rate": 0.0003557883737769191, "loss": 0.0, "step": 62140 }, { "epoch": 0.8497925084261405, "grad_norm": NaN, "learning_rate": 0.0003557343142411116, "loss": 0.0, "step": 62150 }, { "epoch": 0.8499292409295076, "grad_norm": NaN, "learning_rate": 0.0003556802486835897, "loss": 0.0, "step": 62160 }, { "epoch": 0.8500659734328746, "grad_norm": NaN, "learning_rate": 0.0003556261771074325, "loss": 0.0, "step": 62170 }, { "epoch": 0.8502027059362416, "grad_norm": NaN, "learning_rate": 0.00035557209951571947, "loss": 0.0, "step": 62180 }, { "epoch": 0.8503394384396087, "grad_norm": NaN, "learning_rate": 0.0003555180159115304, "loss": 0.0, "step": 62190 }, { "epoch": 0.8504761709429757, "grad_norm": NaN, "learning_rate": 0.00035546392629794534, "loss": 0.0, "step": 62200 }, { "epoch": 0.8506129034463428, "grad_norm": NaN, "learning_rate": 0.00035540983067804485, "loss": 0.0, "step": 62210 }, { "epoch": 0.8507496359497098, "grad_norm": NaN, "learning_rate": 0.0003553557290549098, "loss": 0.0, "step": 62220 }, { "epoch": 0.8508863684530769, "grad_norm": NaN, "learning_rate": 0.00035530162143162115, "loss": 0.0, "step": 62230 }, { "epoch": 0.8510231009564438, "grad_norm": NaN, "learning_rate": 0.00035524750781126065, "loss": 0.0, "step": 62240 }, { "epoch": 0.8511598334598109, "grad_norm": NaN, "learning_rate": 0.00035519338819691, "loss": 0.0, "step": 62250 }, { "epoch": 0.8512965659631779, "grad_norm": NaN, "learning_rate": 0.0003551392625916514, "loss": 0.0, "step": 62260 }, { "epoch": 0.851433298466545, "grad_norm": NaN, "learning_rate": 0.00035508513099856745, "loss": 0.0, "step": 62270 }, { "epoch": 0.851570030969912, "grad_norm": NaN, "learning_rate": 0.00035503099342074085, "loss": 0.0, "step": 62280 }, { "epoch": 0.851706763473279, "grad_norm": NaN, "learning_rate": 0.000354976849861255, "loss": 0.0, "step": 62290 }, { "epoch": 0.8518434959766461, "grad_norm": NaN, "learning_rate": 0.0003549227003231934, "loss": 0.0, "step": 62300 }, { "epoch": 0.8519802284800131, "grad_norm": NaN, "learning_rate": 0.0003548685448096399, "loss": 0.0, "step": 62310 }, { "epoch": 0.8521169609833802, "grad_norm": NaN, "learning_rate": 0.0003548143833236787, "loss": 0.0, "step": 62320 }, { "epoch": 0.8522536934867472, "grad_norm": NaN, "learning_rate": 0.00035476021586839453, "loss": 0.0, "step": 62330 }, { "epoch": 0.8523904259901143, "grad_norm": NaN, "learning_rate": 0.0003547060424468721, "loss": 0.0, "step": 62340 }, { "epoch": 0.8525271584934813, "grad_norm": NaN, "learning_rate": 0.0003546518630621968, "loss": 0.0, "step": 62350 }, { "epoch": 0.8526638909968484, "grad_norm": NaN, "learning_rate": 0.0003545976777174541, "loss": 0.0, "step": 62360 }, { "epoch": 0.8528006235002153, "grad_norm": NaN, "learning_rate": 0.00035454348641573004, "loss": 0.0, "step": 62370 }, { "epoch": 0.8529373560035823, "grad_norm": NaN, "learning_rate": 0.0003544892891601109, "loss": 0.0, "step": 62380 }, { "epoch": 0.8530740885069494, "grad_norm": NaN, "learning_rate": 0.00035443508595368314, "loss": 0.0, "step": 62390 }, { "epoch": 0.8532108210103164, "grad_norm": NaN, "learning_rate": 0.00035438087679953386, "loss": 0.0, "step": 62400 }, { "epoch": 0.8533475535136835, "grad_norm": NaN, "learning_rate": 0.00035432666170075026, "loss": 0.0, "step": 62410 }, { "epoch": 0.8534842860170505, "grad_norm": NaN, "learning_rate": 0.00035427244066042, "loss": 0.0, "step": 62420 }, { "epoch": 0.8536210185204176, "grad_norm": NaN, "learning_rate": 0.00035421821368163104, "loss": 0.0, "step": 62430 }, { "epoch": 0.8537577510237846, "grad_norm": NaN, "learning_rate": 0.0003541639807674717, "loss": 0.0, "step": 62440 }, { "epoch": 0.8538944835271517, "grad_norm": NaN, "learning_rate": 0.00035410974192103055, "loss": 0.0, "step": 62450 }, { "epoch": 0.8540312160305187, "grad_norm": NaN, "learning_rate": 0.0003540554971453967, "loss": 0.0, "step": 62460 }, { "epoch": 0.8541679485338858, "grad_norm": NaN, "learning_rate": 0.00035400124644365926, "loss": 0.0, "step": 62470 }, { "epoch": 0.8543046810372528, "grad_norm": NaN, "learning_rate": 0.00035394698981890815, "loss": 0.0, "step": 62480 }, { "epoch": 0.8544414135406198, "grad_norm": NaN, "learning_rate": 0.00035389272727423315, "loss": 0.0, "step": 62490 }, { "epoch": 0.8545781460439869, "grad_norm": NaN, "learning_rate": 0.00035383845881272456, "loss": 0.0, "step": 62500 }, { "epoch": 0.8547148785473538, "grad_norm": NaN, "learning_rate": 0.0003537841844374732, "loss": 0.0, "step": 62510 }, { "epoch": 0.8548516110507209, "grad_norm": NaN, "learning_rate": 0.00035372990415156997, "loss": 0.0, "step": 62520 }, { "epoch": 0.8549883435540879, "grad_norm": NaN, "learning_rate": 0.0003536756179581063, "loss": 0.0, "step": 62530 }, { "epoch": 0.855125076057455, "grad_norm": NaN, "learning_rate": 0.0003536213258601738, "loss": 0.0, "step": 62540 }, { "epoch": 0.855261808560822, "grad_norm": NaN, "learning_rate": 0.0003535670278608644, "loss": 0.0, "step": 62550 }, { "epoch": 0.8553985410641891, "grad_norm": NaN, "learning_rate": 0.0003535127239632705, "loss": 0.0, "step": 62560 }, { "epoch": 0.8555352735675561, "grad_norm": NaN, "learning_rate": 0.0003534584141704849, "loss": 0.0, "step": 62570 }, { "epoch": 0.8556720060709232, "grad_norm": NaN, "learning_rate": 0.00035340409848560046, "loss": 0.0, "step": 62580 }, { "epoch": 0.8558087385742902, "grad_norm": NaN, "learning_rate": 0.00035334977691171067, "loss": 0.0, "step": 62590 }, { "epoch": 0.8559454710776572, "grad_norm": NaN, "learning_rate": 0.00035329544945190906, "loss": 0.0, "step": 62600 }, { "epoch": 0.8560822035810243, "grad_norm": NaN, "learning_rate": 0.0003532411161092898, "loss": 0.0, "step": 62610 }, { "epoch": 0.8562189360843913, "grad_norm": NaN, "learning_rate": 0.00035318677688694715, "loss": 0.0, "step": 62620 }, { "epoch": 0.8563556685877584, "grad_norm": NaN, "learning_rate": 0.0003531324317879759, "loss": 0.0, "step": 62630 }, { "epoch": 0.8564924010911253, "grad_norm": NaN, "learning_rate": 0.000353078080815471, "loss": 0.0, "step": 62640 }, { "epoch": 0.8566291335944924, "grad_norm": NaN, "learning_rate": 0.00035302372397252776, "loss": 0.0, "step": 62650 }, { "epoch": 0.8567658660978594, "grad_norm": NaN, "learning_rate": 0.000352969361262242, "loss": 0.0, "step": 62660 }, { "epoch": 0.8569025986012265, "grad_norm": NaN, "learning_rate": 0.0003529149926877097, "loss": 0.0, "step": 62670 }, { "epoch": 0.8570393311045935, "grad_norm": NaN, "learning_rate": 0.0003528606182520272, "loss": 0.0, "step": 62680 }, { "epoch": 0.8571760636079606, "grad_norm": NaN, "learning_rate": 0.0003528062379582912, "loss": 0.0, "step": 62690 }, { "epoch": 0.8573127961113276, "grad_norm": NaN, "learning_rate": 0.00035275185180959877, "loss": 0.0, "step": 62700 }, { "epoch": 0.8574495286146946, "grad_norm": NaN, "learning_rate": 0.0003526974598090473, "loss": 0.0, "step": 62710 }, { "epoch": 0.8575862611180617, "grad_norm": NaN, "learning_rate": 0.00035264306195973447, "loss": 0.0, "step": 62720 }, { "epoch": 0.8577229936214287, "grad_norm": NaN, "learning_rate": 0.0003525886582647582, "loss": 0.0, "step": 62730 }, { "epoch": 0.8578597261247958, "grad_norm": NaN, "learning_rate": 0.000352534248727217, "loss": 0.0, "step": 62740 }, { "epoch": 0.8579964586281628, "grad_norm": NaN, "learning_rate": 0.00035247983335020953, "loss": 0.0, "step": 62750 }, { "epoch": 0.8581331911315299, "grad_norm": NaN, "learning_rate": 0.00035242541213683477, "loss": 0.0, "step": 62760 }, { "epoch": 0.8582699236348968, "grad_norm": NaN, "learning_rate": 0.00035237098509019216, "loss": 0.0, "step": 62770 }, { "epoch": 0.858406656138264, "grad_norm": NaN, "learning_rate": 0.00035231655221338134, "loss": 0.0, "step": 62780 }, { "epoch": 0.8585433886416309, "grad_norm": NaN, "learning_rate": 0.0003522621135095023, "loss": 0.0, "step": 62790 }, { "epoch": 0.858680121144998, "grad_norm": NaN, "learning_rate": 0.00035220766898165555, "loss": 0.0, "step": 62800 }, { "epoch": 0.858816853648365, "grad_norm": NaN, "learning_rate": 0.00035215321863294157, "loss": 0.0, "step": 62810 }, { "epoch": 0.858953586151732, "grad_norm": NaN, "learning_rate": 0.00035209876246646157, "loss": 0.0, "step": 62820 }, { "epoch": 0.8590903186550991, "grad_norm": NaN, "learning_rate": 0.0003520443004853168, "loss": 0.0, "step": 62830 }, { "epoch": 0.8592270511584661, "grad_norm": NaN, "learning_rate": 0.0003519898326926089, "loss": 0.0, "step": 62840 }, { "epoch": 0.8593637836618332, "grad_norm": NaN, "learning_rate": 0.00035193535909144, "loss": 0.0, "step": 62850 }, { "epoch": 0.8595005161652002, "grad_norm": NaN, "learning_rate": 0.0003518808796849125, "loss": 0.0, "step": 62860 }, { "epoch": 0.8596372486685673, "grad_norm": NaN, "learning_rate": 0.00035182639447612886, "loss": 0.0, "step": 62870 }, { "epoch": 0.8597739811719343, "grad_norm": NaN, "learning_rate": 0.00035177190346819226, "loss": 0.0, "step": 62880 }, { "epoch": 0.8599107136753014, "grad_norm": NaN, "learning_rate": 0.0003517174066642059, "loss": 0.0, "step": 62890 }, { "epoch": 0.8600474461786684, "grad_norm": NaN, "learning_rate": 0.0003516629040672737, "loss": 0.0, "step": 62900 }, { "epoch": 0.8601841786820354, "grad_norm": NaN, "learning_rate": 0.0003516083956804994, "loss": 0.0, "step": 62910 }, { "epoch": 0.8603209111854024, "grad_norm": NaN, "learning_rate": 0.0003515538815069874, "loss": 0.0, "step": 62920 }, { "epoch": 0.8604576436887694, "grad_norm": NaN, "learning_rate": 0.00035149936154984244, "loss": 0.0, "step": 62930 }, { "epoch": 0.8605943761921365, "grad_norm": NaN, "learning_rate": 0.00035144483581216933, "loss": 0.0, "step": 62940 }, { "epoch": 0.8607311086955035, "grad_norm": NaN, "learning_rate": 0.0003513903042970736, "loss": 0.0, "step": 62950 }, { "epoch": 0.8608678411988706, "grad_norm": NaN, "learning_rate": 0.0003513357670076607, "loss": 0.0, "step": 62960 }, { "epoch": 0.8610045737022376, "grad_norm": NaN, "learning_rate": 0.0003512812239470367, "loss": 0.0, "step": 62970 }, { "epoch": 0.8611413062056047, "grad_norm": NaN, "learning_rate": 0.000351226675118308, "loss": 0.0, "step": 62980 }, { "epoch": 0.8612780387089717, "grad_norm": NaN, "learning_rate": 0.000351172120524581, "loss": 0.0, "step": 62990 }, { "epoch": 0.8614147712123388, "grad_norm": NaN, "learning_rate": 0.00035111756016896283, "loss": 0.0, "step": 63000 }, { "epoch": 0.8615515037157058, "grad_norm": NaN, "learning_rate": 0.0003510629940545608, "loss": 0.0, "step": 63010 }, { "epoch": 0.8616882362190729, "grad_norm": NaN, "learning_rate": 0.00035100842218448234, "loss": 0.0, "step": 63020 }, { "epoch": 0.8618249687224399, "grad_norm": NaN, "learning_rate": 0.00035095384456183565, "loss": 0.0, "step": 63030 }, { "epoch": 0.8619617012258068, "grad_norm": NaN, "learning_rate": 0.0003508992611897288, "loss": 0.0, "step": 63040 }, { "epoch": 0.8620984337291739, "grad_norm": NaN, "learning_rate": 0.00035084467207127044, "loss": 0.0, "step": 63050 }, { "epoch": 0.8622351662325409, "grad_norm": NaN, "learning_rate": 0.00035079007720956954, "loss": 0.0, "step": 63060 }, { "epoch": 0.862371898735908, "grad_norm": NaN, "learning_rate": 0.0003507354766077353, "loss": 0.0, "step": 63070 }, { "epoch": 0.862508631239275, "grad_norm": NaN, "learning_rate": 0.0003506808702688773, "loss": 0.0, "step": 63080 }, { "epoch": 0.8626453637426421, "grad_norm": NaN, "learning_rate": 0.0003506262581961055, "loss": 0.0, "step": 63090 }, { "epoch": 0.8627820962460091, "grad_norm": NaN, "learning_rate": 0.00035057164039253, "loss": 0.0, "step": 63100 }, { "epoch": 0.8629188287493762, "grad_norm": NaN, "learning_rate": 0.0003505170168612615, "loss": 0.0, "step": 63110 }, { "epoch": 0.8630555612527432, "grad_norm": NaN, "learning_rate": 0.00035046238760541094, "loss": 0.0, "step": 63120 }, { "epoch": 0.8631922937561103, "grad_norm": NaN, "learning_rate": 0.0003504077526280893, "loss": 0.0, "step": 63130 }, { "epoch": 0.8633290262594773, "grad_norm": NaN, "learning_rate": 0.0003503531119324083, "loss": 0.0, "step": 63140 }, { "epoch": 0.8634657587628443, "grad_norm": NaN, "learning_rate": 0.0003502984655214798, "loss": 0.0, "step": 63150 }, { "epoch": 0.8636024912662114, "grad_norm": NaN, "learning_rate": 0.00035024381339841587, "loss": 0.0, "step": 63160 }, { "epoch": 0.8637392237695783, "grad_norm": NaN, "learning_rate": 0.0003501891555663291, "loss": 0.0, "step": 63170 }, { "epoch": 0.8638759562729454, "grad_norm": NaN, "learning_rate": 0.00035013449202833235, "loss": 0.0, "step": 63180 }, { "epoch": 0.8640126887763124, "grad_norm": NaN, "learning_rate": 0.0003500798227875387, "loss": 0.0, "step": 63190 }, { "epoch": 0.8641494212796795, "grad_norm": NaN, "learning_rate": 0.00035002514784706186, "loss": 0.0, "step": 63200 }, { "epoch": 0.8642861537830465, "grad_norm": NaN, "learning_rate": 0.0003499704672100153, "loss": 0.0, "step": 63210 }, { "epoch": 0.8644228862864136, "grad_norm": NaN, "learning_rate": 0.00034991578087951335, "loss": 0.0, "step": 63220 }, { "epoch": 0.8645596187897806, "grad_norm": NaN, "learning_rate": 0.0003498610888586705, "loss": 0.0, "step": 63230 }, { "epoch": 0.8646963512931477, "grad_norm": NaN, "learning_rate": 0.0003498063911506014, "loss": 0.0, "step": 63240 }, { "epoch": 0.8648330837965147, "grad_norm": NaN, "learning_rate": 0.0003497516877584214, "loss": 0.0, "step": 63250 }, { "epoch": 0.8649698162998817, "grad_norm": NaN, "learning_rate": 0.00034969697868524567, "loss": 0.0, "step": 63260 }, { "epoch": 0.8651065488032488, "grad_norm": NaN, "learning_rate": 0.0003496422639341902, "loss": 0.0, "step": 63270 }, { "epoch": 0.8652432813066158, "grad_norm": NaN, "learning_rate": 0.00034958754350837085, "loss": 0.0, "step": 63280 }, { "epoch": 0.8653800138099829, "grad_norm": NaN, "learning_rate": 0.00034953281741090415, "loss": 0.0, "step": 63290 }, { "epoch": 0.8655167463133498, "grad_norm": NaN, "learning_rate": 0.0003494780856449068, "loss": 0.0, "step": 63300 }, { "epoch": 0.865653478816717, "grad_norm": NaN, "learning_rate": 0.0003494233482134958, "loss": 0.0, "step": 63310 }, { "epoch": 0.8657902113200839, "grad_norm": NaN, "learning_rate": 0.0003493686051197886, "loss": 0.0, "step": 63320 }, { "epoch": 0.865926943823451, "grad_norm": NaN, "learning_rate": 0.0003493138563669029, "loss": 0.0, "step": 63330 }, { "epoch": 0.866063676326818, "grad_norm": NaN, "learning_rate": 0.00034925910195795666, "loss": 0.0, "step": 63340 }, { "epoch": 0.8662004088301851, "grad_norm": NaN, "learning_rate": 0.0003492043418960683, "loss": 0.0, "step": 63350 }, { "epoch": 0.8663371413335521, "grad_norm": NaN, "learning_rate": 0.00034914957618435634, "loss": 0.0, "step": 63360 }, { "epoch": 0.8664738738369191, "grad_norm": NaN, "learning_rate": 0.00034909480482593993, "loss": 0.0, "step": 63370 }, { "epoch": 0.8666106063402862, "grad_norm": NaN, "learning_rate": 0.0003490400278239382, "loss": 0.0, "step": 63380 }, { "epoch": 0.8667473388436532, "grad_norm": NaN, "learning_rate": 0.00034898524518147084, "loss": 0.0, "step": 63390 }, { "epoch": 0.8668840713470203, "grad_norm": NaN, "learning_rate": 0.0003489304569016579, "loss": 0.0, "step": 63400 }, { "epoch": 0.8670208038503873, "grad_norm": NaN, "learning_rate": 0.00034887566298761957, "loss": 0.0, "step": 63410 }, { "epoch": 0.8671575363537544, "grad_norm": NaN, "learning_rate": 0.00034882086344247644, "loss": 0.0, "step": 63420 }, { "epoch": 0.8672942688571214, "grad_norm": NaN, "learning_rate": 0.0003487660582693494, "loss": 0.0, "step": 63430 }, { "epoch": 0.8674310013604885, "grad_norm": NaN, "learning_rate": 0.0003487112474713597, "loss": 0.0, "step": 63440 }, { "epoch": 0.8675677338638554, "grad_norm": NaN, "learning_rate": 0.00034865643105162883, "loss": 0.0, "step": 63450 }, { "epoch": 0.8677044663672225, "grad_norm": NaN, "learning_rate": 0.00034860160901327875, "loss": 0.0, "step": 63460 }, { "epoch": 0.8678411988705895, "grad_norm": NaN, "learning_rate": 0.0003485467813594316, "loss": 0.0, "step": 63470 }, { "epoch": 0.8679779313739565, "grad_norm": NaN, "learning_rate": 0.00034849194809321, "loss": 0.0, "step": 63480 }, { "epoch": 0.8681146638773236, "grad_norm": NaN, "learning_rate": 0.00034843710921773663, "loss": 0.0, "step": 63490 }, { "epoch": 0.8682513963806906, "grad_norm": NaN, "learning_rate": 0.00034838226473613465, "loss": 0.0, "step": 63500 }, { "epoch": 0.8683881288840577, "grad_norm": NaN, "learning_rate": 0.0003483274146515277, "loss": 0.0, "step": 63510 }, { "epoch": 0.8685248613874247, "grad_norm": NaN, "learning_rate": 0.0003482725589670393, "loss": 0.0, "step": 63520 }, { "epoch": 0.8686615938907918, "grad_norm": NaN, "learning_rate": 0.0003482176976857937, "loss": 0.0, "step": 63530 }, { "epoch": 0.8687983263941588, "grad_norm": NaN, "learning_rate": 0.00034816283081091546, "loss": 0.0, "step": 63540 }, { "epoch": 0.8689350588975259, "grad_norm": NaN, "learning_rate": 0.000348107958345529, "loss": 0.0, "step": 63550 }, { "epoch": 0.8690717914008929, "grad_norm": NaN, "learning_rate": 0.00034805308029275977, "loss": 0.0, "step": 63560 }, { "epoch": 0.86920852390426, "grad_norm": NaN, "learning_rate": 0.0003479981966557329, "loss": 0.0, "step": 63570 }, { "epoch": 0.8693452564076269, "grad_norm": NaN, "learning_rate": 0.00034794330743757407, "loss": 0.0, "step": 63580 }, { "epoch": 0.8694819889109939, "grad_norm": NaN, "learning_rate": 0.0003478884126414094, "loss": 0.0, "step": 63590 }, { "epoch": 0.869618721414361, "grad_norm": NaN, "learning_rate": 0.0003478335122703652, "loss": 0.0, "step": 63600 }, { "epoch": 0.869755453917728, "grad_norm": NaN, "learning_rate": 0.0003477786063275681, "loss": 0.0, "step": 63610 }, { "epoch": 0.8698921864210951, "grad_norm": NaN, "learning_rate": 0.00034772369481614513, "loss": 0.0, "step": 63620 }, { "epoch": 0.8700289189244621, "grad_norm": NaN, "learning_rate": 0.0003476687777392234, "loss": 0.0, "step": 63630 }, { "epoch": 0.8701656514278292, "grad_norm": NaN, "learning_rate": 0.0003476138550999308, "loss": 0.0, "step": 63640 }, { "epoch": 0.8703023839311962, "grad_norm": NaN, "learning_rate": 0.000347558926901395, "loss": 0.0, "step": 63650 }, { "epoch": 0.8704391164345633, "grad_norm": NaN, "learning_rate": 0.0003475039931467443, "loss": 0.0, "step": 63660 }, { "epoch": 0.8705758489379303, "grad_norm": NaN, "learning_rate": 0.00034744905383910727, "loss": 0.0, "step": 63670 }, { "epoch": 0.8707125814412974, "grad_norm": NaN, "learning_rate": 0.00034739410898161285, "loss": 0.0, "step": 63680 }, { "epoch": 0.8708493139446644, "grad_norm": NaN, "learning_rate": 0.00034733915857739015, "loss": 0.0, "step": 63690 }, { "epoch": 0.8709860464480313, "grad_norm": NaN, "learning_rate": 0.00034728420262956873, "loss": 0.0, "step": 63700 }, { "epoch": 0.8711227789513984, "grad_norm": NaN, "learning_rate": 0.0003472292411412783, "loss": 0.0, "step": 63710 }, { "epoch": 0.8712595114547654, "grad_norm": NaN, "learning_rate": 0.0003471742741156491, "loss": 0.0, "step": 63720 }, { "epoch": 0.8713962439581325, "grad_norm": NaN, "learning_rate": 0.0003471193015558114, "loss": 0.0, "step": 63730 }, { "epoch": 0.8715329764614995, "grad_norm": NaN, "learning_rate": 0.00034706432346489626, "loss": 0.0, "step": 63740 }, { "epoch": 0.8716697089648666, "grad_norm": NaN, "learning_rate": 0.0003470093398460345, "loss": 0.0, "step": 63750 }, { "epoch": 0.8718064414682336, "grad_norm": NaN, "learning_rate": 0.0003469543507023576, "loss": 0.0, "step": 63760 }, { "epoch": 0.8719431739716007, "grad_norm": NaN, "learning_rate": 0.0003468993560369973, "loss": 0.0, "step": 63770 }, { "epoch": 0.8720799064749677, "grad_norm": NaN, "learning_rate": 0.00034684435585308557, "loss": 0.0, "step": 63780 }, { "epoch": 0.8722166389783347, "grad_norm": NaN, "learning_rate": 0.00034678935015375475, "loss": 0.0, "step": 63790 }, { "epoch": 0.8723533714817018, "grad_norm": NaN, "learning_rate": 0.00034673433894213756, "loss": 0.0, "step": 63800 }, { "epoch": 0.8724901039850688, "grad_norm": NaN, "learning_rate": 0.0003466793222213669, "loss": 0.0, "step": 63810 }, { "epoch": 0.8726268364884359, "grad_norm": NaN, "learning_rate": 0.000346624299994576, "loss": 0.0, "step": 63820 }, { "epoch": 0.8727635689918029, "grad_norm": NaN, "learning_rate": 0.00034656927226489864, "loss": 0.0, "step": 63830 }, { "epoch": 0.87290030149517, "grad_norm": NaN, "learning_rate": 0.00034651423903546847, "loss": 0.0, "step": 63840 }, { "epoch": 0.8730370339985369, "grad_norm": NaN, "learning_rate": 0.0003464592003094199, "loss": 0.0, "step": 63850 }, { "epoch": 0.873173766501904, "grad_norm": NaN, "learning_rate": 0.0003464041560898874, "loss": 0.0, "step": 63860 }, { "epoch": 0.873310499005271, "grad_norm": NaN, "learning_rate": 0.0003463491063800058, "loss": 0.0, "step": 63870 }, { "epoch": 0.8734472315086381, "grad_norm": NaN, "learning_rate": 0.00034629405118291026, "loss": 0.0, "step": 63880 }, { "epoch": 0.8735839640120051, "grad_norm": NaN, "learning_rate": 0.00034623899050173626, "loss": 0.0, "step": 63890 }, { "epoch": 0.8737206965153721, "grad_norm": NaN, "learning_rate": 0.00034618392433961956, "loss": 0.0, "step": 63900 }, { "epoch": 0.8738574290187392, "grad_norm": NaN, "learning_rate": 0.0003461288526996963, "loss": 0.0, "step": 63910 }, { "epoch": 0.8739941615221062, "grad_norm": NaN, "learning_rate": 0.00034607377558510283, "loss": 0.0, "step": 63920 }, { "epoch": 0.8741308940254733, "grad_norm": NaN, "learning_rate": 0.0003460186929989759, "loss": 0.0, "step": 63930 }, { "epoch": 0.8742676265288403, "grad_norm": NaN, "learning_rate": 0.0003459636049444525, "loss": 0.0, "step": 63940 }, { "epoch": 0.8744043590322074, "grad_norm": NaN, "learning_rate": 0.00034590851142467, "loss": 0.0, "step": 63950 }, { "epoch": 0.8745410915355744, "grad_norm": NaN, "learning_rate": 0.00034585341244276616, "loss": 0.0, "step": 63960 }, { "epoch": 0.8746778240389415, "grad_norm": NaN, "learning_rate": 0.00034579830800187875, "loss": 0.0, "step": 63970 }, { "epoch": 0.8748145565423084, "grad_norm": NaN, "learning_rate": 0.0003457431981051461, "loss": 0.0, "step": 63980 }, { "epoch": 0.8749512890456755, "grad_norm": NaN, "learning_rate": 0.0003456880827557069, "loss": 0.0, "step": 63990 }, { "epoch": 0.8750880215490425, "grad_norm": NaN, "learning_rate": 0.0003456329619566999, "loss": 0.0, "step": 64000 }, { "epoch": 0.8752247540524095, "grad_norm": NaN, "learning_rate": 0.0003455778357112644, "loss": 0.0, "step": 64010 }, { "epoch": 0.8753614865557766, "grad_norm": NaN, "learning_rate": 0.00034552270402253994, "loss": 0.0, "step": 64020 }, { "epoch": 0.8754982190591436, "grad_norm": NaN, "learning_rate": 0.00034546756689366625, "loss": 0.0, "step": 64030 }, { "epoch": 0.8756349515625107, "grad_norm": NaN, "learning_rate": 0.00034541242432778354, "loss": 0.0, "step": 64040 }, { "epoch": 0.8757716840658777, "grad_norm": NaN, "learning_rate": 0.00034535727632803215, "loss": 0.0, "step": 64050 }, { "epoch": 0.8759084165692448, "grad_norm": NaN, "learning_rate": 0.00034530212289755305, "loss": 0.0, "step": 64060 }, { "epoch": 0.8760451490726118, "grad_norm": NaN, "learning_rate": 0.00034524696403948703, "loss": 0.0, "step": 64070 }, { "epoch": 0.8761818815759789, "grad_norm": NaN, "learning_rate": 0.00034519179975697555, "loss": 0.0, "step": 64080 }, { "epoch": 0.8763186140793459, "grad_norm": NaN, "learning_rate": 0.0003451366300531605, "loss": 0.0, "step": 64090 }, { "epoch": 0.876455346582713, "grad_norm": NaN, "learning_rate": 0.0003450814549311836, "loss": 0.0, "step": 64100 }, { "epoch": 0.87659207908608, "grad_norm": NaN, "learning_rate": 0.00034502627439418734, "loss": 0.0, "step": 64110 }, { "epoch": 0.8767288115894469, "grad_norm": NaN, "learning_rate": 0.00034497108844531424, "loss": 0.0, "step": 64120 }, { "epoch": 0.876865544092814, "grad_norm": NaN, "learning_rate": 0.0003449158970877071, "loss": 0.0, "step": 64130 }, { "epoch": 0.877002276596181, "grad_norm": NaN, "learning_rate": 0.0003448607003245094, "loss": 0.0, "step": 64140 }, { "epoch": 0.8771390090995481, "grad_norm": NaN, "learning_rate": 0.0003448054981588645, "loss": 0.0, "step": 64150 }, { "epoch": 0.8772757416029151, "grad_norm": NaN, "learning_rate": 0.00034475029059391623, "loss": 0.0, "step": 64160 }, { "epoch": 0.8774124741062822, "grad_norm": NaN, "learning_rate": 0.0003446950776328088, "loss": 0.0, "step": 64170 }, { "epoch": 0.8775492066096492, "grad_norm": NaN, "learning_rate": 0.0003446398592786866, "loss": 0.0, "step": 64180 }, { "epoch": 0.8776859391130163, "grad_norm": NaN, "learning_rate": 0.0003445846355346945, "loss": 0.0, "step": 64190 }, { "epoch": 0.8778226716163833, "grad_norm": NaN, "learning_rate": 0.00034452940640397747, "loss": 0.0, "step": 64200 }, { "epoch": 0.8779594041197504, "grad_norm": NaN, "learning_rate": 0.00034447417188968087, "loss": 0.0, "step": 64210 }, { "epoch": 0.8780961366231174, "grad_norm": NaN, "learning_rate": 0.00034441893199495054, "loss": 0.0, "step": 64220 }, { "epoch": 0.8782328691264844, "grad_norm": NaN, "learning_rate": 0.0003443636867229324, "loss": 0.0, "step": 64230 }, { "epoch": 0.8783696016298514, "grad_norm": NaN, "learning_rate": 0.00034430843607677255, "loss": 0.0, "step": 64240 }, { "epoch": 0.8785063341332184, "grad_norm": NaN, "learning_rate": 0.0003442531800596178, "loss": 0.0, "step": 64250 }, { "epoch": 0.8786430666365855, "grad_norm": NaN, "learning_rate": 0.00034419791867461503, "loss": 0.0, "step": 64260 }, { "epoch": 0.8787797991399525, "grad_norm": NaN, "learning_rate": 0.0003441426519249114, "loss": 0.0, "step": 64270 }, { "epoch": 0.8789165316433196, "grad_norm": NaN, "learning_rate": 0.00034408737981365446, "loss": 0.0, "step": 64280 }, { "epoch": 0.8790532641466866, "grad_norm": NaN, "learning_rate": 0.00034403210234399203, "loss": 0.0, "step": 64290 }, { "epoch": 0.8791899966500537, "grad_norm": NaN, "learning_rate": 0.00034397681951907225, "loss": 0.0, "step": 64300 }, { "epoch": 0.8793267291534207, "grad_norm": NaN, "learning_rate": 0.0003439215313420434, "loss": 0.0, "step": 64310 }, { "epoch": 0.8794634616567878, "grad_norm": NaN, "learning_rate": 0.00034386623781605446, "loss": 0.0, "step": 64320 }, { "epoch": 0.8796001941601548, "grad_norm": NaN, "learning_rate": 0.0003438109389442544, "loss": 0.0, "step": 64330 }, { "epoch": 0.8797369266635218, "grad_norm": NaN, "learning_rate": 0.00034375563472979233, "loss": 0.0, "step": 64340 }, { "epoch": 0.8798736591668889, "grad_norm": NaN, "learning_rate": 0.0003437003251758183, "loss": 0.0, "step": 64350 }, { "epoch": 0.8800103916702559, "grad_norm": NaN, "learning_rate": 0.000343645010285482, "loss": 0.0, "step": 64360 }, { "epoch": 0.880147124173623, "grad_norm": NaN, "learning_rate": 0.0003435896900619337, "loss": 0.0, "step": 64370 }, { "epoch": 0.8802838566769899, "grad_norm": NaN, "learning_rate": 0.00034353436450832407, "loss": 0.0, "step": 64380 }, { "epoch": 0.880420589180357, "grad_norm": NaN, "learning_rate": 0.00034347903362780393, "loss": 0.0, "step": 64390 }, { "epoch": 0.880557321683724, "grad_norm": NaN, "learning_rate": 0.0003434236974235244, "loss": 0.0, "step": 64400 }, { "epoch": 0.8806940541870911, "grad_norm": NaN, "learning_rate": 0.0003433683558986371, "loss": 0.0, "step": 64410 }, { "epoch": 0.8808307866904581, "grad_norm": NaN, "learning_rate": 0.0003433130090562935, "loss": 0.0, "step": 64420 }, { "epoch": 0.8809675191938252, "grad_norm": NaN, "learning_rate": 0.00034325765689964603, "loss": 0.0, "step": 64430 }, { "epoch": 0.8811042516971922, "grad_norm": NaN, "learning_rate": 0.0003432022994318469, "loss": 0.0, "step": 64440 }, { "epoch": 0.8812409842005592, "grad_norm": NaN, "learning_rate": 0.0003431469366560488, "loss": 0.0, "step": 64450 }, { "epoch": 0.8813777167039263, "grad_norm": NaN, "learning_rate": 0.0003430915685754048, "loss": 0.0, "step": 64460 }, { "epoch": 0.8815144492072933, "grad_norm": NaN, "learning_rate": 0.000343036195193068, "loss": 0.0, "step": 64470 }, { "epoch": 0.8816511817106604, "grad_norm": NaN, "learning_rate": 0.00034298081651219217, "loss": 0.0, "step": 64480 }, { "epoch": 0.8817879142140274, "grad_norm": NaN, "learning_rate": 0.0003429254325359311, "loss": 0.0, "step": 64490 }, { "epoch": 0.8819246467173945, "grad_norm": NaN, "learning_rate": 0.00034287004326743906, "loss": 0.0, "step": 64500 }, { "epoch": 0.8820613792207614, "grad_norm": NaN, "learning_rate": 0.0003428146487098705, "loss": 0.0, "step": 64510 }, { "epoch": 0.8821981117241285, "grad_norm": NaN, "learning_rate": 0.00034275924886638024, "loss": 0.0, "step": 64520 }, { "epoch": 0.8823348442274955, "grad_norm": NaN, "learning_rate": 0.0003427038437401233, "loss": 0.0, "step": 64530 }, { "epoch": 0.8824715767308626, "grad_norm": NaN, "learning_rate": 0.00034264843333425523, "loss": 0.0, "step": 64540 }, { "epoch": 0.8826083092342296, "grad_norm": NaN, "learning_rate": 0.0003425930176519315, "loss": 0.0, "step": 64550 }, { "epoch": 0.8827450417375966, "grad_norm": NaN, "learning_rate": 0.00034253759669630843, "loss": 0.0, "step": 64560 }, { "epoch": 0.8828817742409637, "grad_norm": NaN, "learning_rate": 0.00034248217047054205, "loss": 0.0, "step": 64570 }, { "epoch": 0.8830185067443307, "grad_norm": NaN, "learning_rate": 0.000342426738977789, "loss": 0.0, "step": 64580 }, { "epoch": 0.8831552392476978, "grad_norm": NaN, "learning_rate": 0.00034237130222120635, "loss": 0.0, "step": 64590 }, { "epoch": 0.8832919717510648, "grad_norm": NaN, "learning_rate": 0.000342315860203951, "loss": 0.0, "step": 64600 }, { "epoch": 0.8834287042544319, "grad_norm": NaN, "learning_rate": 0.00034226041292918075, "loss": 0.0, "step": 64610 }, { "epoch": 0.8835654367577989, "grad_norm": NaN, "learning_rate": 0.00034220496040005325, "loss": 0.0, "step": 64620 }, { "epoch": 0.883702169261166, "grad_norm": NaN, "learning_rate": 0.0003421495026197265, "loss": 0.0, "step": 64630 }, { "epoch": 0.883838901764533, "grad_norm": NaN, "learning_rate": 0.0003420940395913592, "loss": 0.0, "step": 64640 }, { "epoch": 0.8839756342679, "grad_norm": NaN, "learning_rate": 0.00034203857131810987, "loss": 0.0, "step": 64650 }, { "epoch": 0.884112366771267, "grad_norm": NaN, "learning_rate": 0.0003419830978031374, "loss": 0.0, "step": 64660 }, { "epoch": 0.884249099274634, "grad_norm": NaN, "learning_rate": 0.00034192761904960125, "loss": 0.0, "step": 64670 }, { "epoch": 0.8843858317780011, "grad_norm": NaN, "learning_rate": 0.00034187213506066085, "loss": 0.0, "step": 64680 }, { "epoch": 0.8845225642813681, "grad_norm": NaN, "learning_rate": 0.00034181664583947624, "loss": 0.0, "step": 64690 }, { "epoch": 0.8846592967847352, "grad_norm": NaN, "learning_rate": 0.00034176115138920754, "loss": 0.0, "step": 64700 }, { "epoch": 0.8847960292881022, "grad_norm": NaN, "learning_rate": 0.00034170565171301524, "loss": 0.0, "step": 64710 }, { "epoch": 0.8849327617914693, "grad_norm": NaN, "learning_rate": 0.00034165014681406013, "loss": 0.0, "step": 64720 }, { "epoch": 0.8850694942948363, "grad_norm": NaN, "learning_rate": 0.00034159463669550325, "loss": 0.0, "step": 64730 }, { "epoch": 0.8852062267982034, "grad_norm": NaN, "learning_rate": 0.00034153912136050614, "loss": 0.0, "step": 64740 }, { "epoch": 0.8853429593015704, "grad_norm": NaN, "learning_rate": 0.00034148360081223016, "loss": 0.0, "step": 64750 }, { "epoch": 0.8854796918049375, "grad_norm": NaN, "learning_rate": 0.00034142807505383756, "loss": 0.0, "step": 64760 }, { "epoch": 0.8856164243083045, "grad_norm": NaN, "learning_rate": 0.00034137254408849057, "loss": 0.0, "step": 64770 }, { "epoch": 0.8857531568116714, "grad_norm": NaN, "learning_rate": 0.0003413170079193517, "loss": 0.0, "step": 64780 }, { "epoch": 0.8858898893150385, "grad_norm": NaN, "learning_rate": 0.0003412614665495837, "loss": 0.0, "step": 64790 }, { "epoch": 0.8860266218184055, "grad_norm": NaN, "learning_rate": 0.00034120591998235, "loss": 0.0, "step": 64800 }, { "epoch": 0.8861633543217726, "grad_norm": NaN, "learning_rate": 0.0003411503682208138, "loss": 0.0, "step": 64810 }, { "epoch": 0.8863000868251396, "grad_norm": NaN, "learning_rate": 0.00034109481126813894, "loss": 0.0, "step": 64820 }, { "epoch": 0.8864368193285067, "grad_norm": NaN, "learning_rate": 0.00034103924912748954, "loss": 0.0, "step": 64830 }, { "epoch": 0.8865735518318737, "grad_norm": NaN, "learning_rate": 0.00034098368180202976, "loss": 0.0, "step": 64840 }, { "epoch": 0.8867102843352408, "grad_norm": NaN, "learning_rate": 0.00034092810929492446, "loss": 0.0, "step": 64850 }, { "epoch": 0.8868470168386078, "grad_norm": NaN, "learning_rate": 0.0003408725316093384, "loss": 0.0, "step": 64860 }, { "epoch": 0.8869837493419749, "grad_norm": NaN, "learning_rate": 0.0003408169487484368, "loss": 0.0, "step": 64870 }, { "epoch": 0.8871204818453419, "grad_norm": NaN, "learning_rate": 0.0003407613607153853, "loss": 0.0, "step": 64880 }, { "epoch": 0.8872572143487089, "grad_norm": NaN, "learning_rate": 0.0003407057675133497, "loss": 0.0, "step": 64890 }, { "epoch": 0.887393946852076, "grad_norm": NaN, "learning_rate": 0.00034065016914549596, "loss": 0.0, "step": 64900 }, { "epoch": 0.8875306793554429, "grad_norm": NaN, "learning_rate": 0.0003405945656149907, "loss": 0.0, "step": 64910 }, { "epoch": 0.88766741185881, "grad_norm": NaN, "learning_rate": 0.0003405389569250004, "loss": 0.0, "step": 64920 }, { "epoch": 0.887804144362177, "grad_norm": NaN, "learning_rate": 0.00034048334307869223, "loss": 0.0, "step": 64930 }, { "epoch": 0.8879408768655441, "grad_norm": NaN, "learning_rate": 0.0003404277240792334, "loss": 0.0, "step": 64940 }, { "epoch": 0.8880776093689111, "grad_norm": NaN, "learning_rate": 0.00034037209992979147, "loss": 0.0, "step": 64950 }, { "epoch": 0.8882143418722782, "grad_norm": NaN, "learning_rate": 0.0003403164706335343, "loss": 0.0, "step": 64960 }, { "epoch": 0.8883510743756452, "grad_norm": NaN, "learning_rate": 0.0003402608361936301, "loss": 0.0, "step": 64970 }, { "epoch": 0.8884878068790123, "grad_norm": NaN, "learning_rate": 0.0003402051966132474, "loss": 0.0, "step": 64980 }, { "epoch": 0.8886245393823793, "grad_norm": NaN, "learning_rate": 0.0003401495518955548, "loss": 0.0, "step": 64990 }, { "epoch": 0.8887612718857463, "grad_norm": NaN, "learning_rate": 0.00034009390204372137, "loss": 0.0, "step": 65000 }, { "epoch": 0.8888980043891134, "grad_norm": NaN, "learning_rate": 0.00034003824706091656, "loss": 0.0, "step": 65010 }, { "epoch": 0.8890347368924804, "grad_norm": NaN, "learning_rate": 0.0003399825869503099, "loss": 0.0, "step": 65020 }, { "epoch": 0.8891714693958475, "grad_norm": NaN, "learning_rate": 0.0003399269217150713, "loss": 0.0, "step": 65030 }, { "epoch": 0.8893082018992144, "grad_norm": NaN, "learning_rate": 0.00033987125135837097, "loss": 0.0, "step": 65040 }, { "epoch": 0.8894449344025815, "grad_norm": NaN, "learning_rate": 0.00033981557588337954, "loss": 0.0, "step": 65050 }, { "epoch": 0.8895816669059485, "grad_norm": NaN, "learning_rate": 0.00033975989529326766, "loss": 0.0, "step": 65060 }, { "epoch": 0.8897183994093156, "grad_norm": NaN, "learning_rate": 0.0003397042095912065, "loss": 0.0, "step": 65070 }, { "epoch": 0.8898551319126826, "grad_norm": NaN, "learning_rate": 0.00033964851878036736, "loss": 0.0, "step": 65080 }, { "epoch": 0.8899918644160497, "grad_norm": NaN, "learning_rate": 0.000339592822863922, "loss": 0.0, "step": 65090 }, { "epoch": 0.8901285969194167, "grad_norm": NaN, "learning_rate": 0.00033953712184504225, "loss": 0.0, "step": 65100 }, { "epoch": 0.8902653294227837, "grad_norm": NaN, "learning_rate": 0.0003394814157269005, "loss": 0.0, "step": 65110 }, { "epoch": 0.8904020619261508, "grad_norm": NaN, "learning_rate": 0.0003394257045126693, "loss": 0.0, "step": 65120 }, { "epoch": 0.8905387944295178, "grad_norm": NaN, "learning_rate": 0.0003393699882055213, "loss": 0.0, "step": 65130 }, { "epoch": 0.8906755269328849, "grad_norm": NaN, "learning_rate": 0.0003393142668086298, "loss": 0.0, "step": 65140 }, { "epoch": 0.8908122594362519, "grad_norm": NaN, "learning_rate": 0.00033925854032516807, "loss": 0.0, "step": 65150 }, { "epoch": 0.890948991939619, "grad_norm": NaN, "learning_rate": 0.0003392028087583099, "loss": 0.0, "step": 65160 }, { "epoch": 0.891085724442986, "grad_norm": NaN, "learning_rate": 0.0003391470721112293, "loss": 0.0, "step": 65170 }, { "epoch": 0.891222456946353, "grad_norm": NaN, "learning_rate": 0.0003390913303871005, "loss": 0.0, "step": 65180 }, { "epoch": 0.89135918944972, "grad_norm": NaN, "learning_rate": 0.000339035583589098, "loss": 0.0, "step": 65190 }, { "epoch": 0.891495921953087, "grad_norm": NaN, "learning_rate": 0.00033897983172039687, "loss": 0.0, "step": 65200 }, { "epoch": 0.8916326544564541, "grad_norm": NaN, "learning_rate": 0.00033892407478417196, "loss": 0.0, "step": 65210 }, { "epoch": 0.8917693869598211, "grad_norm": NaN, "learning_rate": 0.000338868312783599, "loss": 0.0, "step": 65220 }, { "epoch": 0.8919061194631882, "grad_norm": NaN, "learning_rate": 0.0003388125457218535, "loss": 0.0, "step": 65230 }, { "epoch": 0.8920428519665552, "grad_norm": NaN, "learning_rate": 0.0003387567736021116, "loss": 0.0, "step": 65240 }, { "epoch": 0.8921795844699223, "grad_norm": NaN, "learning_rate": 0.0003387009964275495, "loss": 0.0, "step": 65250 }, { "epoch": 0.8923163169732893, "grad_norm": NaN, "learning_rate": 0.0003386452142013439, "loss": 0.0, "step": 65260 }, { "epoch": 0.8924530494766564, "grad_norm": NaN, "learning_rate": 0.00033858942692667157, "loss": 0.0, "step": 65270 }, { "epoch": 0.8925897819800234, "grad_norm": NaN, "learning_rate": 0.0003385336346067097, "loss": 0.0, "step": 65280 }, { "epoch": 0.8927265144833905, "grad_norm": NaN, "learning_rate": 0.0003384778372446357, "loss": 0.0, "step": 65290 }, { "epoch": 0.8928632469867575, "grad_norm": NaN, "learning_rate": 0.00033842203484362745, "loss": 0.0, "step": 65300 }, { "epoch": 0.8929999794901244, "grad_norm": NaN, "learning_rate": 0.00033836622740686276, "loss": 0.0, "step": 65310 }, { "epoch": 0.8931367119934915, "grad_norm": NaN, "learning_rate": 0.00033831041493752014, "loss": 0.0, "step": 65320 }, { "epoch": 0.8932734444968585, "grad_norm": NaN, "learning_rate": 0.00033825459743877805, "loss": 0.0, "step": 65330 }, { "epoch": 0.8934101770002256, "grad_norm": NaN, "learning_rate": 0.00033819877491381546, "loss": 0.0, "step": 65340 }, { "epoch": 0.8935469095035926, "grad_norm": NaN, "learning_rate": 0.00033814294736581155, "loss": 0.0, "step": 65350 }, { "epoch": 0.8936836420069597, "grad_norm": NaN, "learning_rate": 0.00033808711479794566, "loss": 0.0, "step": 65360 }, { "epoch": 0.8938203745103267, "grad_norm": NaN, "learning_rate": 0.00033803127721339767, "loss": 0.0, "step": 65370 }, { "epoch": 0.8939571070136938, "grad_norm": NaN, "learning_rate": 0.00033797543461534747, "loss": 0.0, "step": 65380 }, { "epoch": 0.8940938395170608, "grad_norm": NaN, "learning_rate": 0.00033791958700697547, "loss": 0.0, "step": 65390 }, { "epoch": 0.8942305720204279, "grad_norm": NaN, "learning_rate": 0.0003378637343914623, "loss": 0.0, "step": 65400 }, { "epoch": 0.8943673045237949, "grad_norm": NaN, "learning_rate": 0.0003378078767719887, "loss": 0.0, "step": 65410 }, { "epoch": 0.8945040370271619, "grad_norm": NaN, "learning_rate": 0.00033775201415173596, "loss": 0.0, "step": 65420 }, { "epoch": 0.894640769530529, "grad_norm": NaN, "learning_rate": 0.00033769614653388546, "loss": 0.0, "step": 65430 }, { "epoch": 0.8947775020338959, "grad_norm": NaN, "learning_rate": 0.000337640273921619, "loss": 0.0, "step": 65440 }, { "epoch": 0.894914234537263, "grad_norm": NaN, "learning_rate": 0.00033758439631811855, "loss": 0.0, "step": 65450 }, { "epoch": 0.89505096704063, "grad_norm": NaN, "learning_rate": 0.0003375285137265665, "loss": 0.0, "step": 65460 }, { "epoch": 0.8951876995439971, "grad_norm": NaN, "learning_rate": 0.00033747262615014536, "loss": 0.0, "step": 65470 }, { "epoch": 0.8953244320473641, "grad_norm": NaN, "learning_rate": 0.0003374167335920381, "loss": 0.0, "step": 65480 }, { "epoch": 0.8954611645507312, "grad_norm": NaN, "learning_rate": 0.00033736083605542767, "loss": 0.0, "step": 65490 }, { "epoch": 0.8955978970540982, "grad_norm": NaN, "learning_rate": 0.0003373049335434977, "loss": 0.0, "step": 65500 }, { "epoch": 0.8957346295574653, "grad_norm": NaN, "learning_rate": 0.00033724902605943187, "loss": 0.0, "step": 65510 }, { "epoch": 0.8958713620608323, "grad_norm": NaN, "learning_rate": 0.00033719311360641416, "loss": 0.0, "step": 65520 }, { "epoch": 0.8960080945641993, "grad_norm": NaN, "learning_rate": 0.0003371371961876289, "loss": 0.0, "step": 65530 }, { "epoch": 0.8961448270675664, "grad_norm": NaN, "learning_rate": 0.00033708127380626063, "loss": 0.0, "step": 65540 }, { "epoch": 0.8962815595709334, "grad_norm": NaN, "learning_rate": 0.0003370253464654942, "loss": 0.0, "step": 65550 }, { "epoch": 0.8964182920743005, "grad_norm": NaN, "learning_rate": 0.0003369694141685148, "loss": 0.0, "step": 65560 }, { "epoch": 0.8965550245776674, "grad_norm": NaN, "learning_rate": 0.0003369134769185078, "loss": 0.0, "step": 65570 }, { "epoch": 0.8966917570810345, "grad_norm": NaN, "learning_rate": 0.00033685753471865887, "loss": 0.0, "step": 65580 }, { "epoch": 0.8968284895844015, "grad_norm": NaN, "learning_rate": 0.0003368015875721542, "loss": 0.0, "step": 65590 }, { "epoch": 0.8969652220877686, "grad_norm": NaN, "learning_rate": 0.0003367456354821798, "loss": 0.0, "step": 65600 }, { "epoch": 0.8971019545911356, "grad_norm": NaN, "learning_rate": 0.00033668967845192234, "loss": 0.0, "step": 65610 }, { "epoch": 0.8972386870945027, "grad_norm": NaN, "learning_rate": 0.00033663371648456864, "loss": 0.0, "step": 65620 }, { "epoch": 0.8973754195978697, "grad_norm": NaN, "learning_rate": 0.0003365777495833057, "loss": 0.0, "step": 65630 }, { "epoch": 0.8975121521012367, "grad_norm": NaN, "learning_rate": 0.0003365217777513212, "loss": 0.0, "step": 65640 }, { "epoch": 0.8976488846046038, "grad_norm": NaN, "learning_rate": 0.0003364658009918025, "loss": 0.0, "step": 65650 }, { "epoch": 0.8977856171079708, "grad_norm": NaN, "learning_rate": 0.00033640981930793776, "loss": 0.0, "step": 65660 }, { "epoch": 0.8979223496113379, "grad_norm": NaN, "learning_rate": 0.0003363538327029151, "loss": 0.0, "step": 65670 }, { "epoch": 0.8980590821147049, "grad_norm": NaN, "learning_rate": 0.000336297841179923, "loss": 0.0, "step": 65680 }, { "epoch": 0.898195814618072, "grad_norm": NaN, "learning_rate": 0.0003362418447421504, "loss": 0.0, "step": 65690 }, { "epoch": 0.898332547121439, "grad_norm": NaN, "learning_rate": 0.00033618584339278635, "loss": 0.0, "step": 65700 }, { "epoch": 0.898469279624806, "grad_norm": NaN, "learning_rate": 0.00033612983713502006, "loss": 0.0, "step": 65710 }, { "epoch": 0.898606012128173, "grad_norm": NaN, "learning_rate": 0.00033607382597204124, "loss": 0.0, "step": 65720 }, { "epoch": 0.8987427446315401, "grad_norm": NaN, "learning_rate": 0.0003360178099070399, "loss": 0.0, "step": 65730 }, { "epoch": 0.8988794771349071, "grad_norm": NaN, "learning_rate": 0.0003359617889432061, "loss": 0.0, "step": 65740 }, { "epoch": 0.8990162096382741, "grad_norm": NaN, "learning_rate": 0.0003359057630837303, "loss": 0.0, "step": 65750 }, { "epoch": 0.8991529421416412, "grad_norm": NaN, "learning_rate": 0.00033584973233180337, "loss": 0.0, "step": 65760 }, { "epoch": 0.8992896746450082, "grad_norm": NaN, "learning_rate": 0.0003357936966906163, "loss": 0.0, "step": 65770 }, { "epoch": 0.8994264071483753, "grad_norm": NaN, "learning_rate": 0.0003357376561633604, "loss": 0.0, "step": 65780 }, { "epoch": 0.8995631396517423, "grad_norm": NaN, "learning_rate": 0.0003356816107532272, "loss": 0.0, "step": 65790 }, { "epoch": 0.8996998721551094, "grad_norm": NaN, "learning_rate": 0.0003356255604634086, "loss": 0.0, "step": 65800 }, { "epoch": 0.8998366046584764, "grad_norm": NaN, "learning_rate": 0.00033556950529709667, "loss": 0.0, "step": 65810 }, { "epoch": 0.8999733371618435, "grad_norm": NaN, "learning_rate": 0.00033551344525748394, "loss": 0.0, "step": 65820 }, { "epoch": 0.9001100696652105, "grad_norm": NaN, "learning_rate": 0.00033545738034776316, "loss": 0.0, "step": 65830 }, { "epoch": 0.9002468021685776, "grad_norm": NaN, "learning_rate": 0.000335401310571127, "loss": 0.0, "step": 65840 }, { "epoch": 0.9003835346719445, "grad_norm": NaN, "learning_rate": 0.00033534523593076903, "loss": 0.0, "step": 65850 }, { "epoch": 0.9005202671753115, "grad_norm": NaN, "learning_rate": 0.0003352891564298827, "loss": 0.0, "step": 65860 }, { "epoch": 0.9006569996786786, "grad_norm": NaN, "learning_rate": 0.0003352330720716617, "loss": 0.0, "step": 65870 }, { "epoch": 0.9007937321820456, "grad_norm": NaN, "learning_rate": 0.00033517698285930015, "loss": 0.0, "step": 65880 }, { "epoch": 0.9009304646854127, "grad_norm": NaN, "learning_rate": 0.00033512088879599254, "loss": 0.0, "step": 65890 }, { "epoch": 0.9010671971887797, "grad_norm": NaN, "learning_rate": 0.00033506478988493333, "loss": 0.0, "step": 65900 }, { "epoch": 0.9012039296921468, "grad_norm": NaN, "learning_rate": 0.00033500868612931756, "loss": 0.0, "step": 65910 }, { "epoch": 0.9013406621955138, "grad_norm": NaN, "learning_rate": 0.0003349525775323403, "loss": 0.0, "step": 65920 }, { "epoch": 0.9014773946988809, "grad_norm": NaN, "learning_rate": 0.0003348964640971972, "loss": 0.0, "step": 65930 }, { "epoch": 0.9016141272022479, "grad_norm": NaN, "learning_rate": 0.0003348403458270838, "loss": 0.0, "step": 65940 }, { "epoch": 0.901750859705615, "grad_norm": NaN, "learning_rate": 0.00033478422272519617, "loss": 0.0, "step": 65950 }, { "epoch": 0.901887592208982, "grad_norm": NaN, "learning_rate": 0.00033472809479473067, "loss": 0.0, "step": 65960 }, { "epoch": 0.902024324712349, "grad_norm": NaN, "learning_rate": 0.0003346719620388837, "loss": 0.0, "step": 65970 }, { "epoch": 0.902161057215716, "grad_norm": NaN, "learning_rate": 0.0003346158244608523, "loss": 0.0, "step": 65980 }, { "epoch": 0.902297789719083, "grad_norm": NaN, "learning_rate": 0.0003345596820638335, "loss": 0.0, "step": 65990 }, { "epoch": 0.9024345222224501, "grad_norm": NaN, "learning_rate": 0.0003345035348510245, "loss": 0.0, "step": 66000 }, { "epoch": 0.9025712547258171, "grad_norm": NaN, "learning_rate": 0.00033444738282562334, "loss": 0.0, "step": 66010 }, { "epoch": 0.9027079872291842, "grad_norm": NaN, "learning_rate": 0.0003343912259908277, "loss": 0.0, "step": 66020 }, { "epoch": 0.9028447197325512, "grad_norm": NaN, "learning_rate": 0.00033433506434983574, "loss": 0.0, "step": 66030 }, { "epoch": 0.9029814522359183, "grad_norm": NaN, "learning_rate": 0.00033427889790584617, "loss": 0.0, "step": 66040 }, { "epoch": 0.9031181847392853, "grad_norm": NaN, "learning_rate": 0.00033422272666205747, "loss": 0.0, "step": 66050 }, { "epoch": 0.9032549172426524, "grad_norm": NaN, "learning_rate": 0.00033416655062166897, "loss": 0.0, "step": 66060 }, { "epoch": 0.9033916497460194, "grad_norm": NaN, "learning_rate": 0.0003341103697878797, "loss": 0.0, "step": 66070 }, { "epoch": 0.9035283822493864, "grad_norm": NaN, "learning_rate": 0.00033405418416388946, "loss": 0.0, "step": 66080 }, { "epoch": 0.9036651147527535, "grad_norm": NaN, "learning_rate": 0.0003339979937528979, "loss": 0.0, "step": 66090 }, { "epoch": 0.9038018472561204, "grad_norm": NaN, "learning_rate": 0.00033394179855810525, "loss": 0.0, "step": 66100 }, { "epoch": 0.9039385797594875, "grad_norm": NaN, "learning_rate": 0.00033388559858271195, "loss": 0.0, "step": 66110 }, { "epoch": 0.9040753122628545, "grad_norm": NaN, "learning_rate": 0.00033382939382991855, "loss": 0.0, "step": 66120 }, { "epoch": 0.9042120447662216, "grad_norm": NaN, "learning_rate": 0.000333773184302926, "loss": 0.0, "step": 66130 }, { "epoch": 0.9043487772695886, "grad_norm": NaN, "learning_rate": 0.00033371697000493555, "loss": 0.0, "step": 66140 }, { "epoch": 0.9044855097729557, "grad_norm": NaN, "learning_rate": 0.00033366075093914875, "loss": 0.0, "step": 66150 }, { "epoch": 0.9046222422763227, "grad_norm": NaN, "learning_rate": 0.00033360452710876726, "loss": 0.0, "step": 66160 }, { "epoch": 0.9047589747796898, "grad_norm": NaN, "learning_rate": 0.00033354829851699316, "loss": 0.0, "step": 66170 }, { "epoch": 0.9048957072830568, "grad_norm": NaN, "learning_rate": 0.00033349206516702863, "loss": 0.0, "step": 66180 }, { "epoch": 0.9050324397864238, "grad_norm": NaN, "learning_rate": 0.0003334358270620764, "loss": 0.0, "step": 66190 }, { "epoch": 0.9051691722897909, "grad_norm": NaN, "learning_rate": 0.0003333795842053392, "loss": 0.0, "step": 66200 }, { "epoch": 0.9053059047931579, "grad_norm": NaN, "learning_rate": 0.00033332333660002016, "loss": 0.0, "step": 66210 }, { "epoch": 0.905442637296525, "grad_norm": NaN, "learning_rate": 0.00033326708424932273, "loss": 0.0, "step": 66220 }, { "epoch": 0.905579369799892, "grad_norm": NaN, "learning_rate": 0.0003332108271564504, "loss": 0.0, "step": 66230 }, { "epoch": 0.905716102303259, "grad_norm": NaN, "learning_rate": 0.0003331545653246072, "loss": 0.0, "step": 66240 }, { "epoch": 0.905852834806626, "grad_norm": NaN, "learning_rate": 0.0003330982987569974, "loss": 0.0, "step": 66250 }, { "epoch": 0.9059895673099931, "grad_norm": NaN, "learning_rate": 0.0003330420274568252, "loss": 0.0, "step": 66260 }, { "epoch": 0.9061262998133601, "grad_norm": NaN, "learning_rate": 0.0003329857514272956, "loss": 0.0, "step": 66270 }, { "epoch": 0.9062630323167272, "grad_norm": NaN, "learning_rate": 0.00033292947067161343, "loss": 0.0, "step": 66280 }, { "epoch": 0.9063997648200942, "grad_norm": NaN, "learning_rate": 0.00033287318519298396, "loss": 0.0, "step": 66290 }, { "epoch": 0.9065364973234612, "grad_norm": NaN, "learning_rate": 0.00033281689499461286, "loss": 0.0, "step": 66300 }, { "epoch": 0.9066732298268283, "grad_norm": NaN, "learning_rate": 0.0003327606000797059, "loss": 0.0, "step": 66310 }, { "epoch": 0.9068099623301953, "grad_norm": NaN, "learning_rate": 0.00033270430045146906, "loss": 0.0, "step": 66320 }, { "epoch": 0.9069466948335624, "grad_norm": NaN, "learning_rate": 0.0003326479961131087, "loss": 0.0, "step": 66330 }, { "epoch": 0.9070834273369294, "grad_norm": NaN, "learning_rate": 0.00033259168706783137, "loss": 0.0, "step": 66340 }, { "epoch": 0.9072201598402965, "grad_norm": NaN, "learning_rate": 0.00033253537331884417, "loss": 0.0, "step": 66350 }, { "epoch": 0.9073568923436635, "grad_norm": NaN, "learning_rate": 0.000332479054869354, "loss": 0.0, "step": 66360 }, { "epoch": 0.9074936248470306, "grad_norm": NaN, "learning_rate": 0.00033242273172256847, "loss": 0.0, "step": 66370 }, { "epoch": 0.9076303573503975, "grad_norm": NaN, "learning_rate": 0.00033236640388169513, "loss": 0.0, "step": 66380 }, { "epoch": 0.9077670898537646, "grad_norm": NaN, "learning_rate": 0.00033231007134994195, "loss": 0.0, "step": 66390 }, { "epoch": 0.9079038223571316, "grad_norm": NaN, "learning_rate": 0.0003322537341305172, "loss": 0.0, "step": 66400 }, { "epoch": 0.9080405548604986, "grad_norm": NaN, "learning_rate": 0.00033219739222662927, "loss": 0.0, "step": 66410 }, { "epoch": 0.9081772873638657, "grad_norm": NaN, "learning_rate": 0.00033214104564148694, "loss": 0.0, "step": 66420 }, { "epoch": 0.9083140198672327, "grad_norm": NaN, "learning_rate": 0.00033208469437829934, "loss": 0.0, "step": 66430 }, { "epoch": 0.9084507523705998, "grad_norm": NaN, "learning_rate": 0.00033202833844027564, "loss": 0.0, "step": 66440 }, { "epoch": 0.9085874848739668, "grad_norm": NaN, "learning_rate": 0.00033197197783062536, "loss": 0.0, "step": 66450 }, { "epoch": 0.9087242173773339, "grad_norm": NaN, "learning_rate": 0.00033191561255255844, "loss": 0.0, "step": 66460 }, { "epoch": 0.9088609498807009, "grad_norm": NaN, "learning_rate": 0.0003318592426092848, "loss": 0.0, "step": 66470 }, { "epoch": 0.908997682384068, "grad_norm": NaN, "learning_rate": 0.00033180286800401493, "loss": 0.0, "step": 66480 }, { "epoch": 0.909134414887435, "grad_norm": NaN, "learning_rate": 0.00033174648873995937, "loss": 0.0, "step": 66490 }, { "epoch": 0.9092711473908021, "grad_norm": NaN, "learning_rate": 0.00033169010482032895, "loss": 0.0, "step": 66500 }, { "epoch": 0.909407879894169, "grad_norm": NaN, "learning_rate": 0.00033163371624833496, "loss": 0.0, "step": 66510 }, { "epoch": 0.909544612397536, "grad_norm": NaN, "learning_rate": 0.0003315773230271886, "loss": 0.0, "step": 66520 }, { "epoch": 0.9096813449009031, "grad_norm": NaN, "learning_rate": 0.00033152092516010176, "loss": 0.0, "step": 66530 }, { "epoch": 0.9098180774042701, "grad_norm": NaN, "learning_rate": 0.0003314645226502862, "loss": 0.0, "step": 66540 }, { "epoch": 0.9099548099076372, "grad_norm": NaN, "learning_rate": 0.0003314081155009542, "loss": 0.0, "step": 66550 }, { "epoch": 0.9100915424110042, "grad_norm": NaN, "learning_rate": 0.00033135170371531824, "loss": 0.0, "step": 66560 }, { "epoch": 0.9102282749143713, "grad_norm": NaN, "learning_rate": 0.00033129528729659104, "loss": 0.0, "step": 66570 }, { "epoch": 0.9103650074177383, "grad_norm": NaN, "learning_rate": 0.00033123886624798554, "loss": 0.0, "step": 66580 }, { "epoch": 0.9105017399211054, "grad_norm": NaN, "learning_rate": 0.0003311824405727151, "loss": 0.0, "step": 66590 }, { "epoch": 0.9106384724244724, "grad_norm": NaN, "learning_rate": 0.00033112601027399314, "loss": 0.0, "step": 66600 }, { "epoch": 0.9107752049278395, "grad_norm": NaN, "learning_rate": 0.0003310695753550334, "loss": 0.0, "step": 66610 }, { "epoch": 0.9109119374312065, "grad_norm": NaN, "learning_rate": 0.00033101313581905014, "loss": 0.0, "step": 66620 }, { "epoch": 0.9110486699345735, "grad_norm": NaN, "learning_rate": 0.0003309566916692574, "loss": 0.0, "step": 66630 }, { "epoch": 0.9111854024379406, "grad_norm": NaN, "learning_rate": 0.00033090024290886996, "loss": 0.0, "step": 66640 }, { "epoch": 0.9113221349413075, "grad_norm": NaN, "learning_rate": 0.00033084378954110257, "loss": 0.0, "step": 66650 }, { "epoch": 0.9114588674446746, "grad_norm": NaN, "learning_rate": 0.0003307873315691703, "loss": 0.0, "step": 66660 }, { "epoch": 0.9115955999480416, "grad_norm": NaN, "learning_rate": 0.00033073086899628856, "loss": 0.0, "step": 66670 }, { "epoch": 0.9117323324514087, "grad_norm": NaN, "learning_rate": 0.000330674401825673, "loss": 0.0, "step": 66680 }, { "epoch": 0.9118690649547757, "grad_norm": NaN, "learning_rate": 0.0003306179300605394, "loss": 0.0, "step": 66690 }, { "epoch": 0.9120057974581428, "grad_norm": NaN, "learning_rate": 0.00033056145370410396, "loss": 0.0, "step": 66700 }, { "epoch": 0.9121425299615098, "grad_norm": NaN, "learning_rate": 0.0003305049727595831, "loss": 0.0, "step": 66710 }, { "epoch": 0.9122792624648768, "grad_norm": NaN, "learning_rate": 0.0003304484872301935, "loss": 0.0, "step": 66720 }, { "epoch": 0.9124159949682439, "grad_norm": NaN, "learning_rate": 0.0003303919971191521, "loss": 0.0, "step": 66730 }, { "epoch": 0.9125527274716109, "grad_norm": NaN, "learning_rate": 0.00033033550242967595, "loss": 0.0, "step": 66740 }, { "epoch": 0.912689459974978, "grad_norm": NaN, "learning_rate": 0.0003302790031649827, "loss": 0.0, "step": 66750 }, { "epoch": 0.912826192478345, "grad_norm": NaN, "learning_rate": 0.0003302224993282899, "loss": 0.0, "step": 66760 }, { "epoch": 0.912962924981712, "grad_norm": NaN, "learning_rate": 0.00033016599092281564, "loss": 0.0, "step": 66770 }, { "epoch": 0.913099657485079, "grad_norm": NaN, "learning_rate": 0.00033010947795177807, "loss": 0.0, "step": 66780 }, { "epoch": 0.9132363899884461, "grad_norm": NaN, "learning_rate": 0.0003300529604183957, "loss": 0.0, "step": 66790 }, { "epoch": 0.9133731224918131, "grad_norm": NaN, "learning_rate": 0.00032999643832588735, "loss": 0.0, "step": 66800 }, { "epoch": 0.9135098549951802, "grad_norm": NaN, "learning_rate": 0.00032993991167747186, "loss": 0.0, "step": 66810 }, { "epoch": 0.9136465874985472, "grad_norm": NaN, "learning_rate": 0.0003298833804763686, "loss": 0.0, "step": 66820 }, { "epoch": 0.9137833200019142, "grad_norm": NaN, "learning_rate": 0.0003298268447257972, "loss": 0.0, "step": 66830 }, { "epoch": 0.9139200525052813, "grad_norm": NaN, "learning_rate": 0.00032977030442897737, "loss": 0.0, "step": 66840 }, { "epoch": 0.9140567850086483, "grad_norm": NaN, "learning_rate": 0.0003297137595891291, "loss": 0.0, "step": 66850 }, { "epoch": 0.9141935175120154, "grad_norm": NaN, "learning_rate": 0.00032965721020947284, "loss": 0.0, "step": 66860 }, { "epoch": 0.9143302500153824, "grad_norm": NaN, "learning_rate": 0.0003296006562932289, "loss": 0.0, "step": 66870 }, { "epoch": 0.9144669825187495, "grad_norm": NaN, "learning_rate": 0.0003295440978436184, "loss": 0.0, "step": 66880 }, { "epoch": 0.9146037150221165, "grad_norm": NaN, "learning_rate": 0.0003294875348638622, "loss": 0.0, "step": 66890 }, { "epoch": 0.9147404475254836, "grad_norm": NaN, "learning_rate": 0.0003294309673571817, "loss": 0.0, "step": 66900 }, { "epoch": 0.9148771800288505, "grad_norm": NaN, "learning_rate": 0.0003293743953267986, "loss": 0.0, "step": 66910 }, { "epoch": 0.9150139125322176, "grad_norm": NaN, "learning_rate": 0.00032931781877593456, "loss": 0.0, "step": 66920 }, { "epoch": 0.9151506450355846, "grad_norm": NaN, "learning_rate": 0.0003292612377078119, "loss": 0.0, "step": 66930 }, { "epoch": 0.9152873775389516, "grad_norm": NaN, "learning_rate": 0.00032920465212565294, "loss": 0.0, "step": 66940 }, { "epoch": 0.9154241100423187, "grad_norm": NaN, "learning_rate": 0.0003291480620326802, "loss": 0.0, "step": 66950 }, { "epoch": 0.9155608425456857, "grad_norm": NaN, "learning_rate": 0.00032909146743211656, "loss": 0.0, "step": 66960 }, { "epoch": 0.9156975750490528, "grad_norm": NaN, "learning_rate": 0.0003290348683271853, "loss": 0.0, "step": 66970 }, { "epoch": 0.9158343075524198, "grad_norm": NaN, "learning_rate": 0.0003289782647211097, "loss": 0.0, "step": 66980 }, { "epoch": 0.9159710400557869, "grad_norm": NaN, "learning_rate": 0.00032892165661711347, "loss": 0.0, "step": 66990 }, { "epoch": 0.9161077725591539, "grad_norm": NaN, "learning_rate": 0.00032886504401842054, "loss": 0.0, "step": 67000 }, { "epoch": 0.916244505062521, "grad_norm": NaN, "learning_rate": 0.000328808426928255, "loss": 0.0, "step": 67010 }, { "epoch": 0.916381237565888, "grad_norm": NaN, "learning_rate": 0.0003287518053498414, "loss": 0.0, "step": 67020 }, { "epoch": 0.9165179700692551, "grad_norm": NaN, "learning_rate": 0.00032869517928640424, "loss": 0.0, "step": 67030 }, { "epoch": 0.916654702572622, "grad_norm": NaN, "learning_rate": 0.0003286385487411686, "loss": 0.0, "step": 67040 }, { "epoch": 0.916791435075989, "grad_norm": NaN, "learning_rate": 0.0003285819137173595, "loss": 0.0, "step": 67050 }, { "epoch": 0.9169281675793561, "grad_norm": NaN, "learning_rate": 0.00032852527421820254, "loss": 0.0, "step": 67060 }, { "epoch": 0.9170649000827231, "grad_norm": NaN, "learning_rate": 0.00032846863024692344, "loss": 0.0, "step": 67070 }, { "epoch": 0.9172016325860902, "grad_norm": NaN, "learning_rate": 0.000328411981806748, "loss": 0.0, "step": 67080 }, { "epoch": 0.9173383650894572, "grad_norm": NaN, "learning_rate": 0.00032835532890090245, "loss": 0.0, "step": 67090 }, { "epoch": 0.9174750975928243, "grad_norm": NaN, "learning_rate": 0.00032829867153261346, "loss": 0.0, "step": 67100 }, { "epoch": 0.9176118300961913, "grad_norm": NaN, "learning_rate": 0.0003282420097051075, "loss": 0.0, "step": 67110 }, { "epoch": 0.9177485625995584, "grad_norm": NaN, "learning_rate": 0.00032818534342161167, "loss": 0.0, "step": 67120 }, { "epoch": 0.9178852951029254, "grad_norm": NaN, "learning_rate": 0.00032812867268535307, "loss": 0.0, "step": 67130 }, { "epoch": 0.9180220276062925, "grad_norm": NaN, "learning_rate": 0.00032807199749955934, "loss": 0.0, "step": 67140 }, { "epoch": 0.9181587601096595, "grad_norm": NaN, "learning_rate": 0.0003280153178674581, "loss": 0.0, "step": 67150 }, { "epoch": 0.9182954926130265, "grad_norm": NaN, "learning_rate": 0.0003279586337922774, "loss": 0.0, "step": 67160 }, { "epoch": 0.9184322251163936, "grad_norm": NaN, "learning_rate": 0.0003279019452772454, "loss": 0.0, "step": 67170 }, { "epoch": 0.9185689576197605, "grad_norm": NaN, "learning_rate": 0.0003278452523255906, "loss": 0.0, "step": 67180 }, { "epoch": 0.9187056901231276, "grad_norm": NaN, "learning_rate": 0.00032778855494054184, "loss": 0.0, "step": 67190 }, { "epoch": 0.9188424226264946, "grad_norm": NaN, "learning_rate": 0.000327731853125328, "loss": 0.0, "step": 67200 }, { "epoch": 0.9189791551298617, "grad_norm": NaN, "learning_rate": 0.00032767514688317835, "loss": 0.0, "step": 67210 }, { "epoch": 0.9191158876332287, "grad_norm": NaN, "learning_rate": 0.00032761843621732247, "loss": 0.0, "step": 67220 }, { "epoch": 0.9192526201365958, "grad_norm": NaN, "learning_rate": 0.0003275617211309899, "loss": 0.0, "step": 67230 }, { "epoch": 0.9193893526399628, "grad_norm": NaN, "learning_rate": 0.0003275050016274109, "loss": 0.0, "step": 67240 }, { "epoch": 0.9195260851433299, "grad_norm": NaN, "learning_rate": 0.00032744827770981563, "loss": 0.0, "step": 67250 }, { "epoch": 0.9196628176466969, "grad_norm": NaN, "learning_rate": 0.0003273915493814345, "loss": 0.0, "step": 67260 }, { "epoch": 0.9197995501500639, "grad_norm": NaN, "learning_rate": 0.00032733481664549844, "loss": 0.0, "step": 67270 }, { "epoch": 0.919936282653431, "grad_norm": NaN, "learning_rate": 0.0003272780795052383, "loss": 0.0, "step": 67280 }, { "epoch": 0.920073015156798, "grad_norm": NaN, "learning_rate": 0.0003272213379638854, "loss": 0.0, "step": 67290 }, { "epoch": 0.9202097476601651, "grad_norm": NaN, "learning_rate": 0.00032716459202467127, "loss": 0.0, "step": 67300 }, { "epoch": 0.920346480163532, "grad_norm": NaN, "learning_rate": 0.00032710784169082764, "loss": 0.0, "step": 67310 }, { "epoch": 0.9204832126668991, "grad_norm": NaN, "learning_rate": 0.0003270510869655865, "loss": 0.0, "step": 67320 }, { "epoch": 0.9206199451702661, "grad_norm": NaN, "learning_rate": 0.0003269943278521802, "loss": 0.0, "step": 67330 }, { "epoch": 0.9207566776736332, "grad_norm": NaN, "learning_rate": 0.0003269375643538411, "loss": 0.0, "step": 67340 }, { "epoch": 0.9208934101770002, "grad_norm": NaN, "learning_rate": 0.0003268807964738021, "loss": 0.0, "step": 67350 }, { "epoch": 0.9210301426803673, "grad_norm": NaN, "learning_rate": 0.0003268240242152962, "loss": 0.0, "step": 67360 }, { "epoch": 0.9211668751837343, "grad_norm": NaN, "learning_rate": 0.0003267672475815564, "loss": 0.0, "step": 67370 }, { "epoch": 0.9213036076871013, "grad_norm": NaN, "learning_rate": 0.0003267104665758166, "loss": 0.0, "step": 67380 }, { "epoch": 0.9214403401904684, "grad_norm": NaN, "learning_rate": 0.0003266536812013104, "loss": 0.0, "step": 67390 }, { "epoch": 0.9215770726938354, "grad_norm": NaN, "learning_rate": 0.0003265968914612717, "loss": 0.0, "step": 67400 }, { "epoch": 0.9217138051972025, "grad_norm": NaN, "learning_rate": 0.0003265400973589349, "loss": 0.0, "step": 67410 }, { "epoch": 0.9218505377005695, "grad_norm": NaN, "learning_rate": 0.0003264832988975343, "loss": 0.0, "step": 67420 }, { "epoch": 0.9219872702039366, "grad_norm": NaN, "learning_rate": 0.00032642649608030487, "loss": 0.0, "step": 67430 }, { "epoch": 0.9221240027073035, "grad_norm": NaN, "learning_rate": 0.0003263696889104816, "loss": 0.0, "step": 67440 }, { "epoch": 0.9222607352106706, "grad_norm": NaN, "learning_rate": 0.0003263128773912996, "loss": 0.0, "step": 67450 }, { "epoch": 0.9223974677140376, "grad_norm": NaN, "learning_rate": 0.0003262560615259944, "loss": 0.0, "step": 67460 }, { "epoch": 0.9225342002174047, "grad_norm": NaN, "learning_rate": 0.0003261992413178018, "loss": 0.0, "step": 67470 }, { "epoch": 0.9226709327207717, "grad_norm": NaN, "learning_rate": 0.0003261424167699577, "loss": 0.0, "step": 67480 }, { "epoch": 0.9228076652241387, "grad_norm": NaN, "learning_rate": 0.0003260855878856984, "loss": 0.0, "step": 67490 }, { "epoch": 0.9229443977275058, "grad_norm": NaN, "learning_rate": 0.0003260287546682604, "loss": 0.0, "step": 67500 }, { "epoch": 0.9230811302308728, "grad_norm": NaN, "learning_rate": 0.00032597191712088037, "loss": 0.0, "step": 67510 }, { "epoch": 0.9232178627342399, "grad_norm": NaN, "learning_rate": 0.0003259150752467954, "loss": 0.0, "step": 67520 }, { "epoch": 0.9233545952376069, "grad_norm": NaN, "learning_rate": 0.00032585822904924257, "loss": 0.0, "step": 67530 }, { "epoch": 0.923491327740974, "grad_norm": NaN, "learning_rate": 0.00032580137853145945, "loss": 0.0, "step": 67540 }, { "epoch": 0.923628060244341, "grad_norm": NaN, "learning_rate": 0.00032574452369668366, "loss": 0.0, "step": 67550 }, { "epoch": 0.9237647927477081, "grad_norm": NaN, "learning_rate": 0.0003256876645481533, "loss": 0.0, "step": 67560 }, { "epoch": 0.923901525251075, "grad_norm": NaN, "learning_rate": 0.0003256308010891065, "loss": 0.0, "step": 67570 }, { "epoch": 0.9240382577544421, "grad_norm": NaN, "learning_rate": 0.00032557393332278164, "loss": 0.0, "step": 67580 }, { "epoch": 0.9241749902578091, "grad_norm": NaN, "learning_rate": 0.00032551706125241747, "loss": 0.0, "step": 67590 }, { "epoch": 0.9243117227611761, "grad_norm": NaN, "learning_rate": 0.00032546018488125296, "loss": 0.0, "step": 67600 }, { "epoch": 0.9244484552645432, "grad_norm": NaN, "learning_rate": 0.00032540330421252734, "loss": 0.0, "step": 67610 }, { "epoch": 0.9245851877679102, "grad_norm": NaN, "learning_rate": 0.0003253464192494799, "loss": 0.0, "step": 67620 }, { "epoch": 0.9247219202712773, "grad_norm": NaN, "learning_rate": 0.0003252895299953505, "loss": 0.0, "step": 67630 }, { "epoch": 0.9248586527746443, "grad_norm": NaN, "learning_rate": 0.0003252326364533789, "loss": 0.0, "step": 67640 }, { "epoch": 0.9249953852780114, "grad_norm": NaN, "learning_rate": 0.0003251757386268053, "loss": 0.0, "step": 67650 }, { "epoch": 0.9251321177813784, "grad_norm": NaN, "learning_rate": 0.0003251188365188702, "loss": 0.0, "step": 67660 }, { "epoch": 0.9252688502847455, "grad_norm": NaN, "learning_rate": 0.0003250619301328142, "loss": 0.0, "step": 67670 }, { "epoch": 0.9254055827881125, "grad_norm": NaN, "learning_rate": 0.0003250050194718782, "loss": 0.0, "step": 67680 }, { "epoch": 0.9255423152914796, "grad_norm": NaN, "learning_rate": 0.00032494810453930325, "loss": 0.0, "step": 67690 }, { "epoch": 0.9256790477948466, "grad_norm": NaN, "learning_rate": 0.0003248911853383309, "loss": 0.0, "step": 67700 }, { "epoch": 0.9258157802982135, "grad_norm": NaN, "learning_rate": 0.0003248342618722026, "loss": 0.0, "step": 67710 }, { "epoch": 0.9259525128015806, "grad_norm": NaN, "learning_rate": 0.00032477733414416036, "loss": 0.0, "step": 67720 }, { "epoch": 0.9260892453049476, "grad_norm": NaN, "learning_rate": 0.0003247204021574463, "loss": 0.0, "step": 67730 }, { "epoch": 0.9262259778083147, "grad_norm": NaN, "learning_rate": 0.0003246634659153026, "loss": 0.0, "step": 67740 }, { "epoch": 0.9263627103116817, "grad_norm": NaN, "learning_rate": 0.0003246065254209721, "loss": 0.0, "step": 67750 }, { "epoch": 0.9264994428150488, "grad_norm": NaN, "learning_rate": 0.0003245495806776974, "loss": 0.0, "step": 67760 }, { "epoch": 0.9266361753184158, "grad_norm": NaN, "learning_rate": 0.0003244926316887218, "loss": 0.0, "step": 67770 }, { "epoch": 0.9267729078217829, "grad_norm": NaN, "learning_rate": 0.0003244356784572884, "loss": 0.0, "step": 67780 }, { "epoch": 0.9269096403251499, "grad_norm": NaN, "learning_rate": 0.0003243787209866409, "loss": 0.0, "step": 67790 }, { "epoch": 0.927046372828517, "grad_norm": NaN, "learning_rate": 0.00032432175928002316, "loss": 0.0, "step": 67800 }, { "epoch": 0.927183105331884, "grad_norm": NaN, "learning_rate": 0.00032426479334067917, "loss": 0.0, "step": 67810 }, { "epoch": 0.927319837835251, "grad_norm": NaN, "learning_rate": 0.0003242078231718531, "loss": 0.0, "step": 67820 }, { "epoch": 0.9274565703386181, "grad_norm": NaN, "learning_rate": 0.0003241508487767897, "loss": 0.0, "step": 67830 }, { "epoch": 0.927593302841985, "grad_norm": NaN, "learning_rate": 0.0003240938701587335, "loss": 0.0, "step": 67840 }, { "epoch": 0.9277300353453521, "grad_norm": NaN, "learning_rate": 0.0003240368873209297, "loss": 0.0, "step": 67850 }, { "epoch": 0.9278667678487191, "grad_norm": NaN, "learning_rate": 0.0003239799002666235, "loss": 0.0, "step": 67860 }, { "epoch": 0.9280035003520862, "grad_norm": NaN, "learning_rate": 0.00032392290899906036, "loss": 0.0, "step": 67870 }, { "epoch": 0.9281402328554532, "grad_norm": NaN, "learning_rate": 0.00032386591352148605, "loss": 0.0, "step": 67880 }, { "epoch": 0.9282769653588203, "grad_norm": NaN, "learning_rate": 0.00032380891383714644, "loss": 0.0, "step": 67890 }, { "epoch": 0.9284136978621873, "grad_norm": NaN, "learning_rate": 0.0003237519099492879, "loss": 0.0, "step": 67900 }, { "epoch": 0.9285504303655544, "grad_norm": NaN, "learning_rate": 0.0003236949018611568, "loss": 0.0, "step": 67910 }, { "epoch": 0.9286871628689214, "grad_norm": NaN, "learning_rate": 0.0003236378895759997, "loss": 0.0, "step": 67920 }, { "epoch": 0.9288238953722884, "grad_norm": NaN, "learning_rate": 0.00032358087309706374, "loss": 0.0, "step": 67930 }, { "epoch": 0.9289606278756555, "grad_norm": NaN, "learning_rate": 0.00032352385242759605, "loss": 0.0, "step": 67940 }, { "epoch": 0.9290973603790225, "grad_norm": NaN, "learning_rate": 0.00032346682757084395, "loss": 0.0, "step": 67950 }, { "epoch": 0.9292340928823896, "grad_norm": NaN, "learning_rate": 0.00032340979853005516, "loss": 0.0, "step": 67960 }, { "epoch": 0.9293708253857565, "grad_norm": NaN, "learning_rate": 0.0003233527653084774, "loss": 0.0, "step": 67970 }, { "epoch": 0.9295075578891236, "grad_norm": NaN, "learning_rate": 0.000323295727909359, "loss": 0.0, "step": 67980 }, { "epoch": 0.9296442903924906, "grad_norm": NaN, "learning_rate": 0.00032323868633594825, "loss": 0.0, "step": 67990 }, { "epoch": 0.9297810228958577, "grad_norm": NaN, "learning_rate": 0.00032318164059149366, "loss": 0.0, "step": 68000 }, { "epoch": 0.9299177553992247, "grad_norm": NaN, "learning_rate": 0.0003231245906792443, "loss": 0.0, "step": 68010 }, { "epoch": 0.9300544879025918, "grad_norm": NaN, "learning_rate": 0.0003230675366024489, "loss": 0.0, "step": 68020 }, { "epoch": 0.9301912204059588, "grad_norm": NaN, "learning_rate": 0.000323010478364357, "loss": 0.0, "step": 68030 }, { "epoch": 0.9303279529093258, "grad_norm": NaN, "learning_rate": 0.0003229534159682181, "loss": 0.0, "step": 68040 }, { "epoch": 0.9304646854126929, "grad_norm": NaN, "learning_rate": 0.00032289634941728187, "loss": 0.0, "step": 68050 }, { "epoch": 0.9306014179160599, "grad_norm": NaN, "learning_rate": 0.0003228392787147985, "loss": 0.0, "step": 68060 }, { "epoch": 0.930738150419427, "grad_norm": NaN, "learning_rate": 0.00032278220386401814, "loss": 0.0, "step": 68070 }, { "epoch": 0.930874882922794, "grad_norm": NaN, "learning_rate": 0.00032272512486819136, "loss": 0.0, "step": 68080 }, { "epoch": 0.9310116154261611, "grad_norm": NaN, "learning_rate": 0.0003226680417305688, "loss": 0.0, "step": 68090 }, { "epoch": 0.931148347929528, "grad_norm": NaN, "learning_rate": 0.00032261095445440145, "loss": 0.0, "step": 68100 }, { "epoch": 0.9312850804328952, "grad_norm": NaN, "learning_rate": 0.00032255386304294055, "loss": 0.0, "step": 68110 }, { "epoch": 0.9314218129362621, "grad_norm": NaN, "learning_rate": 0.0003224967674994375, "loss": 0.0, "step": 68120 }, { "epoch": 0.9315585454396291, "grad_norm": NaN, "learning_rate": 0.00032243966782714395, "loss": 0.0, "step": 68130 }, { "epoch": 0.9316952779429962, "grad_norm": NaN, "learning_rate": 0.0003223825640293119, "loss": 0.0, "step": 68140 }, { "epoch": 0.9318320104463632, "grad_norm": NaN, "learning_rate": 0.00032232545610919337, "loss": 0.0, "step": 68150 }, { "epoch": 0.9319687429497303, "grad_norm": NaN, "learning_rate": 0.0003222683440700408, "loss": 0.0, "step": 68160 }, { "epoch": 0.9321054754530973, "grad_norm": NaN, "learning_rate": 0.00032221122791510675, "loss": 0.0, "step": 68170 }, { "epoch": 0.9322422079564644, "grad_norm": NaN, "learning_rate": 0.00032215410764764406, "loss": 0.0, "step": 68180 }, { "epoch": 0.9323789404598314, "grad_norm": NaN, "learning_rate": 0.00032209698327090585, "loss": 0.0, "step": 68190 }, { "epoch": 0.9325156729631985, "grad_norm": NaN, "learning_rate": 0.0003220398547881455, "loss": 0.0, "step": 68200 }, { "epoch": 0.9326524054665655, "grad_norm": NaN, "learning_rate": 0.0003219827222026165, "loss": 0.0, "step": 68210 }, { "epoch": 0.9327891379699326, "grad_norm": NaN, "learning_rate": 0.00032192558551757256, "loss": 0.0, "step": 68220 }, { "epoch": 0.9329258704732996, "grad_norm": NaN, "learning_rate": 0.0003218684447362677, "loss": 0.0, "step": 68230 }, { "epoch": 0.9330626029766665, "grad_norm": NaN, "learning_rate": 0.00032181129986195626, "loss": 0.0, "step": 68240 }, { "epoch": 0.9331993354800336, "grad_norm": NaN, "learning_rate": 0.00032175415089789265, "loss": 0.0, "step": 68250 }, { "epoch": 0.9333360679834006, "grad_norm": NaN, "learning_rate": 0.00032169699784733157, "loss": 0.0, "step": 68260 }, { "epoch": 0.9334728004867677, "grad_norm": NaN, "learning_rate": 0.00032163984071352807, "loss": 0.0, "step": 68270 }, { "epoch": 0.9336095329901347, "grad_norm": NaN, "learning_rate": 0.00032158267949973724, "loss": 0.0, "step": 68280 }, { "epoch": 0.9337462654935018, "grad_norm": NaN, "learning_rate": 0.00032152551420921446, "loss": 0.0, "step": 68290 }, { "epoch": 0.9338829979968688, "grad_norm": NaN, "learning_rate": 0.00032146834484521547, "loss": 0.0, "step": 68300 }, { "epoch": 0.9340197305002359, "grad_norm": NaN, "learning_rate": 0.000321411171410996, "loss": 0.0, "step": 68310 }, { "epoch": 0.9341564630036029, "grad_norm": NaN, "learning_rate": 0.0003213539939098123, "loss": 0.0, "step": 68320 }, { "epoch": 0.93429319550697, "grad_norm": NaN, "learning_rate": 0.0003212968123449206, "loss": 0.0, "step": 68330 }, { "epoch": 0.934429928010337, "grad_norm": NaN, "learning_rate": 0.00032123962671957757, "loss": 0.0, "step": 68340 }, { "epoch": 0.934566660513704, "grad_norm": NaN, "learning_rate": 0.0003211824370370399, "loss": 0.0, "step": 68350 }, { "epoch": 0.9347033930170711, "grad_norm": NaN, "learning_rate": 0.00032112524330056477, "loss": 0.0, "step": 68360 }, { "epoch": 0.934840125520438, "grad_norm": NaN, "learning_rate": 0.00032106804551340927, "loss": 0.0, "step": 68370 }, { "epoch": 0.9349768580238051, "grad_norm": NaN, "learning_rate": 0.00032101084367883105, "loss": 0.0, "step": 68380 }, { "epoch": 0.9351135905271721, "grad_norm": NaN, "learning_rate": 0.00032095363780008766, "loss": 0.0, "step": 68390 }, { "epoch": 0.9352503230305392, "grad_norm": NaN, "learning_rate": 0.00032089642788043723, "loss": 0.0, "step": 68400 }, { "epoch": 0.9353870555339062, "grad_norm": NaN, "learning_rate": 0.00032083921392313784, "loss": 0.0, "step": 68410 }, { "epoch": 0.9355237880372733, "grad_norm": NaN, "learning_rate": 0.00032078199593144784, "loss": 0.0, "step": 68420 }, { "epoch": 0.9356605205406403, "grad_norm": NaN, "learning_rate": 0.0003207247739086261, "loss": 0.0, "step": 68430 }, { "epoch": 0.9357972530440074, "grad_norm": NaN, "learning_rate": 0.0003206675478579313, "loss": 0.0, "step": 68440 }, { "epoch": 0.9359339855473744, "grad_norm": NaN, "learning_rate": 0.00032061031778262257, "loss": 0.0, "step": 68450 }, { "epoch": 0.9360707180507414, "grad_norm": NaN, "learning_rate": 0.0003205530836859593, "loss": 0.0, "step": 68460 }, { "epoch": 0.9362074505541085, "grad_norm": NaN, "learning_rate": 0.00032049584557120096, "loss": 0.0, "step": 68470 }, { "epoch": 0.9363441830574755, "grad_norm": NaN, "learning_rate": 0.00032043860344160743, "loss": 0.0, "step": 68480 }, { "epoch": 0.9364809155608426, "grad_norm": NaN, "learning_rate": 0.0003203813573004387, "loss": 0.0, "step": 68490 }, { "epoch": 0.9366176480642096, "grad_norm": NaN, "learning_rate": 0.000320324107150955, "loss": 0.0, "step": 68500 }, { "epoch": 0.9367543805675766, "grad_norm": NaN, "learning_rate": 0.0003202668529964168, "loss": 0.0, "step": 68510 }, { "epoch": 0.9368911130709436, "grad_norm": NaN, "learning_rate": 0.0003202095948400849, "loss": 0.0, "step": 68520 }, { "epoch": 0.9370278455743107, "grad_norm": NaN, "learning_rate": 0.0003201523326852201, "loss": 0.0, "step": 68530 }, { "epoch": 0.9371645780776777, "grad_norm": NaN, "learning_rate": 0.0003200950665350836, "loss": 0.0, "step": 68540 }, { "epoch": 0.9373013105810448, "grad_norm": NaN, "learning_rate": 0.00032003779639293674, "loss": 0.0, "step": 68550 }, { "epoch": 0.9374380430844118, "grad_norm": NaN, "learning_rate": 0.00031998052226204127, "loss": 0.0, "step": 68560 }, { "epoch": 0.9375747755877788, "grad_norm": NaN, "learning_rate": 0.00031992324414565894, "loss": 0.0, "step": 68570 }, { "epoch": 0.9377115080911459, "grad_norm": NaN, "learning_rate": 0.00031986596204705174, "loss": 0.0, "step": 68580 }, { "epoch": 0.9378482405945129, "grad_norm": NaN, "learning_rate": 0.00031980867596948214, "loss": 0.0, "step": 68590 }, { "epoch": 0.93798497309788, "grad_norm": NaN, "learning_rate": 0.0003197513859162125, "loss": 0.0, "step": 68600 }, { "epoch": 0.938121705601247, "grad_norm": NaN, "learning_rate": 0.00031969409189050565, "loss": 0.0, "step": 68610 }, { "epoch": 0.9382584381046141, "grad_norm": NaN, "learning_rate": 0.00031963679389562463, "loss": 0.0, "step": 68620 }, { "epoch": 0.938395170607981, "grad_norm": NaN, "learning_rate": 0.00031957949193483245, "loss": 0.0, "step": 68630 }, { "epoch": 0.9385319031113482, "grad_norm": NaN, "learning_rate": 0.00031952218601139274, "loss": 0.0, "step": 68640 }, { "epoch": 0.9386686356147151, "grad_norm": NaN, "learning_rate": 0.000319464876128569, "loss": 0.0, "step": 68650 }, { "epoch": 0.9388053681180822, "grad_norm": NaN, "learning_rate": 0.0003194075622896251, "loss": 0.0, "step": 68660 }, { "epoch": 0.9389421006214492, "grad_norm": NaN, "learning_rate": 0.0003193502444978253, "loss": 0.0, "step": 68670 }, { "epoch": 0.9390788331248162, "grad_norm": NaN, "learning_rate": 0.0003192929227564338, "loss": 0.0, "step": 68680 }, { "epoch": 0.9392155656281833, "grad_norm": NaN, "learning_rate": 0.0003192355970687152, "loss": 0.0, "step": 68690 }, { "epoch": 0.9393522981315503, "grad_norm": NaN, "learning_rate": 0.0003191782674379342, "loss": 0.0, "step": 68700 }, { "epoch": 0.9394890306349174, "grad_norm": NaN, "learning_rate": 0.0003191209338673559, "loss": 0.0, "step": 68710 }, { "epoch": 0.9396257631382844, "grad_norm": NaN, "learning_rate": 0.0003190635963602455, "loss": 0.0, "step": 68720 }, { "epoch": 0.9397624956416515, "grad_norm": NaN, "learning_rate": 0.00031900625491986846, "loss": 0.0, "step": 68730 }, { "epoch": 0.9398992281450185, "grad_norm": NaN, "learning_rate": 0.0003189489095494903, "loss": 0.0, "step": 68740 }, { "epoch": 0.9400359606483856, "grad_norm": NaN, "learning_rate": 0.0003188915602523772, "loss": 0.0, "step": 68750 }, { "epoch": 0.9401726931517526, "grad_norm": NaN, "learning_rate": 0.00031883420703179507, "loss": 0.0, "step": 68760 }, { "epoch": 0.9403094256551197, "grad_norm": NaN, "learning_rate": 0.0003187768498910103, "loss": 0.0, "step": 68770 }, { "epoch": 0.9404461581584866, "grad_norm": NaN, "learning_rate": 0.0003187194888332895, "loss": 0.0, "step": 68780 }, { "epoch": 0.9405828906618536, "grad_norm": NaN, "learning_rate": 0.00031866212386189935, "loss": 0.0, "step": 68790 }, { "epoch": 0.9407196231652207, "grad_norm": NaN, "learning_rate": 0.000318604754980107, "loss": 0.0, "step": 68800 }, { "epoch": 0.9408563556685877, "grad_norm": NaN, "learning_rate": 0.0003185473821911797, "loss": 0.0, "step": 68810 }, { "epoch": 0.9409930881719548, "grad_norm": NaN, "learning_rate": 0.00031849000549838473, "loss": 0.0, "step": 68820 }, { "epoch": 0.9411298206753218, "grad_norm": NaN, "learning_rate": 0.00031843262490498995, "loss": 0.0, "step": 68830 }, { "epoch": 0.9412665531786889, "grad_norm": NaN, "learning_rate": 0.0003183752404142632, "loss": 0.0, "step": 68840 }, { "epoch": 0.9414032856820559, "grad_norm": NaN, "learning_rate": 0.0003183178520294726, "loss": 0.0, "step": 68850 }, { "epoch": 0.941540018185423, "grad_norm": NaN, "learning_rate": 0.0003182604597538865, "loss": 0.0, "step": 68860 }, { "epoch": 0.94167675068879, "grad_norm": NaN, "learning_rate": 0.00031820306359077346, "loss": 0.0, "step": 68870 }, { "epoch": 0.9418134831921571, "grad_norm": NaN, "learning_rate": 0.0003181456635434022, "loss": 0.0, "step": 68880 }, { "epoch": 0.9419502156955241, "grad_norm": NaN, "learning_rate": 0.0003180882596150419, "loss": 0.0, "step": 68890 }, { "epoch": 0.942086948198891, "grad_norm": NaN, "learning_rate": 0.00031803085180896165, "loss": 0.0, "step": 68900 }, { "epoch": 0.9422236807022581, "grad_norm": NaN, "learning_rate": 0.000317973440128431, "loss": 0.0, "step": 68910 }, { "epoch": 0.9423604132056251, "grad_norm": NaN, "learning_rate": 0.0003179160245767195, "loss": 0.0, "step": 68920 }, { "epoch": 0.9424971457089922, "grad_norm": NaN, "learning_rate": 0.0003178586051570973, "loss": 0.0, "step": 68930 }, { "epoch": 0.9426338782123592, "grad_norm": NaN, "learning_rate": 0.0003178011818728342, "loss": 0.0, "step": 68940 }, { "epoch": 0.9427706107157263, "grad_norm": NaN, "learning_rate": 0.0003177437547272007, "loss": 0.0, "step": 68950 }, { "epoch": 0.9429073432190933, "grad_norm": NaN, "learning_rate": 0.0003176863237234674, "loss": 0.0, "step": 68960 }, { "epoch": 0.9430440757224604, "grad_norm": NaN, "learning_rate": 0.0003176288888649049, "loss": 0.0, "step": 68970 }, { "epoch": 0.9431808082258274, "grad_norm": NaN, "learning_rate": 0.0003175714501547843, "loss": 0.0, "step": 68980 }, { "epoch": 0.9433175407291945, "grad_norm": NaN, "learning_rate": 0.00031751400759637695, "loss": 0.0, "step": 68990 }, { "epoch": 0.9434542732325615, "grad_norm": NaN, "learning_rate": 0.00031745656119295396, "loss": 0.0, "step": 69000 }, { "epoch": 0.9435910057359285, "grad_norm": NaN, "learning_rate": 0.0003173991109477873, "loss": 0.0, "step": 69010 }, { "epoch": 0.9437277382392956, "grad_norm": NaN, "learning_rate": 0.00031734165686414864, "loss": 0.0, "step": 69020 }, { "epoch": 0.9438644707426626, "grad_norm": NaN, "learning_rate": 0.0003172841989453101, "loss": 0.0, "step": 69030 }, { "epoch": 0.9440012032460297, "grad_norm": NaN, "learning_rate": 0.0003172267371945441, "loss": 0.0, "step": 69040 }, { "epoch": 0.9441379357493966, "grad_norm": NaN, "learning_rate": 0.00031716927161512304, "loss": 0.0, "step": 69050 }, { "epoch": 0.9442746682527637, "grad_norm": NaN, "learning_rate": 0.00031711180221031973, "loss": 0.0, "step": 69060 }, { "epoch": 0.9444114007561307, "grad_norm": NaN, "learning_rate": 0.00031705432898340713, "loss": 0.0, "step": 69070 }, { "epoch": 0.9445481332594978, "grad_norm": NaN, "learning_rate": 0.0003169968519376584, "loss": 0.0, "step": 69080 }, { "epoch": 0.9446848657628648, "grad_norm": NaN, "learning_rate": 0.0003169393710763469, "loss": 0.0, "step": 69090 }, { "epoch": 0.9448215982662319, "grad_norm": NaN, "learning_rate": 0.0003168818864027463, "loss": 0.0, "step": 69100 }, { "epoch": 0.9449583307695989, "grad_norm": NaN, "learning_rate": 0.0003168243979201304, "loss": 0.0, "step": 69110 }, { "epoch": 0.9450950632729659, "grad_norm": NaN, "learning_rate": 0.00031676690563177336, "loss": 0.0, "step": 69120 }, { "epoch": 0.945231795776333, "grad_norm": NaN, "learning_rate": 0.0003167094095409492, "loss": 0.0, "step": 69130 }, { "epoch": 0.9453685282797, "grad_norm": NaN, "learning_rate": 0.00031665190965093264, "loss": 0.0, "step": 69140 }, { "epoch": 0.9455052607830671, "grad_norm": NaN, "learning_rate": 0.00031659440596499834, "loss": 0.0, "step": 69150 }, { "epoch": 0.9456419932864341, "grad_norm": NaN, "learning_rate": 0.00031653689848642097, "loss": 0.0, "step": 69160 }, { "epoch": 0.9457787257898012, "grad_norm": NaN, "learning_rate": 0.00031647938721847597, "loss": 0.0, "step": 69170 }, { "epoch": 0.9459154582931681, "grad_norm": NaN, "learning_rate": 0.0003164218721644386, "loss": 0.0, "step": 69180 }, { "epoch": 0.9460521907965352, "grad_norm": NaN, "learning_rate": 0.0003163643533275843, "loss": 0.0, "step": 69190 }, { "epoch": 0.9461889232999022, "grad_norm": NaN, "learning_rate": 0.00031630683071118905, "loss": 0.0, "step": 69200 }, { "epoch": 0.9463256558032693, "grad_norm": NaN, "learning_rate": 0.0003162493043185286, "loss": 0.0, "step": 69210 }, { "epoch": 0.9464623883066363, "grad_norm": NaN, "learning_rate": 0.00031619177415287934, "loss": 0.0, "step": 69220 }, { "epoch": 0.9465991208100033, "grad_norm": NaN, "learning_rate": 0.0003161342402175177, "loss": 0.0, "step": 69230 }, { "epoch": 0.9467358533133704, "grad_norm": NaN, "learning_rate": 0.0003160767025157201, "loss": 0.0, "step": 69240 }, { "epoch": 0.9468725858167374, "grad_norm": NaN, "learning_rate": 0.00031601916105076367, "loss": 0.0, "step": 69250 }, { "epoch": 0.9470093183201045, "grad_norm": NaN, "learning_rate": 0.00031596161582592527, "loss": 0.0, "step": 69260 }, { "epoch": 0.9471460508234715, "grad_norm": NaN, "learning_rate": 0.0003159040668444823, "loss": 0.0, "step": 69270 }, { "epoch": 0.9472827833268386, "grad_norm": NaN, "learning_rate": 0.0003158465141097122, "loss": 0.0, "step": 69280 }, { "epoch": 0.9474195158302056, "grad_norm": NaN, "learning_rate": 0.0003157889576248927, "loss": 0.0, "step": 69290 }, { "epoch": 0.9475562483335727, "grad_norm": NaN, "learning_rate": 0.0003157313973933016, "loss": 0.0, "step": 69300 }, { "epoch": 0.9476929808369396, "grad_norm": NaN, "learning_rate": 0.0003156738334182173, "loss": 0.0, "step": 69310 }, { "epoch": 0.9478297133403067, "grad_norm": NaN, "learning_rate": 0.0003156162657029179, "loss": 0.0, "step": 69320 }, { "epoch": 0.9479664458436737, "grad_norm": NaN, "learning_rate": 0.00031555869425068213, "loss": 0.0, "step": 69330 }, { "epoch": 0.9481031783470407, "grad_norm": NaN, "learning_rate": 0.0003155011190647886, "loss": 0.0, "step": 69340 }, { "epoch": 0.9482399108504078, "grad_norm": NaN, "learning_rate": 0.00031544354014851646, "loss": 0.0, "step": 69350 }, { "epoch": 0.9483766433537748, "grad_norm": NaN, "learning_rate": 0.0003153859575051448, "loss": 0.0, "step": 69360 }, { "epoch": 0.9485133758571419, "grad_norm": NaN, "learning_rate": 0.00031532837113795297, "loss": 0.0, "step": 69370 }, { "epoch": 0.9486501083605089, "grad_norm": NaN, "learning_rate": 0.0003152707810502208, "loss": 0.0, "step": 69380 }, { "epoch": 0.948786840863876, "grad_norm": NaN, "learning_rate": 0.00031521318724522795, "loss": 0.0, "step": 69390 }, { "epoch": 0.948923573367243, "grad_norm": NaN, "learning_rate": 0.00031515558972625455, "loss": 0.0, "step": 69400 }, { "epoch": 0.9490603058706101, "grad_norm": NaN, "learning_rate": 0.00031509798849658084, "loss": 0.0, "step": 69410 }, { "epoch": 0.9491970383739771, "grad_norm": NaN, "learning_rate": 0.0003150403835594872, "loss": 0.0, "step": 69420 }, { "epoch": 0.9493337708773442, "grad_norm": NaN, "learning_rate": 0.00031498277491825446, "loss": 0.0, "step": 69430 }, { "epoch": 0.9494705033807112, "grad_norm": NaN, "learning_rate": 0.00031492516257616344, "loss": 0.0, "step": 69440 }, { "epoch": 0.9496072358840781, "grad_norm": NaN, "learning_rate": 0.0003148675465364952, "loss": 0.0, "step": 69450 }, { "epoch": 0.9497439683874452, "grad_norm": NaN, "learning_rate": 0.00031480992680253114, "loss": 0.0, "step": 69460 }, { "epoch": 0.9498807008908122, "grad_norm": NaN, "learning_rate": 0.00031475230337755275, "loss": 0.0, "step": 69470 }, { "epoch": 0.9500174333941793, "grad_norm": NaN, "learning_rate": 0.0003146946762648417, "loss": 0.0, "step": 69480 }, { "epoch": 0.9501541658975463, "grad_norm": NaN, "learning_rate": 0.00031463704546768, "loss": 0.0, "step": 69490 }, { "epoch": 0.9502908984009134, "grad_norm": NaN, "learning_rate": 0.00031457941098934976, "loss": 0.0, "step": 69500 }, { "epoch": 0.9504276309042804, "grad_norm": NaN, "learning_rate": 0.0003145217728331335, "loss": 0.0, "step": 69510 }, { "epoch": 0.9505643634076475, "grad_norm": NaN, "learning_rate": 0.0003144641310023136, "loss": 0.0, "step": 69520 }, { "epoch": 0.9507010959110145, "grad_norm": NaN, "learning_rate": 0.0003144064855001728, "loss": 0.0, "step": 69530 }, { "epoch": 0.9508378284143815, "grad_norm": NaN, "learning_rate": 0.00031434883632999435, "loss": 0.0, "step": 69540 }, { "epoch": 0.9509745609177486, "grad_norm": NaN, "learning_rate": 0.0003142911834950611, "loss": 0.0, "step": 69550 }, { "epoch": 0.9511112934211156, "grad_norm": NaN, "learning_rate": 0.0003142335269986568, "loss": 0.0, "step": 69560 }, { "epoch": 0.9512480259244827, "grad_norm": NaN, "learning_rate": 0.00031417586684406493, "loss": 0.0, "step": 69570 }, { "epoch": 0.9513847584278496, "grad_norm": NaN, "learning_rate": 0.00031411820303456925, "loss": 0.0, "step": 69580 }, { "epoch": 0.9515214909312167, "grad_norm": NaN, "learning_rate": 0.0003140605355734538, "loss": 0.0, "step": 69590 }, { "epoch": 0.9516582234345837, "grad_norm": NaN, "learning_rate": 0.000314002864464003, "loss": 0.0, "step": 69600 }, { "epoch": 0.9517949559379508, "grad_norm": NaN, "learning_rate": 0.0003139451897095011, "loss": 0.0, "step": 69610 }, { "epoch": 0.9519316884413178, "grad_norm": NaN, "learning_rate": 0.0003138875113132328, "loss": 0.0, "step": 69620 }, { "epoch": 0.9520684209446849, "grad_norm": NaN, "learning_rate": 0.000313829829278483, "loss": 0.0, "step": 69630 }, { "epoch": 0.9522051534480519, "grad_norm": NaN, "learning_rate": 0.00031377214360853677, "loss": 0.0, "step": 69640 }, { "epoch": 0.9523418859514189, "grad_norm": NaN, "learning_rate": 0.00031371445430667945, "loss": 0.0, "step": 69650 }, { "epoch": 0.952478618454786, "grad_norm": NaN, "learning_rate": 0.0003136567613761963, "loss": 0.0, "step": 69660 }, { "epoch": 0.952615350958153, "grad_norm": NaN, "learning_rate": 0.00031359906482037326, "loss": 0.0, "step": 69670 }, { "epoch": 0.9527520834615201, "grad_norm": NaN, "learning_rate": 0.00031354136464249613, "loss": 0.0, "step": 69680 }, { "epoch": 0.9528888159648871, "grad_norm": NaN, "learning_rate": 0.000313483660845851, "loss": 0.0, "step": 69690 }, { "epoch": 0.9530255484682542, "grad_norm": NaN, "learning_rate": 0.0003134259534337242, "loss": 0.0, "step": 69700 }, { "epoch": 0.9531622809716211, "grad_norm": NaN, "learning_rate": 0.0003133682424094022, "loss": 0.0, "step": 69710 }, { "epoch": 0.9532990134749882, "grad_norm": NaN, "learning_rate": 0.0003133105277761718, "loss": 0.0, "step": 69720 }, { "epoch": 0.9534357459783552, "grad_norm": NaN, "learning_rate": 0.0003132528095373199, "loss": 0.0, "step": 69730 }, { "epoch": 0.9535724784817223, "grad_norm": NaN, "learning_rate": 0.0003131950876961336, "loss": 0.0, "step": 69740 }, { "epoch": 0.9537092109850893, "grad_norm": NaN, "learning_rate": 0.00031313736225590036, "loss": 0.0, "step": 69750 }, { "epoch": 0.9538459434884563, "grad_norm": NaN, "learning_rate": 0.0003130796332199075, "loss": 0.0, "step": 69760 }, { "epoch": 0.9539826759918234, "grad_norm": NaN, "learning_rate": 0.00031302190059144297, "loss": 0.0, "step": 69770 }, { "epoch": 0.9541194084951904, "grad_norm": NaN, "learning_rate": 0.0003129641643737946, "loss": 0.0, "step": 69780 }, { "epoch": 0.9542561409985575, "grad_norm": NaN, "learning_rate": 0.00031290642457025065, "loss": 0.0, "step": 69790 }, { "epoch": 0.9543928735019245, "grad_norm": NaN, "learning_rate": 0.00031284868118409937, "loss": 0.0, "step": 69800 }, { "epoch": 0.9545296060052916, "grad_norm": NaN, "learning_rate": 0.00031279093421862945, "loss": 0.0, "step": 69810 }, { "epoch": 0.9546663385086586, "grad_norm": NaN, "learning_rate": 0.0003127331836771295, "loss": 0.0, "step": 69820 }, { "epoch": 0.9548030710120257, "grad_norm": NaN, "learning_rate": 0.0003126754295628886, "loss": 0.0, "step": 69830 }, { "epoch": 0.9549398035153926, "grad_norm": NaN, "learning_rate": 0.000312617671879196, "loss": 0.0, "step": 69840 }, { "epoch": 0.9550765360187597, "grad_norm": NaN, "learning_rate": 0.0003125599106293409, "loss": 0.0, "step": 69850 }, { "epoch": 0.9552132685221267, "grad_norm": NaN, "learning_rate": 0.00031250214581661295, "loss": 0.0, "step": 69860 }, { "epoch": 0.9553500010254937, "grad_norm": NaN, "learning_rate": 0.000312444377444302, "loss": 0.0, "step": 69870 }, { "epoch": 0.9554867335288608, "grad_norm": NaN, "learning_rate": 0.00031238660551569797, "loss": 0.0, "step": 69880 }, { "epoch": 0.9556234660322278, "grad_norm": NaN, "learning_rate": 0.0003123288300340911, "loss": 0.0, "step": 69890 }, { "epoch": 0.9557601985355949, "grad_norm": NaN, "learning_rate": 0.00031227105100277175, "loss": 0.0, "step": 69900 }, { "epoch": 0.9558969310389619, "grad_norm": NaN, "learning_rate": 0.00031221326842503056, "loss": 0.0, "step": 69910 }, { "epoch": 0.956033663542329, "grad_norm": NaN, "learning_rate": 0.00031215548230415825, "loss": 0.0, "step": 69920 }, { "epoch": 0.956170396045696, "grad_norm": NaN, "learning_rate": 0.0003120976926434459, "loss": 0.0, "step": 69930 }, { "epoch": 0.9563071285490631, "grad_norm": NaN, "learning_rate": 0.0003120398994461846, "loss": 0.0, "step": 69940 }, { "epoch": 0.9564438610524301, "grad_norm": NaN, "learning_rate": 0.00031198210271566587, "loss": 0.0, "step": 69950 }, { "epoch": 0.9565805935557972, "grad_norm": NaN, "learning_rate": 0.0003119243024551812, "loss": 0.0, "step": 69960 }, { "epoch": 0.9567173260591642, "grad_norm": NaN, "learning_rate": 0.0003118664986680225, "loss": 0.0, "step": 69970 }, { "epoch": 0.9568540585625311, "grad_norm": NaN, "learning_rate": 0.00031180869135748174, "loss": 0.0, "step": 69980 }, { "epoch": 0.9569907910658982, "grad_norm": NaN, "learning_rate": 0.00031175088052685115, "loss": 0.0, "step": 69990 }, { "epoch": 0.9571275235692652, "grad_norm": NaN, "learning_rate": 0.00031169306617942304, "loss": 0.0, "step": 70000 }, { "epoch": 0.9572642560726323, "grad_norm": NaN, "learning_rate": 0.0003116352483184901, "loss": 0.0, "step": 70010 }, { "epoch": 0.9574009885759993, "grad_norm": NaN, "learning_rate": 0.0003115774269473452, "loss": 0.0, "step": 70020 }, { "epoch": 0.9575377210793664, "grad_norm": NaN, "learning_rate": 0.00031151960206928116, "loss": 0.0, "step": 70030 }, { "epoch": 0.9576744535827334, "grad_norm": NaN, "learning_rate": 0.0003114617736875914, "loss": 0.0, "step": 70040 }, { "epoch": 0.9578111860861005, "grad_norm": NaN, "learning_rate": 0.0003114039418055691, "loss": 0.0, "step": 70050 }, { "epoch": 0.9579479185894675, "grad_norm": NaN, "learning_rate": 0.000311346106426508, "loss": 0.0, "step": 70060 }, { "epoch": 0.9580846510928346, "grad_norm": NaN, "learning_rate": 0.00031128826755370196, "loss": 0.0, "step": 70070 }, { "epoch": 0.9582213835962016, "grad_norm": NaN, "learning_rate": 0.0003112304251904449, "loss": 0.0, "step": 70080 }, { "epoch": 0.9583581160995686, "grad_norm": NaN, "learning_rate": 0.00031117257934003094, "loss": 0.0, "step": 70090 }, { "epoch": 0.9584948486029357, "grad_norm": NaN, "learning_rate": 0.00031111473000575466, "loss": 0.0, "step": 70100 }, { "epoch": 0.9586315811063026, "grad_norm": NaN, "learning_rate": 0.00031105687719091055, "loss": 0.0, "step": 70110 }, { "epoch": 0.9587683136096697, "grad_norm": NaN, "learning_rate": 0.0003109990208987934, "loss": 0.0, "step": 70120 }, { "epoch": 0.9589050461130367, "grad_norm": NaN, "learning_rate": 0.0003109411611326983, "loss": 0.0, "step": 70130 }, { "epoch": 0.9590417786164038, "grad_norm": NaN, "learning_rate": 0.00031088329789592025, "loss": 0.0, "step": 70140 }, { "epoch": 0.9591785111197708, "grad_norm": NaN, "learning_rate": 0.00031082543119175486, "loss": 0.0, "step": 70150 }, { "epoch": 0.9593152436231379, "grad_norm": NaN, "learning_rate": 0.0003107675610234976, "loss": 0.0, "step": 70160 }, { "epoch": 0.9594519761265049, "grad_norm": NaN, "learning_rate": 0.0003107096873944443, "loss": 0.0, "step": 70170 }, { "epoch": 0.959588708629872, "grad_norm": NaN, "learning_rate": 0.000310651810307891, "loss": 0.0, "step": 70180 }, { "epoch": 0.959725441133239, "grad_norm": NaN, "learning_rate": 0.0003105939297671337, "loss": 0.0, "step": 70190 }, { "epoch": 0.959862173636606, "grad_norm": NaN, "learning_rate": 0.0003105360457754689, "loss": 0.0, "step": 70200 }, { "epoch": 0.9599989061399731, "grad_norm": NaN, "learning_rate": 0.00031047815833619316, "loss": 0.0, "step": 70210 }, { "epoch": 0.9601356386433401, "grad_norm": NaN, "learning_rate": 0.0003104202674526032, "loss": 0.0, "step": 70220 }, { "epoch": 0.9602723711467072, "grad_norm": NaN, "learning_rate": 0.0003103623731279961, "loss": 0.0, "step": 70230 }, { "epoch": 0.9604091036500741, "grad_norm": NaN, "learning_rate": 0.00031030447536566894, "loss": 0.0, "step": 70240 }, { "epoch": 0.9605458361534412, "grad_norm": NaN, "learning_rate": 0.00031024657416891903, "loss": 0.0, "step": 70250 }, { "epoch": 0.9606825686568082, "grad_norm": NaN, "learning_rate": 0.000310188669541044, "loss": 0.0, "step": 70260 }, { "epoch": 0.9608193011601753, "grad_norm": NaN, "learning_rate": 0.00031013076148534157, "loss": 0.0, "step": 70270 }, { "epoch": 0.9609560336635423, "grad_norm": NaN, "learning_rate": 0.00031007285000510975, "loss": 0.0, "step": 70280 }, { "epoch": 0.9610927661669094, "grad_norm": NaN, "learning_rate": 0.00031001493510364654, "loss": 0.0, "step": 70290 }, { "epoch": 0.9612294986702764, "grad_norm": NaN, "learning_rate": 0.00030995701678425045, "loss": 0.0, "step": 70300 }, { "epoch": 0.9613662311736434, "grad_norm": NaN, "learning_rate": 0.00030989909505021985, "loss": 0.0, "step": 70310 }, { "epoch": 0.9615029636770105, "grad_norm": NaN, "learning_rate": 0.00030984116990485347, "loss": 0.0, "step": 70320 }, { "epoch": 0.9616396961803775, "grad_norm": NaN, "learning_rate": 0.0003097832413514504, "loss": 0.0, "step": 70330 }, { "epoch": 0.9617764286837446, "grad_norm": NaN, "learning_rate": 0.0003097253093933095, "loss": 0.0, "step": 70340 }, { "epoch": 0.9619131611871116, "grad_norm": NaN, "learning_rate": 0.0003096673740337302, "loss": 0.0, "step": 70350 }, { "epoch": 0.9620498936904787, "grad_norm": NaN, "learning_rate": 0.00030960943527601214, "loss": 0.0, "step": 70360 }, { "epoch": 0.9621866261938457, "grad_norm": NaN, "learning_rate": 0.00030955149312345475, "loss": 0.0, "step": 70370 }, { "epoch": 0.9623233586972127, "grad_norm": NaN, "learning_rate": 0.00030949354757935814, "loss": 0.0, "step": 70380 }, { "epoch": 0.9624600912005797, "grad_norm": NaN, "learning_rate": 0.0003094355986470222, "loss": 0.0, "step": 70390 }, { "epoch": 0.9625968237039468, "grad_norm": NaN, "learning_rate": 0.0003093776463297473, "loss": 0.0, "step": 70400 }, { "epoch": 0.9627335562073138, "grad_norm": NaN, "learning_rate": 0.00030931969063083395, "loss": 0.0, "step": 70410 }, { "epoch": 0.9628702887106808, "grad_norm": NaN, "learning_rate": 0.00030926173155358265, "loss": 0.0, "step": 70420 }, { "epoch": 0.9630070212140479, "grad_norm": NaN, "learning_rate": 0.00030920376910129445, "loss": 0.0, "step": 70430 }, { "epoch": 0.9631437537174149, "grad_norm": NaN, "learning_rate": 0.00030914580327727026, "loss": 0.0, "step": 70440 }, { "epoch": 0.963280486220782, "grad_norm": NaN, "learning_rate": 0.00030908783408481135, "loss": 0.0, "step": 70450 }, { "epoch": 0.963417218724149, "grad_norm": NaN, "learning_rate": 0.00030902986152721914, "loss": 0.0, "step": 70460 }, { "epoch": 0.9635539512275161, "grad_norm": NaN, "learning_rate": 0.0003089718856077952, "loss": 0.0, "step": 70470 }, { "epoch": 0.9636906837308831, "grad_norm": NaN, "learning_rate": 0.0003089139063298414, "loss": 0.0, "step": 70480 }, { "epoch": 0.9638274162342502, "grad_norm": NaN, "learning_rate": 0.00030885592369665977, "loss": 0.0, "step": 70490 }, { "epoch": 0.9639641487376172, "grad_norm": NaN, "learning_rate": 0.0003087979377115523, "loss": 0.0, "step": 70500 }, { "epoch": 0.9641008812409843, "grad_norm": NaN, "learning_rate": 0.00030873994837782174, "loss": 0.0, "step": 70510 }, { "epoch": 0.9642376137443512, "grad_norm": NaN, "learning_rate": 0.00030868195569877033, "loss": 0.0, "step": 70520 }, { "epoch": 0.9643743462477182, "grad_norm": NaN, "learning_rate": 0.0003086239596777009, "loss": 0.0, "step": 70530 }, { "epoch": 0.9645110787510853, "grad_norm": NaN, "learning_rate": 0.0003085659603179165, "loss": 0.0, "step": 70540 }, { "epoch": 0.9646478112544523, "grad_norm": NaN, "learning_rate": 0.00030850795762272034, "loss": 0.0, "step": 70550 }, { "epoch": 0.9647845437578194, "grad_norm": NaN, "learning_rate": 0.00030844995159541545, "loss": 0.0, "step": 70560 }, { "epoch": 0.9649212762611864, "grad_norm": NaN, "learning_rate": 0.00030839194223930566, "loss": 0.0, "step": 70570 }, { "epoch": 0.9650580087645535, "grad_norm": NaN, "learning_rate": 0.00030833392955769453, "loss": 0.0, "step": 70580 }, { "epoch": 0.9651947412679205, "grad_norm": NaN, "learning_rate": 0.000308275913553886, "loss": 0.0, "step": 70590 }, { "epoch": 0.9653314737712876, "grad_norm": NaN, "learning_rate": 0.00030821789423118417, "loss": 0.0, "step": 70600 }, { "epoch": 0.9654682062746546, "grad_norm": NaN, "learning_rate": 0.00030815987159289325, "loss": 0.0, "step": 70610 }, { "epoch": 0.9656049387780217, "grad_norm": NaN, "learning_rate": 0.00030810184564231786, "loss": 0.0, "step": 70620 }, { "epoch": 0.9657416712813887, "grad_norm": NaN, "learning_rate": 0.00030804381638276253, "loss": 0.0, "step": 70630 }, { "epoch": 0.9658784037847556, "grad_norm": NaN, "learning_rate": 0.00030798578381753213, "loss": 0.0, "step": 70640 }, { "epoch": 0.9660151362881227, "grad_norm": NaN, "learning_rate": 0.00030792774794993175, "loss": 0.0, "step": 70650 }, { "epoch": 0.9661518687914897, "grad_norm": NaN, "learning_rate": 0.00030786970878326646, "loss": 0.0, "step": 70660 }, { "epoch": 0.9662886012948568, "grad_norm": NaN, "learning_rate": 0.0003078116663208418, "loss": 0.0, "step": 70670 }, { "epoch": 0.9664253337982238, "grad_norm": NaN, "learning_rate": 0.0003077536205659634, "loss": 0.0, "step": 70680 }, { "epoch": 0.9665620663015909, "grad_norm": NaN, "learning_rate": 0.00030769557152193695, "loss": 0.0, "step": 70690 }, { "epoch": 0.9666987988049579, "grad_norm": NaN, "learning_rate": 0.0003076375191920685, "loss": 0.0, "step": 70700 }, { "epoch": 0.966835531308325, "grad_norm": NaN, "learning_rate": 0.0003075794635796641, "loss": 0.0, "step": 70710 }, { "epoch": 0.966972263811692, "grad_norm": NaN, "learning_rate": 0.00030752140468803026, "loss": 0.0, "step": 70720 }, { "epoch": 0.9671089963150591, "grad_norm": NaN, "learning_rate": 0.0003074633425204734, "loss": 0.0, "step": 70730 }, { "epoch": 0.9672457288184261, "grad_norm": NaN, "learning_rate": 0.00030740527708030023, "loss": 0.0, "step": 70740 }, { "epoch": 0.9673824613217931, "grad_norm": NaN, "learning_rate": 0.00030734720837081775, "loss": 0.0, "step": 70750 }, { "epoch": 0.9675191938251602, "grad_norm": NaN, "learning_rate": 0.0003072891363953329, "loss": 0.0, "step": 70760 }, { "epoch": 0.9676559263285271, "grad_norm": NaN, "learning_rate": 0.00030723106115715317, "loss": 0.0, "step": 70770 }, { "epoch": 0.9677926588318942, "grad_norm": NaN, "learning_rate": 0.00030717298265958584, "loss": 0.0, "step": 70780 }, { "epoch": 0.9679293913352612, "grad_norm": NaN, "learning_rate": 0.0003071149009059386, "loss": 0.0, "step": 70790 }, { "epoch": 0.9680661238386283, "grad_norm": NaN, "learning_rate": 0.00030705681589951933, "loss": 0.0, "step": 70800 }, { "epoch": 0.9682028563419953, "grad_norm": NaN, "learning_rate": 0.000306998727643636, "loss": 0.0, "step": 70810 }, { "epoch": 0.9683395888453624, "grad_norm": NaN, "learning_rate": 0.0003069406361415969, "loss": 0.0, "step": 70820 }, { "epoch": 0.9684763213487294, "grad_norm": NaN, "learning_rate": 0.0003068825413967104, "loss": 0.0, "step": 70830 }, { "epoch": 0.9686130538520965, "grad_norm": NaN, "learning_rate": 0.000306824443412285, "loss": 0.0, "step": 70840 }, { "epoch": 0.9687497863554635, "grad_norm": NaN, "learning_rate": 0.00030676634219162945, "loss": 0.0, "step": 70850 }, { "epoch": 0.9688865188588305, "grad_norm": NaN, "learning_rate": 0.0003067082377380529, "loss": 0.0, "step": 70860 }, { "epoch": 0.9690232513621976, "grad_norm": NaN, "learning_rate": 0.0003066501300548642, "loss": 0.0, "step": 70870 }, { "epoch": 0.9691599838655646, "grad_norm": NaN, "learning_rate": 0.00030659201914537285, "loss": 0.0, "step": 70880 }, { "epoch": 0.9692967163689317, "grad_norm": NaN, "learning_rate": 0.00030653390501288825, "loss": 0.0, "step": 70890 }, { "epoch": 0.9694334488722987, "grad_norm": NaN, "learning_rate": 0.00030647578766072013, "loss": 0.0, "step": 70900 }, { "epoch": 0.9695701813756658, "grad_norm": NaN, "learning_rate": 0.00030641766709217834, "loss": 0.0, "step": 70910 }, { "epoch": 0.9697069138790327, "grad_norm": NaN, "learning_rate": 0.00030635954331057294, "loss": 0.0, "step": 70920 }, { "epoch": 0.9698436463823998, "grad_norm": NaN, "learning_rate": 0.0003063014163192141, "loss": 0.0, "step": 70930 }, { "epoch": 0.9699803788857668, "grad_norm": NaN, "learning_rate": 0.00030624328612141236, "loss": 0.0, "step": 70940 }, { "epoch": 0.9701171113891338, "grad_norm": NaN, "learning_rate": 0.0003061851527204781, "loss": 0.0, "step": 70950 }, { "epoch": 0.9702538438925009, "grad_norm": NaN, "learning_rate": 0.0003061270161197223, "loss": 0.0, "step": 70960 }, { "epoch": 0.9703905763958679, "grad_norm": NaN, "learning_rate": 0.0003060688763224559, "loss": 0.0, "step": 70970 }, { "epoch": 0.970527308899235, "grad_norm": NaN, "learning_rate": 0.0003060107333319899, "loss": 0.0, "step": 70980 }, { "epoch": 0.970664041402602, "grad_norm": NaN, "learning_rate": 0.00030595258715163576, "loss": 0.0, "step": 70990 }, { "epoch": 0.9708007739059691, "grad_norm": NaN, "learning_rate": 0.00030589443778470494, "loss": 0.0, "step": 71000 }, { "epoch": 0.9709375064093361, "grad_norm": NaN, "learning_rate": 0.0003058362852345091, "loss": 0.0, "step": 71010 }, { "epoch": 0.9710742389127032, "grad_norm": NaN, "learning_rate": 0.0003057781295043602, "loss": 0.0, "step": 71020 }, { "epoch": 0.9712109714160702, "grad_norm": NaN, "learning_rate": 0.0003057199705975701, "loss": 0.0, "step": 71030 }, { "epoch": 0.9713477039194373, "grad_norm": NaN, "learning_rate": 0.0003056618085174512, "loss": 0.0, "step": 71040 }, { "epoch": 0.9714844364228042, "grad_norm": NaN, "learning_rate": 0.0003056036432673159, "loss": 0.0, "step": 71050 }, { "epoch": 0.9716211689261712, "grad_norm": NaN, "learning_rate": 0.00030554547485047676, "loss": 0.0, "step": 71060 }, { "epoch": 0.9717579014295383, "grad_norm": NaN, "learning_rate": 0.0003054873032702465, "loss": 0.0, "step": 71070 }, { "epoch": 0.9718946339329053, "grad_norm": NaN, "learning_rate": 0.000305429128529938, "loss": 0.0, "step": 71080 }, { "epoch": 0.9720313664362724, "grad_norm": NaN, "learning_rate": 0.0003053709506328647, "loss": 0.0, "step": 71090 }, { "epoch": 0.9721680989396394, "grad_norm": NaN, "learning_rate": 0.0003053127695823396, "loss": 0.0, "step": 71100 }, { "epoch": 0.9723048314430065, "grad_norm": NaN, "learning_rate": 0.0003052545853816763, "loss": 0.0, "step": 71110 }, { "epoch": 0.9724415639463735, "grad_norm": NaN, "learning_rate": 0.0003051963980341886, "loss": 0.0, "step": 71120 }, { "epoch": 0.9725782964497406, "grad_norm": NaN, "learning_rate": 0.0003051382075431901, "loss": 0.0, "step": 71130 }, { "epoch": 0.9727150289531076, "grad_norm": NaN, "learning_rate": 0.000305080013911995, "loss": 0.0, "step": 71140 }, { "epoch": 0.9728517614564747, "grad_norm": NaN, "learning_rate": 0.00030502181714391746, "loss": 0.0, "step": 71150 }, { "epoch": 0.9729884939598417, "grad_norm": NaN, "learning_rate": 0.00030496361724227183, "loss": 0.0, "step": 71160 }, { "epoch": 0.9731252264632086, "grad_norm": NaN, "learning_rate": 0.00030490541421037277, "loss": 0.0, "step": 71170 }, { "epoch": 0.9732619589665757, "grad_norm": NaN, "learning_rate": 0.000304847208051535, "loss": 0.0, "step": 71180 }, { "epoch": 0.9733986914699427, "grad_norm": NaN, "learning_rate": 0.0003047889987690733, "loss": 0.0, "step": 71190 }, { "epoch": 0.9735354239733098, "grad_norm": NaN, "learning_rate": 0.00030473078636630293, "loss": 0.0, "step": 71200 }, { "epoch": 0.9736721564766768, "grad_norm": NaN, "learning_rate": 0.00030467257084653906, "loss": 0.0, "step": 71210 }, { "epoch": 0.9738088889800439, "grad_norm": NaN, "learning_rate": 0.0003046143522130972, "loss": 0.0, "step": 71220 }, { "epoch": 0.9739456214834109, "grad_norm": NaN, "learning_rate": 0.000304556130469293, "loss": 0.0, "step": 71230 }, { "epoch": 0.974082353986778, "grad_norm": NaN, "learning_rate": 0.0003044979056184422, "loss": 0.0, "step": 71240 }, { "epoch": 0.974219086490145, "grad_norm": NaN, "learning_rate": 0.00030443967766386094, "loss": 0.0, "step": 71250 }, { "epoch": 0.9743558189935121, "grad_norm": NaN, "learning_rate": 0.0003043814466088652, "loss": 0.0, "step": 71260 }, { "epoch": 0.9744925514968791, "grad_norm": NaN, "learning_rate": 0.00030432321245677137, "loss": 0.0, "step": 71270 }, { "epoch": 0.9746292840002461, "grad_norm": NaN, "learning_rate": 0.00030426497521089603, "loss": 0.0, "step": 71280 }, { "epoch": 0.9747660165036132, "grad_norm": NaN, "learning_rate": 0.00030420673487455575, "loss": 0.0, "step": 71290 }, { "epoch": 0.9749027490069802, "grad_norm": NaN, "learning_rate": 0.0003041484914510675, "loss": 0.0, "step": 71300 }, { "epoch": 0.9750394815103473, "grad_norm": NaN, "learning_rate": 0.00030409024494374836, "loss": 0.0, "step": 71310 }, { "epoch": 0.9751762140137142, "grad_norm": NaN, "learning_rate": 0.00030403199535591544, "loss": 0.0, "step": 71320 }, { "epoch": 0.9753129465170813, "grad_norm": NaN, "learning_rate": 0.0003039737426908862, "loss": 0.0, "step": 71330 }, { "epoch": 0.9754496790204483, "grad_norm": NaN, "learning_rate": 0.0003039154869519781, "loss": 0.0, "step": 71340 }, { "epoch": 0.9755864115238154, "grad_norm": NaN, "learning_rate": 0.0003038572281425091, "loss": 0.0, "step": 71350 }, { "epoch": 0.9757231440271824, "grad_norm": NaN, "learning_rate": 0.00030379896626579685, "loss": 0.0, "step": 71360 }, { "epoch": 0.9758598765305495, "grad_norm": NaN, "learning_rate": 0.0003037407013251596, "loss": 0.0, "step": 71370 }, { "epoch": 0.9759966090339165, "grad_norm": NaN, "learning_rate": 0.0003036824333239157, "loss": 0.0, "step": 71380 }, { "epoch": 0.9761333415372835, "grad_norm": NaN, "learning_rate": 0.0003036241622653835, "loss": 0.0, "step": 71390 }, { "epoch": 0.9762700740406506, "grad_norm": NaN, "learning_rate": 0.00030356588815288153, "loss": 0.0, "step": 71400 }, { "epoch": 0.9764068065440176, "grad_norm": NaN, "learning_rate": 0.0003035076109897287, "loss": 0.0, "step": 71410 }, { "epoch": 0.9765435390473847, "grad_norm": NaN, "learning_rate": 0.0003034493307792439, "loss": 0.0, "step": 71420 }, { "epoch": 0.9766802715507517, "grad_norm": NaN, "learning_rate": 0.00030339104752474637, "loss": 0.0, "step": 71430 }, { "epoch": 0.9768170040541188, "grad_norm": NaN, "learning_rate": 0.0003033327612295553, "loss": 0.0, "step": 71440 }, { "epoch": 0.9769537365574857, "grad_norm": NaN, "learning_rate": 0.0003032744718969903, "loss": 0.0, "step": 71450 }, { "epoch": 0.9770904690608528, "grad_norm": NaN, "learning_rate": 0.00030321617953037096, "loss": 0.0, "step": 71460 }, { "epoch": 0.9772272015642198, "grad_norm": NaN, "learning_rate": 0.000303157884133017, "loss": 0.0, "step": 71470 }, { "epoch": 0.9773639340675869, "grad_norm": NaN, "learning_rate": 0.0003030995857082486, "loss": 0.0, "step": 71480 }, { "epoch": 0.9775006665709539, "grad_norm": NaN, "learning_rate": 0.0003030412842593859, "loss": 0.0, "step": 71490 }, { "epoch": 0.9776373990743209, "grad_norm": NaN, "learning_rate": 0.00030298297978974915, "loss": 0.0, "step": 71500 }, { "epoch": 0.977774131577688, "grad_norm": NaN, "learning_rate": 0.0003029246723026589, "loss": 0.0, "step": 71510 }, { "epoch": 0.977910864081055, "grad_norm": NaN, "learning_rate": 0.00030286636180143603, "loss": 0.0, "step": 71520 }, { "epoch": 0.9780475965844221, "grad_norm": NaN, "learning_rate": 0.0003028080482894011, "loss": 0.0, "step": 71530 }, { "epoch": 0.9781843290877891, "grad_norm": NaN, "learning_rate": 0.0003027497317698754, "loss": 0.0, "step": 71540 }, { "epoch": 0.9783210615911562, "grad_norm": NaN, "learning_rate": 0.00030269141224617996, "loss": 0.0, "step": 71550 }, { "epoch": 0.9784577940945232, "grad_norm": NaN, "learning_rate": 0.00030263308972163617, "loss": 0.0, "step": 71560 }, { "epoch": 0.9785945265978903, "grad_norm": NaN, "learning_rate": 0.0003025747641995658, "loss": 0.0, "step": 71570 }, { "epoch": 0.9787312591012572, "grad_norm": NaN, "learning_rate": 0.00030251643568329024, "loss": 0.0, "step": 71580 }, { "epoch": 0.9788679916046243, "grad_norm": NaN, "learning_rate": 0.0003024581041761316, "loss": 0.0, "step": 71590 }, { "epoch": 0.9790047241079913, "grad_norm": NaN, "learning_rate": 0.000302399769681412, "loss": 0.0, "step": 71600 }, { "epoch": 0.9791414566113583, "grad_norm": NaN, "learning_rate": 0.0003023414322024534, "loss": 0.0, "step": 71610 }, { "epoch": 0.9792781891147254, "grad_norm": NaN, "learning_rate": 0.0003022830917425784, "loss": 0.0, "step": 71620 }, { "epoch": 0.9794149216180924, "grad_norm": NaN, "learning_rate": 0.00030222474830510947, "loss": 0.0, "step": 71630 }, { "epoch": 0.9795516541214595, "grad_norm": NaN, "learning_rate": 0.0003021664018933694, "loss": 0.0, "step": 71640 }, { "epoch": 0.9796883866248265, "grad_norm": NaN, "learning_rate": 0.0003021080525106812, "loss": 0.0, "step": 71650 }, { "epoch": 0.9798251191281936, "grad_norm": NaN, "learning_rate": 0.00030204970016036776, "loss": 0.0, "step": 71660 }, { "epoch": 0.9799618516315606, "grad_norm": NaN, "learning_rate": 0.0003019913448457524, "loss": 0.0, "step": 71670 }, { "epoch": 0.9800985841349277, "grad_norm": NaN, "learning_rate": 0.00030193298657015867, "loss": 0.0, "step": 71680 }, { "epoch": 0.9802353166382947, "grad_norm": NaN, "learning_rate": 0.0003018746253369099, "loss": 0.0, "step": 71690 }, { "epoch": 0.9803720491416618, "grad_norm": NaN, "learning_rate": 0.00030181626114933, "loss": 0.0, "step": 71700 }, { "epoch": 0.9805087816450287, "grad_norm": NaN, "learning_rate": 0.00030175789401074287, "loss": 0.0, "step": 71710 }, { "epoch": 0.9806455141483957, "grad_norm": NaN, "learning_rate": 0.00030169952392447265, "loss": 0.0, "step": 71720 }, { "epoch": 0.9807822466517628, "grad_norm": NaN, "learning_rate": 0.0003016411508938435, "loss": 0.0, "step": 71730 }, { "epoch": 0.9809189791551298, "grad_norm": NaN, "learning_rate": 0.0003015827749221798, "loss": 0.0, "step": 71740 }, { "epoch": 0.9810557116584969, "grad_norm": NaN, "learning_rate": 0.0003015243960128063, "loss": 0.0, "step": 71750 }, { "epoch": 0.9811924441618639, "grad_norm": NaN, "learning_rate": 0.00030146601416904773, "loss": 0.0, "step": 71760 }, { "epoch": 0.981329176665231, "grad_norm": NaN, "learning_rate": 0.0003014076293942289, "loss": 0.0, "step": 71770 }, { "epoch": 0.981465909168598, "grad_norm": NaN, "learning_rate": 0.000301349241691675, "loss": 0.0, "step": 71780 }, { "epoch": 0.9816026416719651, "grad_norm": NaN, "learning_rate": 0.00030129085106471123, "loss": 0.0, "step": 71790 }, { "epoch": 0.9817393741753321, "grad_norm": NaN, "learning_rate": 0.0003012324575166631, "loss": 0.0, "step": 71800 }, { "epoch": 0.9818761066786992, "grad_norm": NaN, "learning_rate": 0.0003011740610508561, "loss": 0.0, "step": 71810 }, { "epoch": 0.9820128391820662, "grad_norm": NaN, "learning_rate": 0.00030111566167061605, "loss": 0.0, "step": 71820 }, { "epoch": 0.9821495716854332, "grad_norm": NaN, "learning_rate": 0.0003010572593792689, "loss": 0.0, "step": 71830 }, { "epoch": 0.9822863041888003, "grad_norm": NaN, "learning_rate": 0.0003009988541801407, "loss": 0.0, "step": 71840 }, { "epoch": 0.9824230366921672, "grad_norm": NaN, "learning_rate": 0.00030094044607655767, "loss": 0.0, "step": 71850 }, { "epoch": 0.9825597691955343, "grad_norm": NaN, "learning_rate": 0.00030088203507184634, "loss": 0.0, "step": 71860 }, { "epoch": 0.9826965016989013, "grad_norm": NaN, "learning_rate": 0.00030082362116933323, "loss": 0.0, "step": 71870 }, { "epoch": 0.9828332342022684, "grad_norm": NaN, "learning_rate": 0.00030076520437234504, "loss": 0.0, "step": 71880 }, { "epoch": 0.9829699667056354, "grad_norm": NaN, "learning_rate": 0.0003007067846842088, "loss": 0.0, "step": 71890 }, { "epoch": 0.9831066992090025, "grad_norm": NaN, "learning_rate": 0.00030064836210825146, "loss": 0.0, "step": 71900 }, { "epoch": 0.9832434317123695, "grad_norm": NaN, "learning_rate": 0.0003005899366478003, "loss": 0.0, "step": 71910 }, { "epoch": 0.9833801642157366, "grad_norm": NaN, "learning_rate": 0.00030053150830618294, "loss": 0.0, "step": 71920 }, { "epoch": 0.9835168967191036, "grad_norm": NaN, "learning_rate": 0.0003004730770867266, "loss": 0.0, "step": 71930 }, { "epoch": 0.9836536292224706, "grad_norm": NaN, "learning_rate": 0.00030041464299275934, "loss": 0.0, "step": 71940 }, { "epoch": 0.9837903617258377, "grad_norm": NaN, "learning_rate": 0.0003003562060276088, "loss": 0.0, "step": 71950 }, { "epoch": 0.9839270942292047, "grad_norm": NaN, "learning_rate": 0.00030029776619460325, "loss": 0.0, "step": 71960 }, { "epoch": 0.9840638267325718, "grad_norm": NaN, "learning_rate": 0.0003002393234970708, "loss": 0.0, "step": 71970 }, { "epoch": 0.9842005592359387, "grad_norm": NaN, "learning_rate": 0.0003001808779383399, "loss": 0.0, "step": 71980 }, { "epoch": 0.9843372917393058, "grad_norm": NaN, "learning_rate": 0.00030012242952173914, "loss": 0.0, "step": 71990 }, { "epoch": 0.9844740242426728, "grad_norm": NaN, "learning_rate": 0.00030006397825059705, "loss": 0.0, "step": 72000 }, { "epoch": 0.9846107567460399, "grad_norm": NaN, "learning_rate": 0.00030000552412824276, "loss": 0.0, "step": 72010 }, { "epoch": 0.9847474892494069, "grad_norm": NaN, "learning_rate": 0.0002999470671580052, "loss": 0.0, "step": 72020 }, { "epoch": 0.984884221752774, "grad_norm": NaN, "learning_rate": 0.0002998886073432134, "loss": 0.0, "step": 72030 }, { "epoch": 0.985020954256141, "grad_norm": NaN, "learning_rate": 0.0002998301446871971, "loss": 0.0, "step": 72040 }, { "epoch": 0.985157686759508, "grad_norm": NaN, "learning_rate": 0.00029977167919328554, "loss": 0.0, "step": 72050 }, { "epoch": 0.9852944192628751, "grad_norm": NaN, "learning_rate": 0.0002997132108648084, "loss": 0.0, "step": 72060 }, { "epoch": 0.9854311517662421, "grad_norm": NaN, "learning_rate": 0.00029965473970509573, "loss": 0.0, "step": 72070 }, { "epoch": 0.9855678842696092, "grad_norm": NaN, "learning_rate": 0.0002995962657174774, "loss": 0.0, "step": 72080 }, { "epoch": 0.9857046167729762, "grad_norm": NaN, "learning_rate": 0.00029953778890528366, "loss": 0.0, "step": 72090 }, { "epoch": 0.9858413492763433, "grad_norm": NaN, "learning_rate": 0.0002994793092718448, "loss": 0.0, "step": 72100 }, { "epoch": 0.9859780817797102, "grad_norm": NaN, "learning_rate": 0.0002994208268204913, "loss": 0.0, "step": 72110 }, { "epoch": 0.9861148142830773, "grad_norm": NaN, "learning_rate": 0.0002993623415545539, "loss": 0.0, "step": 72120 }, { "epoch": 0.9862515467864443, "grad_norm": NaN, "learning_rate": 0.0002993038534773632, "loss": 0.0, "step": 72130 }, { "epoch": 0.9863882792898114, "grad_norm": NaN, "learning_rate": 0.0002992453625922505, "loss": 0.0, "step": 72140 }, { "epoch": 0.9865250117931784, "grad_norm": NaN, "learning_rate": 0.0002991868689025467, "loss": 0.0, "step": 72150 }, { "epoch": 0.9866617442965454, "grad_norm": NaN, "learning_rate": 0.00029912837241158314, "loss": 0.0, "step": 72160 }, { "epoch": 0.9867984767999125, "grad_norm": NaN, "learning_rate": 0.00029906987312269134, "loss": 0.0, "step": 72170 }, { "epoch": 0.9869352093032795, "grad_norm": NaN, "learning_rate": 0.00029901137103920285, "loss": 0.0, "step": 72180 }, { "epoch": 0.9870719418066466, "grad_norm": NaN, "learning_rate": 0.00029895286616444947, "loss": 0.0, "step": 72190 }, { "epoch": 0.9872086743100136, "grad_norm": NaN, "learning_rate": 0.0002988943585017631, "loss": 0.0, "step": 72200 }, { "epoch": 0.9873454068133807, "grad_norm": NaN, "learning_rate": 0.00029883584805447594, "loss": 0.0, "step": 72210 }, { "epoch": 0.9874821393167477, "grad_norm": NaN, "learning_rate": 0.00029877733482592004, "loss": 0.0, "step": 72220 }, { "epoch": 0.9876188718201148, "grad_norm": NaN, "learning_rate": 0.00029871881881942807, "loss": 0.0, "step": 72230 }, { "epoch": 0.9877556043234818, "grad_norm": NaN, "learning_rate": 0.0002986603000383324, "loss": 0.0, "step": 72240 }, { "epoch": 0.9878923368268488, "grad_norm": NaN, "learning_rate": 0.0002986017784859658, "loss": 0.0, "step": 72250 }, { "epoch": 0.9880290693302158, "grad_norm": NaN, "learning_rate": 0.0002985432541656612, "loss": 0.0, "step": 72260 }, { "epoch": 0.9881658018335828, "grad_norm": NaN, "learning_rate": 0.0002984847270807515, "loss": 0.0, "step": 72270 }, { "epoch": 0.9883025343369499, "grad_norm": NaN, "learning_rate": 0.00029842619723457017, "loss": 0.0, "step": 72280 }, { "epoch": 0.9884392668403169, "grad_norm": NaN, "learning_rate": 0.0002983676646304503, "loss": 0.0, "step": 72290 }, { "epoch": 0.988575999343684, "grad_norm": NaN, "learning_rate": 0.0002983091292717255, "loss": 0.0, "step": 72300 }, { "epoch": 0.988712731847051, "grad_norm": NaN, "learning_rate": 0.0002982505911617295, "loss": 0.0, "step": 72310 }, { "epoch": 0.9888494643504181, "grad_norm": NaN, "learning_rate": 0.0002981920503037959, "loss": 0.0, "step": 72320 }, { "epoch": 0.9889861968537851, "grad_norm": NaN, "learning_rate": 0.000298133506701259, "loss": 0.0, "step": 72330 }, { "epoch": 0.9891229293571522, "grad_norm": NaN, "learning_rate": 0.0002980749603574528, "loss": 0.0, "step": 72340 }, { "epoch": 0.9892596618605192, "grad_norm": NaN, "learning_rate": 0.00029801641127571147, "loss": 0.0, "step": 72350 }, { "epoch": 0.9893963943638862, "grad_norm": NaN, "learning_rate": 0.00029795785945936973, "loss": 0.0, "step": 72360 }, { "epoch": 0.9895331268672533, "grad_norm": NaN, "learning_rate": 0.00029789930491176187, "loss": 0.0, "step": 72370 }, { "epoch": 0.9896698593706202, "grad_norm": NaN, "learning_rate": 0.00029784074763622285, "loss": 0.0, "step": 72380 }, { "epoch": 0.9898065918739873, "grad_norm": NaN, "learning_rate": 0.0002977821876360877, "loss": 0.0, "step": 72390 }, { "epoch": 0.9899433243773543, "grad_norm": NaN, "learning_rate": 0.00029772362491469113, "loss": 0.0, "step": 72400 }, { "epoch": 0.9900800568807214, "grad_norm": NaN, "learning_rate": 0.0002976650594753687, "loss": 0.0, "step": 72410 }, { "epoch": 0.9902167893840884, "grad_norm": NaN, "learning_rate": 0.00029760649132145564, "loss": 0.0, "step": 72420 }, { "epoch": 0.9903535218874555, "grad_norm": NaN, "learning_rate": 0.00029754792045628747, "loss": 0.0, "step": 72430 }, { "epoch": 0.9904902543908225, "grad_norm": NaN, "learning_rate": 0.0002974893468832, "loss": 0.0, "step": 72440 }, { "epoch": 0.9906269868941896, "grad_norm": NaN, "learning_rate": 0.000297430770605529, "loss": 0.0, "step": 72450 }, { "epoch": 0.9907637193975566, "grad_norm": NaN, "learning_rate": 0.00029737219162661037, "loss": 0.0, "step": 72460 }, { "epoch": 0.9909004519009236, "grad_norm": NaN, "learning_rate": 0.00029731360994978047, "loss": 0.0, "step": 72470 }, { "epoch": 0.9910371844042907, "grad_norm": NaN, "learning_rate": 0.0002972550255783754, "loss": 0.0, "step": 72480 }, { "epoch": 0.9911739169076577, "grad_norm": NaN, "learning_rate": 0.00029719643851573187, "loss": 0.0, "step": 72490 }, { "epoch": 0.9913106494110248, "grad_norm": NaN, "learning_rate": 0.0002971378487651862, "loss": 0.0, "step": 72500 }, { "epoch": 0.9914473819143917, "grad_norm": NaN, "learning_rate": 0.00029707925633007544, "loss": 0.0, "step": 72510 }, { "epoch": 0.9915841144177588, "grad_norm": NaN, "learning_rate": 0.0002970206612137363, "loss": 0.0, "step": 72520 }, { "epoch": 0.9917208469211258, "grad_norm": NaN, "learning_rate": 0.00029696206341950593, "loss": 0.0, "step": 72530 }, { "epoch": 0.9918575794244929, "grad_norm": NaN, "learning_rate": 0.0002969034629507216, "loss": 0.0, "step": 72540 }, { "epoch": 0.9919943119278599, "grad_norm": NaN, "learning_rate": 0.0002968448598107206, "loss": 0.0, "step": 72550 }, { "epoch": 0.992131044431227, "grad_norm": NaN, "learning_rate": 0.00029678625400284046, "loss": 0.0, "step": 72560 }, { "epoch": 0.992267776934594, "grad_norm": NaN, "learning_rate": 0.000296727645530419, "loss": 0.0, "step": 72570 }, { "epoch": 0.992404509437961, "grad_norm": NaN, "learning_rate": 0.00029666903439679385, "loss": 0.0, "step": 72580 }, { "epoch": 0.9925412419413281, "grad_norm": NaN, "learning_rate": 0.00029661042060530313, "loss": 0.0, "step": 72590 }, { "epoch": 0.9926779744446951, "grad_norm": NaN, "learning_rate": 0.0002965518041592849, "loss": 0.0, "step": 72600 }, { "epoch": 0.9928147069480622, "grad_norm": NaN, "learning_rate": 0.00029649318506207753, "loss": 0.0, "step": 72610 }, { "epoch": 0.9929514394514292, "grad_norm": NaN, "learning_rate": 0.0002964345633170194, "loss": 0.0, "step": 72620 }, { "epoch": 0.9930881719547963, "grad_norm": NaN, "learning_rate": 0.00029637593892744917, "loss": 0.0, "step": 72630 }, { "epoch": 0.9932249044581632, "grad_norm": NaN, "learning_rate": 0.00029631731189670545, "loss": 0.0, "step": 72640 }, { "epoch": 0.9933616369615303, "grad_norm": NaN, "learning_rate": 0.00029625868222812727, "loss": 0.0, "step": 72650 }, { "epoch": 0.9934983694648973, "grad_norm": NaN, "learning_rate": 0.00029620004992505356, "loss": 0.0, "step": 72660 }, { "epoch": 0.9936351019682644, "grad_norm": NaN, "learning_rate": 0.0002961414149908236, "loss": 0.0, "step": 72670 }, { "epoch": 0.9937718344716314, "grad_norm": NaN, "learning_rate": 0.00029608277742877666, "loss": 0.0, "step": 72680 }, { "epoch": 0.9939085669749984, "grad_norm": NaN, "learning_rate": 0.00029602413724225227, "loss": 0.0, "step": 72690 }, { "epoch": 0.9940452994783655, "grad_norm": NaN, "learning_rate": 0.00029596549443459003, "loss": 0.0, "step": 72700 }, { "epoch": 0.9941820319817325, "grad_norm": NaN, "learning_rate": 0.00029590684900912973, "loss": 0.0, "step": 72710 }, { "epoch": 0.9943187644850996, "grad_norm": NaN, "learning_rate": 0.0002958482009692114, "loss": 0.0, "step": 72720 }, { "epoch": 0.9944554969884666, "grad_norm": NaN, "learning_rate": 0.000295789550318175, "loss": 0.0, "step": 72730 }, { "epoch": 0.9945922294918337, "grad_norm": NaN, "learning_rate": 0.0002957308970593608, "loss": 0.0, "step": 72740 }, { "epoch": 0.9947289619952007, "grad_norm": NaN, "learning_rate": 0.0002956722411961093, "loss": 0.0, "step": 72750 }, { "epoch": 0.9948656944985678, "grad_norm": NaN, "learning_rate": 0.00029561358273176087, "loss": 0.0, "step": 72760 }, { "epoch": 0.9950024270019348, "grad_norm": NaN, "learning_rate": 0.00029555492166965624, "loss": 0.0, "step": 72770 }, { "epoch": 0.9951391595053019, "grad_norm": NaN, "learning_rate": 0.0002954962580131363, "loss": 0.0, "step": 72780 }, { "epoch": 0.9952758920086688, "grad_norm": NaN, "learning_rate": 0.00029543759176554196, "loss": 0.0, "step": 72790 }, { "epoch": 0.9954126245120358, "grad_norm": NaN, "learning_rate": 0.00029537892293021446, "loss": 0.0, "step": 72800 }, { "epoch": 0.9955493570154029, "grad_norm": NaN, "learning_rate": 0.0002953202515104949, "loss": 0.0, "step": 72810 }, { "epoch": 0.9956860895187699, "grad_norm": NaN, "learning_rate": 0.0002952615775097247, "loss": 0.0, "step": 72820 }, { "epoch": 0.995822822022137, "grad_norm": NaN, "learning_rate": 0.0002952029009312456, "loss": 0.0, "step": 72830 }, { "epoch": 0.995959554525504, "grad_norm": NaN, "learning_rate": 0.00029514422177839914, "loss": 0.0, "step": 72840 }, { "epoch": 0.9960962870288711, "grad_norm": NaN, "learning_rate": 0.0002950855400545273, "loss": 0.0, "step": 72850 }, { "epoch": 0.9962330195322381, "grad_norm": NaN, "learning_rate": 0.00029502685576297203, "loss": 0.0, "step": 72860 }, { "epoch": 0.9963697520356052, "grad_norm": NaN, "learning_rate": 0.0002949681689070755, "loss": 0.0, "step": 72870 }, { "epoch": 0.9965064845389722, "grad_norm": NaN, "learning_rate": 0.0002949094794901799, "loss": 0.0, "step": 72880 }, { "epoch": 0.9966432170423393, "grad_norm": NaN, "learning_rate": 0.0002948507875156279, "loss": 0.0, "step": 72890 }, { "epoch": 0.9967799495457063, "grad_norm": NaN, "learning_rate": 0.00029479209298676184, "loss": 0.0, "step": 72900 }, { "epoch": 0.9969166820490732, "grad_norm": NaN, "learning_rate": 0.0002947333959069246, "loss": 0.0, "step": 72910 }, { "epoch": 0.9970534145524403, "grad_norm": NaN, "learning_rate": 0.0002946746962794591, "loss": 0.0, "step": 72920 }, { "epoch": 0.9971901470558073, "grad_norm": NaN, "learning_rate": 0.0002946159941077082, "loss": 0.0, "step": 72930 }, { "epoch": 0.9973268795591744, "grad_norm": NaN, "learning_rate": 0.0002945572893950152, "loss": 0.0, "step": 72940 }, { "epoch": 0.9974636120625414, "grad_norm": NaN, "learning_rate": 0.0002944985821447233, "loss": 0.0, "step": 72950 }, { "epoch": 0.9976003445659085, "grad_norm": NaN, "learning_rate": 0.00029443987236017614, "loss": 0.0, "step": 72960 }, { "epoch": 0.9977370770692755, "grad_norm": NaN, "learning_rate": 0.0002943811600447172, "loss": 0.0, "step": 72970 }, { "epoch": 0.9978738095726426, "grad_norm": NaN, "learning_rate": 0.00029432244520169015, "loss": 0.0, "step": 72980 }, { "epoch": 0.9980105420760096, "grad_norm": NaN, "learning_rate": 0.0002942637278344391, "loss": 0.0, "step": 72990 }, { "epoch": 0.9981472745793767, "grad_norm": NaN, "learning_rate": 0.00029420500794630786, "loss": 0.0, "step": 73000 }, { "epoch": 0.9982840070827437, "grad_norm": NaN, "learning_rate": 0.00029414628554064073, "loss": 0.0, "step": 73010 }, { "epoch": 0.9984207395861107, "grad_norm": NaN, "learning_rate": 0.000294087560620782, "loss": 0.0, "step": 73020 }, { "epoch": 0.9985574720894778, "grad_norm": NaN, "learning_rate": 0.0002940288331900761, "loss": 0.0, "step": 73030 }, { "epoch": 0.9986942045928447, "grad_norm": NaN, "learning_rate": 0.0002939701032518678, "loss": 0.0, "step": 73040 }, { "epoch": 0.9988309370962118, "grad_norm": NaN, "learning_rate": 0.0002939113708095017, "loss": 0.0, "step": 73050 }, { "epoch": 0.9989676695995788, "grad_norm": NaN, "learning_rate": 0.00029385263586632264, "loss": 0.0, "step": 73060 }, { "epoch": 0.9991044021029459, "grad_norm": NaN, "learning_rate": 0.00029379389842567585, "loss": 0.0, "step": 73070 }, { "epoch": 0.9992411346063129, "grad_norm": NaN, "learning_rate": 0.0002937351584909063, "loss": 0.0, "step": 73080 }, { "epoch": 0.99937786710968, "grad_norm": NaN, "learning_rate": 0.0002936764160653595, "loss": 0.0, "step": 73090 }, { "epoch": 0.999514599613047, "grad_norm": NaN, "learning_rate": 0.00029361767115238083, "loss": 0.0, "step": 73100 }, { "epoch": 0.9996513321164141, "grad_norm": NaN, "learning_rate": 0.0002935589237553158, "loss": 0.0, "step": 73110 }, { "epoch": 0.9997880646197811, "grad_norm": NaN, "learning_rate": 0.00029350017387751024, "loss": 0.0, "step": 73120 }, { "epoch": 0.9999247971231481, "grad_norm": NaN, "learning_rate": 0.0002934414215223101, "loss": 0.0, "step": 73130 } ], "logging_steps": 10, "max_steps": 146270, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.34487557244348e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }