{ "best_metric": 0.6779661016949152, "best_model_checkpoint": "DF_Image_VIT_V1/checkpoint-13812", "epoch": 12.0, "eval_steps": 500, "global_step": 55248, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002172024326672459, "grad_norm": 0.530893087387085, "learning_rate": 4.9972849695916595e-05, "loss": 0.3852, "step": 10 }, { "epoch": 0.004344048653344918, "grad_norm": 0.3032895624637604, "learning_rate": 4.994569939183319e-05, "loss": 0.1074, "step": 20 }, { "epoch": 0.006516072980017376, "grad_norm": 0.9604411125183105, "learning_rate": 4.991854908774978e-05, "loss": 0.1413, "step": 30 }, { "epoch": 0.008688097306689836, "grad_norm": 0.1610596477985382, "learning_rate": 4.989139878366638e-05, "loss": 0.0231, "step": 40 }, { "epoch": 0.010860121633362294, "grad_norm": 0.4416898488998413, "learning_rate": 4.9864248479582974e-05, "loss": 0.068, "step": 50 }, { "epoch": 0.013032145960034752, "grad_norm": 0.19819538295269012, "learning_rate": 4.983709817549957e-05, "loss": 0.1456, "step": 60 }, { "epoch": 0.015204170286707211, "grad_norm": 0.18559947609901428, "learning_rate": 4.980994787141616e-05, "loss": 0.071, "step": 70 }, { "epoch": 0.01737619461337967, "grad_norm": 0.13225772976875305, "learning_rate": 4.978279756733275e-05, "loss": 0.0423, "step": 80 }, { "epoch": 0.01954821894005213, "grad_norm": 0.10720467567443848, "learning_rate": 4.975564726324935e-05, "loss": 0.014, "step": 90 }, { "epoch": 0.021720243266724587, "grad_norm": 0.09248513728380203, "learning_rate": 4.9728496959165946e-05, "loss": 0.0398, "step": 100 }, { "epoch": 0.023892267593397045, "grad_norm": 0.16907687485218048, "learning_rate": 4.970134665508254e-05, "loss": 0.1245, "step": 110 }, { "epoch": 0.026064291920069503, "grad_norm": 0.2754700183868408, "learning_rate": 4.967419635099914e-05, "loss": 0.1191, "step": 120 }, { "epoch": 0.028236316246741965, "grad_norm": 0.11275039613246918, "learning_rate": 4.964704604691573e-05, "loss": 0.0192, "step": 130 }, { "epoch": 0.030408340573414423, "grad_norm": 0.10382300615310669, "learning_rate": 4.9619895742832325e-05, "loss": 0.0674, "step": 140 }, { "epoch": 0.03258036490008688, "grad_norm": 0.10827434808015823, "learning_rate": 4.959274543874892e-05, "loss": 0.0674, "step": 150 }, { "epoch": 0.03475238922675934, "grad_norm": 0.09914221614599228, "learning_rate": 4.956559513466551e-05, "loss": 0.0125, "step": 160 }, { "epoch": 0.0369244135534318, "grad_norm": 0.08502601832151413, "learning_rate": 4.9538444830582104e-05, "loss": 0.039, "step": 170 }, { "epoch": 0.03909643788010426, "grad_norm": 0.0816200003027916, "learning_rate": 4.95112945264987e-05, "loss": 0.0388, "step": 180 }, { "epoch": 0.04126846220677671, "grad_norm": 0.09381763637065887, "learning_rate": 4.948414422241529e-05, "loss": 0.0663, "step": 190 }, { "epoch": 0.043440486533449174, "grad_norm": 0.08192573487758636, "learning_rate": 4.945699391833189e-05, "loss": 0.0679, "step": 200 }, { "epoch": 0.045612510860121636, "grad_norm": 0.10738043487071991, "learning_rate": 4.942984361424848e-05, "loss": 0.0959, "step": 210 }, { "epoch": 0.04778453518679409, "grad_norm": 0.10316260159015656, "learning_rate": 4.9402693310165076e-05, "loss": 0.04, "step": 220 }, { "epoch": 0.04995655951346655, "grad_norm": 0.08651253581047058, "learning_rate": 4.937554300608167e-05, "loss": 0.0113, "step": 230 }, { "epoch": 0.052128583840139006, "grad_norm": 0.46742257475852966, "learning_rate": 4.934839270199826e-05, "loss": 0.097, "step": 240 }, { "epoch": 0.05430060816681147, "grad_norm": 0.10922794789075851, "learning_rate": 4.9321242397914855e-05, "loss": 0.0958, "step": 250 }, { "epoch": 0.05647263249348393, "grad_norm": 0.09597119688987732, "learning_rate": 4.929409209383145e-05, "loss": 0.0126, "step": 260 }, { "epoch": 0.058644656820156384, "grad_norm": 0.08410635590553284, "learning_rate": 4.926694178974805e-05, "loss": 0.01, "step": 270 }, { "epoch": 0.060816681146828845, "grad_norm": 0.07707684487104416, "learning_rate": 4.923979148566464e-05, "loss": 0.0678, "step": 280 }, { "epoch": 0.0629887054735013, "grad_norm": 0.06932105869054794, "learning_rate": 4.9212641181581234e-05, "loss": 0.0085, "step": 290 }, { "epoch": 0.06516072980017376, "grad_norm": 0.07078186422586441, "learning_rate": 4.9185490877497834e-05, "loss": 0.0983, "step": 300 }, { "epoch": 0.06733275412684622, "grad_norm": 0.08902157843112946, "learning_rate": 4.9158340573414427e-05, "loss": 0.0957, "step": 310 }, { "epoch": 0.06950477845351868, "grad_norm": 0.09481088072061539, "learning_rate": 4.913119026933102e-05, "loss": 0.0396, "step": 320 }, { "epoch": 0.07167680278019113, "grad_norm": 1.0230822563171387, "learning_rate": 4.910403996524761e-05, "loss": 0.0947, "step": 330 }, { "epoch": 0.0738488271068636, "grad_norm": 0.19595967233181, "learning_rate": 4.9076889661164206e-05, "loss": 0.0646, "step": 340 }, { "epoch": 0.07602085143353605, "grad_norm": 0.09209536015987396, "learning_rate": 4.9049739357080805e-05, "loss": 0.0148, "step": 350 }, { "epoch": 0.07819287576020852, "grad_norm": 0.0737241730093956, "learning_rate": 4.90225890529974e-05, "loss": 0.0098, "step": 360 }, { "epoch": 0.08036490008688098, "grad_norm": 0.0739888846874237, "learning_rate": 4.899543874891399e-05, "loss": 0.0683, "step": 370 }, { "epoch": 0.08253692441355343, "grad_norm": 0.07659421861171722, "learning_rate": 4.8968288444830584e-05, "loss": 0.0686, "step": 380 }, { "epoch": 0.08470894874022589, "grad_norm": 1.0171195268630981, "learning_rate": 4.894113814074718e-05, "loss": 0.0956, "step": 390 }, { "epoch": 0.08688097306689835, "grad_norm": 0.12161863595247269, "learning_rate": 4.891398783666377e-05, "loss": 0.0384, "step": 400 }, { "epoch": 0.08905299739357081, "grad_norm": 0.1082439124584198, "learning_rate": 4.8886837532580364e-05, "loss": 0.0682, "step": 410 }, { "epoch": 0.09122502172024327, "grad_norm": 0.10778769105672836, "learning_rate": 4.885968722849696e-05, "loss": 0.0662, "step": 420 }, { "epoch": 0.09339704604691572, "grad_norm": 0.12607441842556, "learning_rate": 4.8832536924413556e-05, "loss": 0.0369, "step": 430 }, { "epoch": 0.09556907037358818, "grad_norm": 0.08641614764928818, "learning_rate": 4.880538662033015e-05, "loss": 0.0969, "step": 440 }, { "epoch": 0.09774109470026064, "grad_norm": 0.12377568334341049, "learning_rate": 4.877823631624674e-05, "loss": 0.0665, "step": 450 }, { "epoch": 0.0999131190269331, "grad_norm": 0.09685255587100983, "learning_rate": 4.8751086012163335e-05, "loss": 0.0413, "step": 460 }, { "epoch": 0.10208514335360556, "grad_norm": 0.13495175540447235, "learning_rate": 4.8723935708079935e-05, "loss": 0.0958, "step": 470 }, { "epoch": 0.10425716768027801, "grad_norm": 0.09797213226556778, "learning_rate": 4.869678540399653e-05, "loss": 0.04, "step": 480 }, { "epoch": 0.10642919200695047, "grad_norm": 0.11449749022722244, "learning_rate": 4.866963509991312e-05, "loss": 0.0941, "step": 490 }, { "epoch": 0.10860121633362294, "grad_norm": 0.11653515696525574, "learning_rate": 4.864248479582972e-05, "loss": 0.0394, "step": 500 }, { "epoch": 0.1107732406602954, "grad_norm": 0.4893771708011627, "learning_rate": 4.8615334491746314e-05, "loss": 0.0394, "step": 510 }, { "epoch": 0.11294526498696786, "grad_norm": 0.07817152887582779, "learning_rate": 4.858818418766291e-05, "loss": 0.0377, "step": 520 }, { "epoch": 0.11511728931364032, "grad_norm": 0.07076311856508255, "learning_rate": 4.85610338835795e-05, "loss": 0.0358, "step": 530 }, { "epoch": 0.11728931364031277, "grad_norm": 0.4873422086238861, "learning_rate": 4.853388357949609e-05, "loss": 0.0392, "step": 540 }, { "epoch": 0.11946133796698523, "grad_norm": 0.06412825733423233, "learning_rate": 4.8506733275412686e-05, "loss": 0.0079, "step": 550 }, { "epoch": 0.12163336229365769, "grad_norm": 0.06440827250480652, "learning_rate": 4.847958297132928e-05, "loss": 0.0378, "step": 560 }, { "epoch": 0.12380538662033015, "grad_norm": 0.0619625598192215, "learning_rate": 4.845243266724587e-05, "loss": 0.0376, "step": 570 }, { "epoch": 0.1259774109470026, "grad_norm": 0.060499969869852066, "learning_rate": 4.842528236316247e-05, "loss": 0.0073, "step": 580 }, { "epoch": 0.12814943527367506, "grad_norm": 0.08101051300764084, "learning_rate": 4.8398132059079065e-05, "loss": 0.067, "step": 590 }, { "epoch": 0.13032145960034752, "grad_norm": 0.6294172406196594, "learning_rate": 4.837098175499566e-05, "loss": 0.0336, "step": 600 }, { "epoch": 0.13249348392701998, "grad_norm": 0.44123587012290955, "learning_rate": 4.834383145091225e-05, "loss": 0.1469, "step": 610 }, { "epoch": 0.13466550825369245, "grad_norm": 0.06616009771823883, "learning_rate": 4.8316681146828844e-05, "loss": 0.0364, "step": 620 }, { "epoch": 0.1368375325803649, "grad_norm": 0.06032127887010574, "learning_rate": 4.828953084274544e-05, "loss": 0.0079, "step": 630 }, { "epoch": 0.13900955690703737, "grad_norm": 0.055732373148202896, "learning_rate": 4.826238053866203e-05, "loss": 0.0379, "step": 640 }, { "epoch": 0.14118158123370983, "grad_norm": 0.09018450975418091, "learning_rate": 4.823523023457863e-05, "loss": 0.0682, "step": 650 }, { "epoch": 0.14335360556038226, "grad_norm": 0.07363690435886383, "learning_rate": 4.820807993049522e-05, "loss": 0.0366, "step": 660 }, { "epoch": 0.14552562988705472, "grad_norm": 0.15481476485729218, "learning_rate": 4.818092962641182e-05, "loss": 0.0702, "step": 670 }, { "epoch": 0.1476976542137272, "grad_norm": 0.15850311517715454, "learning_rate": 4.8153779322328416e-05, "loss": 0.1018, "step": 680 }, { "epoch": 0.14986967854039965, "grad_norm": 0.10273230075836182, "learning_rate": 4.812662901824501e-05, "loss": 0.0699, "step": 690 }, { "epoch": 0.1520417028670721, "grad_norm": 0.13007517158985138, "learning_rate": 4.80994787141616e-05, "loss": 0.07, "step": 700 }, { "epoch": 0.15421372719374457, "grad_norm": 0.0926077589392662, "learning_rate": 4.8072328410078195e-05, "loss": 0.0397, "step": 710 }, { "epoch": 0.15638575152041703, "grad_norm": 0.05907576531171799, "learning_rate": 4.804517810599479e-05, "loss": 0.008, "step": 720 }, { "epoch": 0.1585577758470895, "grad_norm": 0.055849187076091766, "learning_rate": 4.801802780191139e-05, "loss": 0.0382, "step": 730 }, { "epoch": 0.16072980017376196, "grad_norm": 0.06694773584604263, "learning_rate": 4.799087749782798e-05, "loss": 0.069, "step": 740 }, { "epoch": 0.16290182450043442, "grad_norm": 0.06179416552186012, "learning_rate": 4.7963727193744574e-05, "loss": 0.0081, "step": 750 }, { "epoch": 0.16507384882710685, "grad_norm": 0.06511629372835159, "learning_rate": 4.7936576889661167e-05, "loss": 0.0689, "step": 760 }, { "epoch": 0.1672458731537793, "grad_norm": 0.05806345120072365, "learning_rate": 4.790942658557776e-05, "loss": 0.0073, "step": 770 }, { "epoch": 0.16941789748045177, "grad_norm": 0.05196061730384827, "learning_rate": 4.788227628149435e-05, "loss": 0.0371, "step": 780 }, { "epoch": 0.17158992180712423, "grad_norm": 0.057679492980241776, "learning_rate": 4.7855125977410946e-05, "loss": 0.0062, "step": 790 }, { "epoch": 0.1737619461337967, "grad_norm": 0.04627954214811325, "learning_rate": 4.782797567332754e-05, "loss": 0.0057, "step": 800 }, { "epoch": 0.17593397046046916, "grad_norm": 0.04635784775018692, "learning_rate": 4.780082536924414e-05, "loss": 0.0707, "step": 810 }, { "epoch": 0.17810599478714162, "grad_norm": 0.6465384364128113, "learning_rate": 4.777367506516073e-05, "loss": 0.1365, "step": 820 }, { "epoch": 0.18027801911381408, "grad_norm": 0.0622185543179512, "learning_rate": 4.7746524761077324e-05, "loss": 0.0364, "step": 830 }, { "epoch": 0.18245004344048654, "grad_norm": 0.49819082021713257, "learning_rate": 4.771937445699392e-05, "loss": 0.0702, "step": 840 }, { "epoch": 0.184622067767159, "grad_norm": 0.07075995206832886, "learning_rate": 4.769222415291052e-05, "loss": 0.0383, "step": 850 }, { "epoch": 0.18679409209383144, "grad_norm": 0.06961839646100998, "learning_rate": 4.766507384882711e-05, "loss": 0.0685, "step": 860 }, { "epoch": 0.1889661164205039, "grad_norm": 0.08239472657442093, "learning_rate": 4.76379235447437e-05, "loss": 0.0386, "step": 870 }, { "epoch": 0.19113814074717636, "grad_norm": 0.5006256699562073, "learning_rate": 4.76107732406603e-05, "loss": 0.0383, "step": 880 }, { "epoch": 0.19331016507384882, "grad_norm": 0.05663491412997246, "learning_rate": 4.7583622936576896e-05, "loss": 0.0067, "step": 890 }, { "epoch": 0.19548218940052128, "grad_norm": 0.061523787677288055, "learning_rate": 4.755647263249349e-05, "loss": 0.068, "step": 900 }, { "epoch": 0.19765421372719374, "grad_norm": 0.09269333630800247, "learning_rate": 4.752932232841008e-05, "loss": 0.0674, "step": 910 }, { "epoch": 0.1998262380538662, "grad_norm": 0.0798824205994606, "learning_rate": 4.7502172024326675e-05, "loss": 0.0379, "step": 920 }, { "epoch": 0.20199826238053867, "grad_norm": 0.13433697819709778, "learning_rate": 4.747502172024327e-05, "loss": 0.0907, "step": 930 }, { "epoch": 0.20417028670721113, "grad_norm": 0.07293786108493805, "learning_rate": 4.744787141615986e-05, "loss": 0.0323, "step": 940 }, { "epoch": 0.2063423110338836, "grad_norm": 0.15695428848266602, "learning_rate": 4.7420721112076454e-05, "loss": 0.0631, "step": 950 }, { "epoch": 0.20851433536055602, "grad_norm": 0.23731303215026855, "learning_rate": 4.7393570807993054e-05, "loss": 0.0594, "step": 960 }, { "epoch": 0.21068635968722849, "grad_norm": 0.21904346346855164, "learning_rate": 4.736642050390965e-05, "loss": 0.0978, "step": 970 }, { "epoch": 0.21285838401390095, "grad_norm": 0.05626118555665016, "learning_rate": 4.733927019982624e-05, "loss": 0.0243, "step": 980 }, { "epoch": 0.2150304083405734, "grad_norm": 0.04844866693019867, "learning_rate": 4.731211989574283e-05, "loss": 0.0059, "step": 990 }, { "epoch": 0.21720243266724587, "grad_norm": 0.5048914551734924, "learning_rate": 4.7284969591659426e-05, "loss": 0.102, "step": 1000 }, { "epoch": 0.21937445699391833, "grad_norm": 0.06435242295265198, "learning_rate": 4.725781928757602e-05, "loss": 0.0381, "step": 1010 }, { "epoch": 0.2215464813205908, "grad_norm": 0.08498011529445648, "learning_rate": 4.723066898349261e-05, "loss": 0.0916, "step": 1020 }, { "epoch": 0.22371850564726325, "grad_norm": 0.07478222995996475, "learning_rate": 4.720351867940921e-05, "loss": 0.0118, "step": 1030 }, { "epoch": 0.22589052997393572, "grad_norm": 0.05415387079119682, "learning_rate": 4.7176368375325805e-05, "loss": 0.0087, "step": 1040 }, { "epoch": 0.22806255430060818, "grad_norm": 0.08049948513507843, "learning_rate": 4.7149218071242405e-05, "loss": 0.0652, "step": 1050 }, { "epoch": 0.23023457862728064, "grad_norm": 0.19372807443141937, "learning_rate": 4.7122067767159e-05, "loss": 0.0472, "step": 1060 }, { "epoch": 0.23240660295395307, "grad_norm": 0.04606153070926666, "learning_rate": 4.709491746307559e-05, "loss": 0.03, "step": 1070 }, { "epoch": 0.23457862728062553, "grad_norm": 0.05246344953775406, "learning_rate": 4.7067767158992184e-05, "loss": 0.0208, "step": 1080 }, { "epoch": 0.236750651607298, "grad_norm": 0.4381423890590668, "learning_rate": 4.704061685490878e-05, "loss": 0.1184, "step": 1090 }, { "epoch": 0.23892267593397046, "grad_norm": 0.05256995931267738, "learning_rate": 4.701346655082537e-05, "loss": 0.046, "step": 1100 }, { "epoch": 0.24109470026064292, "grad_norm": 0.05329786613583565, "learning_rate": 4.698631624674197e-05, "loss": 0.0228, "step": 1110 }, { "epoch": 0.24326672458731538, "grad_norm": 0.04372464492917061, "learning_rate": 4.695916594265856e-05, "loss": 0.0219, "step": 1120 }, { "epoch": 0.24543874891398784, "grad_norm": 0.03980829194188118, "learning_rate": 4.6932015638575156e-05, "loss": 0.0369, "step": 1130 }, { "epoch": 0.2476107732406603, "grad_norm": 0.055453334003686905, "learning_rate": 4.690486533449175e-05, "loss": 0.0341, "step": 1140 }, { "epoch": 0.24978279756733276, "grad_norm": 0.22770388424396515, "learning_rate": 4.687771503040834e-05, "loss": 0.0413, "step": 1150 }, { "epoch": 0.2519548218940052, "grad_norm": 0.8776328563690186, "learning_rate": 4.6850564726324935e-05, "loss": 0.083, "step": 1160 }, { "epoch": 0.25412684622067766, "grad_norm": 0.16721822321414948, "learning_rate": 4.682341442224153e-05, "loss": 0.0813, "step": 1170 }, { "epoch": 0.2562988705473501, "grad_norm": 0.07600212097167969, "learning_rate": 4.679626411815812e-05, "loss": 0.0387, "step": 1180 }, { "epoch": 0.2584708948740226, "grad_norm": 0.049551110714673996, "learning_rate": 4.676911381407472e-05, "loss": 0.0067, "step": 1190 }, { "epoch": 0.26064291920069504, "grad_norm": 0.058788660913705826, "learning_rate": 4.6741963509991314e-05, "loss": 0.0715, "step": 1200 }, { "epoch": 0.2628149435273675, "grad_norm": 0.10510104894638062, "learning_rate": 4.6714813205907907e-05, "loss": 0.0718, "step": 1210 }, { "epoch": 0.26498696785403997, "grad_norm": 0.06653593480587006, "learning_rate": 4.66876629018245e-05, "loss": 0.039, "step": 1220 }, { "epoch": 0.26715899218071243, "grad_norm": 0.04870177060365677, "learning_rate": 4.66605125977411e-05, "loss": 0.0068, "step": 1230 }, { "epoch": 0.2693310165073849, "grad_norm": 0.04352603852748871, "learning_rate": 4.663336229365769e-05, "loss": 0.0384, "step": 1240 }, { "epoch": 0.27150304083405735, "grad_norm": 0.044990599155426025, "learning_rate": 4.6606211989574285e-05, "loss": 0.0383, "step": 1250 }, { "epoch": 0.2736750651607298, "grad_norm": 0.09989727288484573, "learning_rate": 4.6579061685490885e-05, "loss": 0.1971, "step": 1260 }, { "epoch": 0.2758470894874023, "grad_norm": 0.26683279871940613, "learning_rate": 4.655191138140748e-05, "loss": 0.1433, "step": 1270 }, { "epoch": 0.27801911381407474, "grad_norm": 0.13336573541164398, "learning_rate": 4.652476107732407e-05, "loss": 0.0504, "step": 1280 }, { "epoch": 0.2801911381407472, "grad_norm": 0.09248033910989761, "learning_rate": 4.6497610773240664e-05, "loss": 0.0371, "step": 1290 }, { "epoch": 0.28236316246741966, "grad_norm": 0.067159004509449, "learning_rate": 4.647046046915726e-05, "loss": 0.039, "step": 1300 }, { "epoch": 0.2845351867940921, "grad_norm": 0.4917806386947632, "learning_rate": 4.644331016507385e-05, "loss": 0.0648, "step": 1310 }, { "epoch": 0.2867072111207645, "grad_norm": 0.08391285687685013, "learning_rate": 4.641615986099044e-05, "loss": 0.0401, "step": 1320 }, { "epoch": 0.288879235447437, "grad_norm": 0.16138538718223572, "learning_rate": 4.6389009556907036e-05, "loss": 0.0595, "step": 1330 }, { "epoch": 0.29105125977410945, "grad_norm": 0.05601629614830017, "learning_rate": 4.6361859252823636e-05, "loss": 0.0146, "step": 1340 }, { "epoch": 0.2932232841007819, "grad_norm": 0.05195131152868271, "learning_rate": 4.633470894874023e-05, "loss": 0.0651, "step": 1350 }, { "epoch": 0.2953953084274544, "grad_norm": 0.49680355191230774, "learning_rate": 4.630755864465682e-05, "loss": 0.0639, "step": 1360 }, { "epoch": 0.29756733275412683, "grad_norm": 0.2788536250591278, "learning_rate": 4.6280408340573415e-05, "loss": 0.1024, "step": 1370 }, { "epoch": 0.2997393570807993, "grad_norm": 0.21596238017082214, "learning_rate": 4.625325803649001e-05, "loss": 0.0613, "step": 1380 }, { "epoch": 0.30191138140747176, "grad_norm": 0.3460381031036377, "learning_rate": 4.62261077324066e-05, "loss": 0.0602, "step": 1390 }, { "epoch": 0.3040834057341442, "grad_norm": 1.0655152797698975, "learning_rate": 4.6198957428323194e-05, "loss": 0.1124, "step": 1400 }, { "epoch": 0.3062554300608167, "grad_norm": 0.4805503487586975, "learning_rate": 4.6171807124239794e-05, "loss": 0.0272, "step": 1410 }, { "epoch": 0.30842745438748914, "grad_norm": 0.05258309841156006, "learning_rate": 4.614465682015639e-05, "loss": 0.0132, "step": 1420 }, { "epoch": 0.3105994787141616, "grad_norm": 0.056971024721860886, "learning_rate": 4.611750651607299e-05, "loss": 0.08, "step": 1430 }, { "epoch": 0.31277150304083406, "grad_norm": 0.05958361178636551, "learning_rate": 4.609035621198958e-05, "loss": 0.0699, "step": 1440 }, { "epoch": 0.3149435273675065, "grad_norm": 0.05827038362622261, "learning_rate": 4.606320590790617e-05, "loss": 0.0069, "step": 1450 }, { "epoch": 0.317115551694179, "grad_norm": 0.06229276955127716, "learning_rate": 4.6036055603822766e-05, "loss": 0.0696, "step": 1460 }, { "epoch": 0.31928757602085145, "grad_norm": 0.06447760760784149, "learning_rate": 4.600890529973936e-05, "loss": 0.0381, "step": 1470 }, { "epoch": 0.3214596003475239, "grad_norm": 0.09773342311382294, "learning_rate": 4.598175499565595e-05, "loss": 0.1581, "step": 1480 }, { "epoch": 0.32363162467419637, "grad_norm": 0.1255279779434204, "learning_rate": 4.595460469157255e-05, "loss": 0.0407, "step": 1490 }, { "epoch": 0.32580364900086883, "grad_norm": 0.1384665071964264, "learning_rate": 4.5927454387489145e-05, "loss": 0.1221, "step": 1500 }, { "epoch": 0.3279756733275413, "grad_norm": 0.11321841925382614, "learning_rate": 4.590030408340574e-05, "loss": 0.0506, "step": 1510 }, { "epoch": 0.3301476976542137, "grad_norm": 0.4507741332054138, "learning_rate": 4.587315377932233e-05, "loss": 0.0664, "step": 1520 }, { "epoch": 0.33231972198088616, "grad_norm": 0.10015634447336197, "learning_rate": 4.5846003475238924e-05, "loss": 0.0401, "step": 1530 }, { "epoch": 0.3344917463075586, "grad_norm": 0.0798087790608406, "learning_rate": 4.581885317115552e-05, "loss": 0.0391, "step": 1540 }, { "epoch": 0.3366637706342311, "grad_norm": 0.08314526081085205, "learning_rate": 4.579170286707211e-05, "loss": 0.0676, "step": 1550 }, { "epoch": 0.33883579496090355, "grad_norm": 0.07247483730316162, "learning_rate": 4.57645525629887e-05, "loss": 0.0381, "step": 1560 }, { "epoch": 0.341007819287576, "grad_norm": 0.0960722267627716, "learning_rate": 4.57374022589053e-05, "loss": 0.0578, "step": 1570 }, { "epoch": 0.34317984361424847, "grad_norm": 0.1308896243572235, "learning_rate": 4.5710251954821896e-05, "loss": 0.0681, "step": 1580 }, { "epoch": 0.34535186794092093, "grad_norm": 0.6395774483680725, "learning_rate": 4.568310165073849e-05, "loss": 0.0489, "step": 1590 }, { "epoch": 0.3475238922675934, "grad_norm": 0.07124833017587662, "learning_rate": 4.565595134665508e-05, "loss": 0.0915, "step": 1600 }, { "epoch": 0.34969591659426585, "grad_norm": 0.06098182126879692, "learning_rate": 4.562880104257168e-05, "loss": 0.0384, "step": 1610 }, { "epoch": 0.3518679409209383, "grad_norm": 0.055936507880687714, "learning_rate": 4.5601650738488274e-05, "loss": 0.0067, "step": 1620 }, { "epoch": 0.3540399652476108, "grad_norm": 0.05517324060201645, "learning_rate": 4.557450043440487e-05, "loss": 0.0698, "step": 1630 }, { "epoch": 0.35621198957428324, "grad_norm": 0.059903811663389206, "learning_rate": 4.554735013032147e-05, "loss": 0.0381, "step": 1640 }, { "epoch": 0.3583840139009557, "grad_norm": 0.453118234872818, "learning_rate": 4.552019982623806e-05, "loss": 0.1588, "step": 1650 }, { "epoch": 0.36055603822762816, "grad_norm": 0.10766888409852982, "learning_rate": 4.549304952215465e-05, "loss": 0.013, "step": 1660 }, { "epoch": 0.3627280625543006, "grad_norm": 0.4284590482711792, "learning_rate": 4.5465899218071246e-05, "loss": 0.1186, "step": 1670 }, { "epoch": 0.3649000868809731, "grad_norm": 0.1976746916770935, "learning_rate": 4.543874891398784e-05, "loss": 0.0951, "step": 1680 }, { "epoch": 0.36707211120764555, "grad_norm": 0.4342038929462433, "learning_rate": 4.541159860990443e-05, "loss": 0.0699, "step": 1690 }, { "epoch": 0.369244135534318, "grad_norm": 0.09489353746175766, "learning_rate": 4.5384448305821025e-05, "loss": 0.068, "step": 1700 }, { "epoch": 0.37141615986099047, "grad_norm": 0.09361070394515991, "learning_rate": 4.535729800173762e-05, "loss": 0.0676, "step": 1710 }, { "epoch": 0.3735881841876629, "grad_norm": 0.10838571935892105, "learning_rate": 4.533014769765422e-05, "loss": 0.058, "step": 1720 }, { "epoch": 0.37576020851433534, "grad_norm": 0.08518973737955093, "learning_rate": 4.530299739357081e-05, "loss": 0.0118, "step": 1730 }, { "epoch": 0.3779322328410078, "grad_norm": 0.0637778714299202, "learning_rate": 4.5275847089487404e-05, "loss": 0.0088, "step": 1740 }, { "epoch": 0.38010425716768026, "grad_norm": 0.1527567058801651, "learning_rate": 4.5248696785404e-05, "loss": 0.0343, "step": 1750 }, { "epoch": 0.3822762814943527, "grad_norm": 0.05855727940797806, "learning_rate": 4.522154648132059e-05, "loss": 0.0374, "step": 1760 }, { "epoch": 0.3844483058210252, "grad_norm": 0.10861743986606598, "learning_rate": 4.519439617723718e-05, "loss": 0.0705, "step": 1770 }, { "epoch": 0.38662033014769764, "grad_norm": 0.22820153832435608, "learning_rate": 4.5167245873153776e-05, "loss": 0.0659, "step": 1780 }, { "epoch": 0.3887923544743701, "grad_norm": 0.9262331128120422, "learning_rate": 4.5140095569070376e-05, "loss": 0.0662, "step": 1790 }, { "epoch": 0.39096437880104257, "grad_norm": 0.07286658883094788, "learning_rate": 4.511294526498697e-05, "loss": 0.053, "step": 1800 }, { "epoch": 0.39313640312771503, "grad_norm": 0.04754822701215744, "learning_rate": 4.508579496090357e-05, "loss": 0.0305, "step": 1810 }, { "epoch": 0.3953084274543875, "grad_norm": 0.2825242280960083, "learning_rate": 4.505864465682016e-05, "loss": 0.0465, "step": 1820 }, { "epoch": 0.39748045178105995, "grad_norm": 0.6193529963493347, "learning_rate": 4.5031494352736755e-05, "loss": 0.1629, "step": 1830 }, { "epoch": 0.3996524761077324, "grad_norm": 0.05920102447271347, "learning_rate": 4.500434404865335e-05, "loss": 0.0269, "step": 1840 }, { "epoch": 0.4018245004344049, "grad_norm": 0.06066862866282463, "learning_rate": 4.497719374456994e-05, "loss": 0.0329, "step": 1850 }, { "epoch": 0.40399652476107734, "grad_norm": 0.046323712915182114, "learning_rate": 4.4950043440486534e-05, "loss": 0.0056, "step": 1860 }, { "epoch": 0.4061685490877498, "grad_norm": 0.06750129908323288, "learning_rate": 4.4922893136403134e-05, "loss": 0.058, "step": 1870 }, { "epoch": 0.40834057341442226, "grad_norm": 0.045560941100120544, "learning_rate": 4.489574283231973e-05, "loss": 0.0075, "step": 1880 }, { "epoch": 0.4105125977410947, "grad_norm": 0.03991026058793068, "learning_rate": 4.486859252823632e-05, "loss": 0.0052, "step": 1890 }, { "epoch": 0.4126846220677672, "grad_norm": 0.03694348409771919, "learning_rate": 4.484144222415291e-05, "loss": 0.0045, "step": 1900 }, { "epoch": 0.41485664639443964, "grad_norm": 2.1510820388793945, "learning_rate": 4.4814291920069506e-05, "loss": 0.0821, "step": 1910 }, { "epoch": 0.41702867072111205, "grad_norm": 0.18489497900009155, "learning_rate": 4.47871416159861e-05, "loss": 0.0415, "step": 1920 }, { "epoch": 0.4192006950477845, "grad_norm": 0.7301527857780457, "learning_rate": 4.475999131190269e-05, "loss": 0.0817, "step": 1930 }, { "epoch": 0.42137271937445697, "grad_norm": 0.038684993982315063, "learning_rate": 4.4732841007819285e-05, "loss": 0.049, "step": 1940 }, { "epoch": 0.42354474370112943, "grad_norm": 0.9332627654075623, "learning_rate": 4.4705690703735885e-05, "loss": 0.0219, "step": 1950 }, { "epoch": 0.4257167680278019, "grad_norm": 0.14876317977905273, "learning_rate": 4.467854039965248e-05, "loss": 0.0206, "step": 1960 }, { "epoch": 0.42788879235447436, "grad_norm": 0.25213751196861267, "learning_rate": 4.465139009556907e-05, "loss": 0.0104, "step": 1970 }, { "epoch": 0.4300608166811468, "grad_norm": 0.04049533233046532, "learning_rate": 4.4624239791485664e-05, "loss": 0.0306, "step": 1980 }, { "epoch": 0.4322328410078193, "grad_norm": 0.04084054008126259, "learning_rate": 4.4597089487402264e-05, "loss": 0.1077, "step": 1990 }, { "epoch": 0.43440486533449174, "grad_norm": 0.07672455906867981, "learning_rate": 4.4569939183318857e-05, "loss": 0.0064, "step": 2000 }, { "epoch": 0.4365768896611642, "grad_norm": 0.5209121704101562, "learning_rate": 4.454278887923545e-05, "loss": 0.0407, "step": 2010 }, { "epoch": 0.43874891398783666, "grad_norm": 0.046432483941316605, "learning_rate": 4.451563857515205e-05, "loss": 0.0362, "step": 2020 }, { "epoch": 0.4409209383145091, "grad_norm": 0.045268259942531586, "learning_rate": 4.448848827106864e-05, "loss": 0.0487, "step": 2030 }, { "epoch": 0.4430929626411816, "grad_norm": 0.0382797010242939, "learning_rate": 4.4461337966985235e-05, "loss": 0.0173, "step": 2040 }, { "epoch": 0.44526498696785405, "grad_norm": 0.06607993692159653, "learning_rate": 4.443418766290183e-05, "loss": 0.0283, "step": 2050 }, { "epoch": 0.4474370112945265, "grad_norm": 0.13242176175117493, "learning_rate": 4.440703735881842e-05, "loss": 0.065, "step": 2060 }, { "epoch": 0.44960903562119897, "grad_norm": 0.5223703384399414, "learning_rate": 4.4379887054735014e-05, "loss": 0.0592, "step": 2070 }, { "epoch": 0.45178105994787143, "grad_norm": 0.032611947506666183, "learning_rate": 4.435273675065161e-05, "loss": 0.0496, "step": 2080 }, { "epoch": 0.4539530842745439, "grad_norm": 0.5175477266311646, "learning_rate": 4.43255864465682e-05, "loss": 0.0709, "step": 2090 }, { "epoch": 0.45612510860121636, "grad_norm": 0.12021242827177048, "learning_rate": 4.42984361424848e-05, "loss": 0.0696, "step": 2100 }, { "epoch": 0.4582971329278888, "grad_norm": 0.36755773425102234, "learning_rate": 4.427128583840139e-05, "loss": 0.0735, "step": 2110 }, { "epoch": 0.4604691572545613, "grad_norm": 0.05745413899421692, "learning_rate": 4.4244135534317986e-05, "loss": 0.0344, "step": 2120 }, { "epoch": 0.4626411815812337, "grad_norm": 0.040968943387269974, "learning_rate": 4.421698523023458e-05, "loss": 0.0475, "step": 2130 }, { "epoch": 0.46481320590790615, "grad_norm": 0.04184969514608383, "learning_rate": 4.418983492615117e-05, "loss": 0.0383, "step": 2140 }, { "epoch": 0.4669852302345786, "grad_norm": 0.049214381724596024, "learning_rate": 4.4162684622067765e-05, "loss": 0.0702, "step": 2150 }, { "epoch": 0.46915725456125107, "grad_norm": 0.05034701153635979, "learning_rate": 4.413553431798436e-05, "loss": 0.0058, "step": 2160 }, { "epoch": 0.47132927888792353, "grad_norm": 0.1388736218214035, "learning_rate": 4.410838401390096e-05, "loss": 0.0641, "step": 2170 }, { "epoch": 0.473501303214596, "grad_norm": 0.052767496556043625, "learning_rate": 4.408123370981755e-05, "loss": 0.0076, "step": 2180 }, { "epoch": 0.47567332754126845, "grad_norm": 0.04132657125592232, "learning_rate": 4.405408340573415e-05, "loss": 0.1008, "step": 2190 }, { "epoch": 0.4778453518679409, "grad_norm": 0.054085779935121536, "learning_rate": 4.4026933101650744e-05, "loss": 0.038, "step": 2200 }, { "epoch": 0.4800173761946134, "grad_norm": 0.22452211380004883, "learning_rate": 4.399978279756734e-05, "loss": 0.0699, "step": 2210 }, { "epoch": 0.48218940052128584, "grad_norm": 0.04294193536043167, "learning_rate": 4.397263249348393e-05, "loss": 0.0406, "step": 2220 }, { "epoch": 0.4843614248479583, "grad_norm": 0.04693491756916046, "learning_rate": 4.394548218940052e-05, "loss": 0.0344, "step": 2230 }, { "epoch": 0.48653344917463076, "grad_norm": 0.06575354188680649, "learning_rate": 4.3918331885317116e-05, "loss": 0.0333, "step": 2240 }, { "epoch": 0.4887054735013032, "grad_norm": 0.09756498783826828, "learning_rate": 4.3891181581233716e-05, "loss": 0.0091, "step": 2250 }, { "epoch": 0.4908774978279757, "grad_norm": 0.03621898964047432, "learning_rate": 4.386403127715031e-05, "loss": 0.0655, "step": 2260 }, { "epoch": 0.49304952215464815, "grad_norm": 0.33425211906433105, "learning_rate": 4.38368809730669e-05, "loss": 0.141, "step": 2270 }, { "epoch": 0.4952215464813206, "grad_norm": 0.4894958734512329, "learning_rate": 4.3809730668983495e-05, "loss": 0.0667, "step": 2280 }, { "epoch": 0.49739357080799307, "grad_norm": 0.6559067368507385, "learning_rate": 4.378258036490009e-05, "loss": 0.0452, "step": 2290 }, { "epoch": 0.49956559513466553, "grad_norm": 0.04155660793185234, "learning_rate": 4.375543006081668e-05, "loss": 0.0369, "step": 2300 }, { "epoch": 0.501737619461338, "grad_norm": 0.04563942924141884, "learning_rate": 4.3728279756733274e-05, "loss": 0.0297, "step": 2310 }, { "epoch": 0.5039096437880104, "grad_norm": 0.04393638297915459, "learning_rate": 4.370112945264987e-05, "loss": 0.0052, "step": 2320 }, { "epoch": 0.5060816681146829, "grad_norm": 0.03522152081131935, "learning_rate": 4.367397914856647e-05, "loss": 0.0057, "step": 2330 }, { "epoch": 0.5082536924413553, "grad_norm": 0.04149174690246582, "learning_rate": 4.364682884448306e-05, "loss": 0.0327, "step": 2340 }, { "epoch": 0.5104257167680278, "grad_norm": 0.05420512333512306, "learning_rate": 4.361967854039965e-05, "loss": 0.0405, "step": 2350 }, { "epoch": 0.5125977410947002, "grad_norm": 0.29514026641845703, "learning_rate": 4.3592528236316246e-05, "loss": 0.0221, "step": 2360 }, { "epoch": 0.5147697654213728, "grad_norm": 0.04918777570128441, "learning_rate": 4.3565377932232846e-05, "loss": 0.0374, "step": 2370 }, { "epoch": 0.5169417897480452, "grad_norm": 0.769515872001648, "learning_rate": 4.353822762814944e-05, "loss": 0.061, "step": 2380 }, { "epoch": 0.5191138140747177, "grad_norm": 0.058411117643117905, "learning_rate": 4.351107732406603e-05, "loss": 0.0053, "step": 2390 }, { "epoch": 0.5212858384013901, "grad_norm": 0.04015062376856804, "learning_rate": 4.3483927019982625e-05, "loss": 0.0061, "step": 2400 }, { "epoch": 0.5234578627280626, "grad_norm": 0.06921057403087616, "learning_rate": 4.3456776715899224e-05, "loss": 0.02, "step": 2410 }, { "epoch": 0.525629887054735, "grad_norm": 1.794832706451416, "learning_rate": 4.342962641181582e-05, "loss": 0.0521, "step": 2420 }, { "epoch": 0.5278019113814074, "grad_norm": 0.09245938807725906, "learning_rate": 4.340247610773241e-05, "loss": 0.0446, "step": 2430 }, { "epoch": 0.5299739357080799, "grad_norm": 0.06291071325540543, "learning_rate": 4.3375325803649004e-05, "loss": 0.0616, "step": 2440 }, { "epoch": 0.5321459600347523, "grad_norm": 0.9209924936294556, "learning_rate": 4.3348175499565597e-05, "loss": 0.0683, "step": 2450 }, { "epoch": 0.5343179843614249, "grad_norm": 0.05425272881984711, "learning_rate": 4.332102519548219e-05, "loss": 0.0065, "step": 2460 }, { "epoch": 0.5364900086880973, "grad_norm": 0.13152609765529633, "learning_rate": 4.329387489139878e-05, "loss": 0.0236, "step": 2470 }, { "epoch": 0.5386620330147698, "grad_norm": 0.1100706234574318, "learning_rate": 4.326672458731538e-05, "loss": 0.0379, "step": 2480 }, { "epoch": 0.5408340573414422, "grad_norm": 0.06025245040655136, "learning_rate": 4.3239574283231975e-05, "loss": 0.0097, "step": 2490 }, { "epoch": 0.5430060816681147, "grad_norm": 0.02963717095553875, "learning_rate": 4.321242397914857e-05, "loss": 0.0037, "step": 2500 }, { "epoch": 0.5451781059947871, "grad_norm": 0.08883394300937653, "learning_rate": 4.318527367506516e-05, "loss": 0.1101, "step": 2510 }, { "epoch": 0.5473501303214596, "grad_norm": 0.03146001324057579, "learning_rate": 4.3158123370981754e-05, "loss": 0.0062, "step": 2520 }, { "epoch": 0.549522154648132, "grad_norm": 0.06231880933046341, "learning_rate": 4.313097306689835e-05, "loss": 0.0592, "step": 2530 }, { "epoch": 0.5516941789748045, "grad_norm": 0.05751520395278931, "learning_rate": 4.310382276281494e-05, "loss": 0.0111, "step": 2540 }, { "epoch": 0.553866203301477, "grad_norm": 0.060239288955926895, "learning_rate": 4.307667245873154e-05, "loss": 0.0086, "step": 2550 }, { "epoch": 0.5560382276281495, "grad_norm": 0.027065258473157883, "learning_rate": 4.304952215464813e-05, "loss": 0.0403, "step": 2560 }, { "epoch": 0.5582102519548219, "grad_norm": 0.02755948156118393, "learning_rate": 4.302237185056473e-05, "loss": 0.0316, "step": 2570 }, { "epoch": 0.5603822762814944, "grad_norm": 0.0682368353009224, "learning_rate": 4.2995221546481326e-05, "loss": 0.0856, "step": 2580 }, { "epoch": 0.5625543006081668, "grad_norm": 0.3988370895385742, "learning_rate": 4.296807124239792e-05, "loss": 0.0503, "step": 2590 }, { "epoch": 0.5647263249348393, "grad_norm": 0.05351010710000992, "learning_rate": 4.294092093831451e-05, "loss": 0.046, "step": 2600 }, { "epoch": 0.5668983492615117, "grad_norm": 0.06630027294158936, "learning_rate": 4.2913770634231105e-05, "loss": 0.0683, "step": 2610 }, { "epoch": 0.5690703735881842, "grad_norm": 0.5351850390434265, "learning_rate": 4.28866203301477e-05, "loss": 0.0789, "step": 2620 }, { "epoch": 0.5712423979148566, "grad_norm": 0.9941126108169556, "learning_rate": 4.28594700260643e-05, "loss": 0.0497, "step": 2630 }, { "epoch": 0.573414422241529, "grad_norm": 0.06872381269931793, "learning_rate": 4.283231972198089e-05, "loss": 0.0076, "step": 2640 }, { "epoch": 0.5755864465682016, "grad_norm": 0.04038708284497261, "learning_rate": 4.2805169417897484e-05, "loss": 0.008, "step": 2650 }, { "epoch": 0.577758470894874, "grad_norm": 1.9096306562423706, "learning_rate": 4.277801911381408e-05, "loss": 0.0591, "step": 2660 }, { "epoch": 0.5799304952215465, "grad_norm": 0.030831623822450638, "learning_rate": 4.275086880973067e-05, "loss": 0.0055, "step": 2670 }, { "epoch": 0.5821025195482189, "grad_norm": 0.02636183612048626, "learning_rate": 4.272371850564726e-05, "loss": 0.0079, "step": 2680 }, { "epoch": 0.5842745438748914, "grad_norm": 0.153968945145607, "learning_rate": 4.2696568201563856e-05, "loss": 0.1446, "step": 2690 }, { "epoch": 0.5864465682015638, "grad_norm": 0.1505323201417923, "learning_rate": 4.266941789748045e-05, "loss": 0.1777, "step": 2700 }, { "epoch": 0.5886185925282363, "grad_norm": 0.06393560767173767, "learning_rate": 4.264226759339705e-05, "loss": 0.0447, "step": 2710 }, { "epoch": 0.5907906168549087, "grad_norm": 0.1916840672492981, "learning_rate": 4.261511728931364e-05, "loss": 0.0545, "step": 2720 }, { "epoch": 0.5929626411815813, "grad_norm": 0.039754655212163925, "learning_rate": 4.2587966985230235e-05, "loss": 0.0179, "step": 2730 }, { "epoch": 0.5951346655082537, "grad_norm": 0.03142647072672844, "learning_rate": 4.256081668114683e-05, "loss": 0.085, "step": 2740 }, { "epoch": 0.5973066898349262, "grad_norm": 0.22314947843551636, "learning_rate": 4.253366637706343e-05, "loss": 0.055, "step": 2750 }, { "epoch": 0.5994787141615986, "grad_norm": 0.8706423044204712, "learning_rate": 4.250651607298002e-05, "loss": 0.0515, "step": 2760 }, { "epoch": 0.6016507384882711, "grad_norm": 0.17597514390945435, "learning_rate": 4.2479365768896614e-05, "loss": 0.0316, "step": 2770 }, { "epoch": 0.6038227628149435, "grad_norm": 0.02838090807199478, "learning_rate": 4.245221546481321e-05, "loss": 0.0559, "step": 2780 }, { "epoch": 0.605994787141616, "grad_norm": 0.028333989903330803, "learning_rate": 4.2425065160729807e-05, "loss": 0.0058, "step": 2790 }, { "epoch": 0.6081668114682884, "grad_norm": 0.5321160554885864, "learning_rate": 4.23979148566464e-05, "loss": 0.0544, "step": 2800 }, { "epoch": 0.610338835794961, "grad_norm": 3.2245936393737793, "learning_rate": 4.237076455256299e-05, "loss": 0.0498, "step": 2810 }, { "epoch": 0.6125108601216334, "grad_norm": 0.029928002506494522, "learning_rate": 4.2343614248479586e-05, "loss": 0.0181, "step": 2820 }, { "epoch": 0.6146828844483058, "grad_norm": 0.02794519253075123, "learning_rate": 4.231646394439618e-05, "loss": 0.0318, "step": 2830 }, { "epoch": 0.6168549087749783, "grad_norm": 0.02685512974858284, "learning_rate": 4.228931364031277e-05, "loss": 0.015, "step": 2840 }, { "epoch": 0.6190269331016507, "grad_norm": 0.029942605644464493, "learning_rate": 4.2262163336229365e-05, "loss": 0.0033, "step": 2850 }, { "epoch": 0.6211989574283232, "grad_norm": 0.02547876164317131, "learning_rate": 4.2235013032145964e-05, "loss": 0.0389, "step": 2860 }, { "epoch": 0.6233709817549956, "grad_norm": 2.500945568084717, "learning_rate": 4.220786272806256e-05, "loss": 0.0626, "step": 2870 }, { "epoch": 0.6255430060816681, "grad_norm": 0.3283202350139618, "learning_rate": 4.218071242397915e-05, "loss": 0.0393, "step": 2880 }, { "epoch": 0.6277150304083405, "grad_norm": 1.1086935997009277, "learning_rate": 4.2153562119895744e-05, "loss": 0.036, "step": 2890 }, { "epoch": 0.629887054735013, "grad_norm": 0.02584236115217209, "learning_rate": 4.2126411815812337e-05, "loss": 0.0096, "step": 2900 }, { "epoch": 0.6320590790616855, "grad_norm": 0.02778252400457859, "learning_rate": 4.209926151172893e-05, "loss": 0.0478, "step": 2910 }, { "epoch": 0.634231103388358, "grad_norm": 0.026364050805568695, "learning_rate": 4.207211120764552e-05, "loss": 0.0028, "step": 2920 }, { "epoch": 0.6364031277150304, "grad_norm": 0.025249965488910675, "learning_rate": 4.204496090356212e-05, "loss": 0.0526, "step": 2930 }, { "epoch": 0.6385751520417029, "grad_norm": 0.029704120010137558, "learning_rate": 4.2017810599478715e-05, "loss": 0.029, "step": 2940 }, { "epoch": 0.6407471763683753, "grad_norm": 0.041738465428352356, "learning_rate": 4.1990660295395315e-05, "loss": 0.0514, "step": 2950 }, { "epoch": 0.6429192006950478, "grad_norm": 0.02726450003683567, "learning_rate": 4.196350999131191e-05, "loss": 0.0566, "step": 2960 }, { "epoch": 0.6450912250217202, "grad_norm": 0.027154915034770966, "learning_rate": 4.19363596872285e-05, "loss": 0.0075, "step": 2970 }, { "epoch": 0.6472632493483927, "grad_norm": 0.02589614875614643, "learning_rate": 4.1909209383145094e-05, "loss": 0.0032, "step": 2980 }, { "epoch": 0.6494352736750652, "grad_norm": 0.03165976330637932, "learning_rate": 4.188205907906169e-05, "loss": 0.0331, "step": 2990 }, { "epoch": 0.6516072980017377, "grad_norm": 0.46476975083351135, "learning_rate": 4.185490877497828e-05, "loss": 0.0548, "step": 3000 }, { "epoch": 0.6537793223284101, "grad_norm": 0.7368276119232178, "learning_rate": 4.182775847089487e-05, "loss": 0.0981, "step": 3010 }, { "epoch": 0.6559513466550826, "grad_norm": 0.03922104462981224, "learning_rate": 4.180060816681147e-05, "loss": 0.0145, "step": 3020 }, { "epoch": 0.658123370981755, "grad_norm": 0.03890977427363396, "learning_rate": 4.1773457862728066e-05, "loss": 0.026, "step": 3030 }, { "epoch": 0.6602953953084274, "grad_norm": 0.05434383079409599, "learning_rate": 4.174630755864466e-05, "loss": 0.0045, "step": 3040 }, { "epoch": 0.6624674196350999, "grad_norm": 0.032415471971035004, "learning_rate": 4.171915725456125e-05, "loss": 0.0997, "step": 3050 }, { "epoch": 0.6646394439617723, "grad_norm": 0.024642497301101685, "learning_rate": 4.1692006950477845e-05, "loss": 0.016, "step": 3060 }, { "epoch": 0.6668114682884448, "grad_norm": 0.023403340950608253, "learning_rate": 4.166485664639444e-05, "loss": 0.0055, "step": 3070 }, { "epoch": 0.6689834926151172, "grad_norm": 0.07261425256729126, "learning_rate": 4.163770634231103e-05, "loss": 0.1822, "step": 3080 }, { "epoch": 0.6711555169417898, "grad_norm": 0.13217313587665558, "learning_rate": 4.161055603822763e-05, "loss": 0.0597, "step": 3090 }, { "epoch": 0.6733275412684622, "grad_norm": 0.026420656591653824, "learning_rate": 4.1583405734144224e-05, "loss": 0.0204, "step": 3100 }, { "epoch": 0.6754995655951347, "grad_norm": 0.02567846141755581, "learning_rate": 4.155625543006082e-05, "loss": 0.0439, "step": 3110 }, { "epoch": 0.6776715899218071, "grad_norm": 0.19673630595207214, "learning_rate": 4.152910512597741e-05, "loss": 0.0767, "step": 3120 }, { "epoch": 0.6798436142484796, "grad_norm": 0.03708234429359436, "learning_rate": 4.150195482189401e-05, "loss": 0.0228, "step": 3130 }, { "epoch": 0.682015638575152, "grad_norm": 0.026442553848028183, "learning_rate": 4.14748045178106e-05, "loss": 0.0036, "step": 3140 }, { "epoch": 0.6841876629018245, "grad_norm": 0.05390092357993126, "learning_rate": 4.1447654213727196e-05, "loss": 0.0936, "step": 3150 }, { "epoch": 0.6863596872284969, "grad_norm": 0.20121973752975464, "learning_rate": 4.142050390964379e-05, "loss": 0.0275, "step": 3160 }, { "epoch": 0.6885317115551695, "grad_norm": 0.027467237785458565, "learning_rate": 4.139335360556039e-05, "loss": 0.0046, "step": 3170 }, { "epoch": 0.6907037358818419, "grad_norm": 0.022788554430007935, "learning_rate": 4.136620330147698e-05, "loss": 0.0329, "step": 3180 }, { "epoch": 0.6928757602085144, "grad_norm": 0.04069753736257553, "learning_rate": 4.1339052997393575e-05, "loss": 0.0684, "step": 3190 }, { "epoch": 0.6950477845351868, "grad_norm": 0.046147171407938004, "learning_rate": 4.131190269331017e-05, "loss": 0.0289, "step": 3200 }, { "epoch": 0.6972198088618593, "grad_norm": 0.03701859340071678, "learning_rate": 4.128475238922676e-05, "loss": 0.0634, "step": 3210 }, { "epoch": 0.6993918331885317, "grad_norm": 0.053171392530202866, "learning_rate": 4.1257602085143354e-05, "loss": 0.016, "step": 3220 }, { "epoch": 0.7015638575152042, "grad_norm": 0.06105630844831467, "learning_rate": 4.123045178105995e-05, "loss": 0.047, "step": 3230 }, { "epoch": 0.7037358818418766, "grad_norm": 0.058090176433324814, "learning_rate": 4.1203301476976547e-05, "loss": 0.0345, "step": 3240 }, { "epoch": 0.705907906168549, "grad_norm": 1.5910778045654297, "learning_rate": 4.117615117289314e-05, "loss": 0.0487, "step": 3250 }, { "epoch": 0.7080799304952216, "grad_norm": 0.11721807718276978, "learning_rate": 4.114900086880973e-05, "loss": 0.0201, "step": 3260 }, { "epoch": 0.710251954821894, "grad_norm": 1.5974066257476807, "learning_rate": 4.1121850564726326e-05, "loss": 0.0277, "step": 3270 }, { "epoch": 0.7124239791485665, "grad_norm": 0.023307811468839645, "learning_rate": 4.109470026064292e-05, "loss": 0.0028, "step": 3280 }, { "epoch": 0.7145960034752389, "grad_norm": 0.04362959787249565, "learning_rate": 4.106754995655951e-05, "loss": 0.0195, "step": 3290 }, { "epoch": 0.7167680278019114, "grad_norm": 0.2800048291683197, "learning_rate": 4.104039965247611e-05, "loss": 0.0792, "step": 3300 }, { "epoch": 0.7189400521285838, "grad_norm": 0.05572018399834633, "learning_rate": 4.1013249348392704e-05, "loss": 0.0295, "step": 3310 }, { "epoch": 0.7211120764552563, "grad_norm": 0.024460218846797943, "learning_rate": 4.09860990443093e-05, "loss": 0.0026, "step": 3320 }, { "epoch": 0.7232841007819287, "grad_norm": 0.022767340764403343, "learning_rate": 4.09589487402259e-05, "loss": 0.0825, "step": 3330 }, { "epoch": 0.7254561251086012, "grad_norm": 0.03402335196733475, "learning_rate": 4.093179843614249e-05, "loss": 0.0394, "step": 3340 }, { "epoch": 0.7276281494352737, "grad_norm": 0.11153494566679001, "learning_rate": 4.090464813205908e-05, "loss": 0.0049, "step": 3350 }, { "epoch": 0.7298001737619462, "grad_norm": 1.4000017642974854, "learning_rate": 4.0877497827975676e-05, "loss": 0.0656, "step": 3360 }, { "epoch": 0.7319721980886186, "grad_norm": 0.10694686323404312, "learning_rate": 4.085034752389227e-05, "loss": 0.0139, "step": 3370 }, { "epoch": 0.7341442224152911, "grad_norm": 0.022809429094195366, "learning_rate": 4.082319721980886e-05, "loss": 0.0089, "step": 3380 }, { "epoch": 0.7363162467419635, "grad_norm": 0.024037901312112808, "learning_rate": 4.0796046915725455e-05, "loss": 0.0489, "step": 3390 }, { "epoch": 0.738488271068636, "grad_norm": 0.024476533755660057, "learning_rate": 4.0768896611642055e-05, "loss": 0.0344, "step": 3400 }, { "epoch": 0.7406602953953084, "grad_norm": 0.05012943968176842, "learning_rate": 4.074174630755865e-05, "loss": 0.0374, "step": 3410 }, { "epoch": 0.7428323197219809, "grad_norm": 0.1134481132030487, "learning_rate": 4.071459600347524e-05, "loss": 0.0406, "step": 3420 }, { "epoch": 0.7450043440486533, "grad_norm": 0.04325913265347481, "learning_rate": 4.0687445699391834e-05, "loss": 0.0226, "step": 3430 }, { "epoch": 0.7471763683753258, "grad_norm": 0.029155496507883072, "learning_rate": 4.066029539530843e-05, "loss": 0.0342, "step": 3440 }, { "epoch": 0.7493483927019983, "grad_norm": 0.030118346214294434, "learning_rate": 4.063314509122502e-05, "loss": 0.0036, "step": 3450 }, { "epoch": 0.7515204170286707, "grad_norm": 0.024459168314933777, "learning_rate": 4.060599478714161e-05, "loss": 0.0027, "step": 3460 }, { "epoch": 0.7536924413553432, "grad_norm": 0.06704209744930267, "learning_rate": 4.057884448305821e-05, "loss": 0.0283, "step": 3470 }, { "epoch": 0.7558644656820156, "grad_norm": 0.02914384752511978, "learning_rate": 4.0551694178974806e-05, "loss": 0.0316, "step": 3480 }, { "epoch": 0.7580364900086881, "grad_norm": 0.26781970262527466, "learning_rate": 4.05245438748914e-05, "loss": 0.0396, "step": 3490 }, { "epoch": 0.7602085143353605, "grad_norm": 0.19622226059436798, "learning_rate": 4.049739357080799e-05, "loss": 0.0151, "step": 3500 }, { "epoch": 0.762380538662033, "grad_norm": 0.04174257442355156, "learning_rate": 4.047024326672459e-05, "loss": 0.0105, "step": 3510 }, { "epoch": 0.7645525629887054, "grad_norm": 1.6611101627349854, "learning_rate": 4.0443092962641185e-05, "loss": 0.0353, "step": 3520 }, { "epoch": 0.766724587315378, "grad_norm": 1.0151467323303223, "learning_rate": 4.041594265855778e-05, "loss": 0.0443, "step": 3530 }, { "epoch": 0.7688966116420504, "grad_norm": 0.18949908018112183, "learning_rate": 4.038879235447437e-05, "loss": 0.006, "step": 3540 }, { "epoch": 0.7710686359687229, "grad_norm": 0.019808197394013405, "learning_rate": 4.036164205039097e-05, "loss": 0.0038, "step": 3550 }, { "epoch": 0.7732406602953953, "grad_norm": 0.05713880434632301, "learning_rate": 4.0334491746307564e-05, "loss": 0.0373, "step": 3560 }, { "epoch": 0.7754126846220678, "grad_norm": 0.029946787282824516, "learning_rate": 4.030734144222416e-05, "loss": 0.0299, "step": 3570 }, { "epoch": 0.7775847089487402, "grad_norm": 0.07558543980121613, "learning_rate": 4.028019113814075e-05, "loss": 0.0565, "step": 3580 }, { "epoch": 0.7797567332754127, "grad_norm": 0.03241603448987007, "learning_rate": 4.025304083405734e-05, "loss": 0.0051, "step": 3590 }, { "epoch": 0.7819287576020851, "grad_norm": 0.02400217391550541, "learning_rate": 4.0225890529973936e-05, "loss": 0.0363, "step": 3600 }, { "epoch": 0.7841007819287577, "grad_norm": 0.13099756836891174, "learning_rate": 4.019874022589053e-05, "loss": 0.0175, "step": 3610 }, { "epoch": 0.7862728062554301, "grad_norm": 0.7141011357307434, "learning_rate": 4.017158992180712e-05, "loss": 0.0122, "step": 3620 }, { "epoch": 0.7884448305821026, "grad_norm": 0.04486560821533203, "learning_rate": 4.014443961772372e-05, "loss": 0.0279, "step": 3630 }, { "epoch": 0.790616854908775, "grad_norm": 0.02008930593729019, "learning_rate": 4.0117289313640315e-05, "loss": 0.0497, "step": 3640 }, { "epoch": 0.7927888792354474, "grad_norm": 0.1151106208562851, "learning_rate": 4.009013900955691e-05, "loss": 0.0327, "step": 3650 }, { "epoch": 0.7949609035621199, "grad_norm": 0.02791333571076393, "learning_rate": 4.00629887054735e-05, "loss": 0.0057, "step": 3660 }, { "epoch": 0.7971329278887923, "grad_norm": 1.612856388092041, "learning_rate": 4.0035838401390094e-05, "loss": 0.0368, "step": 3670 }, { "epoch": 0.7993049522154648, "grad_norm": 0.03310969099402428, "learning_rate": 4.0008688097306694e-05, "loss": 0.0023, "step": 3680 }, { "epoch": 0.8014769765421372, "grad_norm": 0.021099913865327835, "learning_rate": 3.9981537793223287e-05, "loss": 0.0265, "step": 3690 }, { "epoch": 0.8036490008688097, "grad_norm": 0.03919641301035881, "learning_rate": 3.995438748913988e-05, "loss": 0.0393, "step": 3700 }, { "epoch": 0.8058210251954822, "grad_norm": 2.8318557739257812, "learning_rate": 3.992723718505648e-05, "loss": 0.0158, "step": 3710 }, { "epoch": 0.8079930495221547, "grad_norm": 0.07735323160886765, "learning_rate": 3.990008688097307e-05, "loss": 0.0029, "step": 3720 }, { "epoch": 0.8101650738488271, "grad_norm": 0.0240157600492239, "learning_rate": 3.9872936576889665e-05, "loss": 0.0233, "step": 3730 }, { "epoch": 0.8123370981754996, "grad_norm": 0.04485835134983063, "learning_rate": 3.984578627280626e-05, "loss": 0.0169, "step": 3740 }, { "epoch": 0.814509122502172, "grad_norm": 5.289847373962402, "learning_rate": 3.981863596872285e-05, "loss": 0.0335, "step": 3750 }, { "epoch": 0.8166811468288445, "grad_norm": 0.018795961514115334, "learning_rate": 3.9791485664639444e-05, "loss": 0.0383, "step": 3760 }, { "epoch": 0.8188531711555169, "grad_norm": 0.18189352750778198, "learning_rate": 3.976433536055604e-05, "loss": 0.0718, "step": 3770 }, { "epoch": 0.8210251954821894, "grad_norm": 0.050412606447935104, "learning_rate": 3.973718505647264e-05, "loss": 0.0136, "step": 3780 }, { "epoch": 0.8231972198088618, "grad_norm": 0.21731217205524445, "learning_rate": 3.971003475238923e-05, "loss": 0.0607, "step": 3790 }, { "epoch": 0.8253692441355344, "grad_norm": 0.04902643337845802, "learning_rate": 3.968288444830582e-05, "loss": 0.0061, "step": 3800 }, { "epoch": 0.8275412684622068, "grad_norm": 0.015945184975862503, "learning_rate": 3.9655734144222416e-05, "loss": 0.0021, "step": 3810 }, { "epoch": 0.8297132927888793, "grad_norm": 0.05196581408381462, "learning_rate": 3.962858384013901e-05, "loss": 0.0584, "step": 3820 }, { "epoch": 0.8318853171155517, "grad_norm": 1.6970964670181274, "learning_rate": 3.96014335360556e-05, "loss": 0.0235, "step": 3830 }, { "epoch": 0.8340573414422241, "grad_norm": 0.01579507440328598, "learning_rate": 3.9574283231972195e-05, "loss": 0.0141, "step": 3840 }, { "epoch": 0.8362293657688966, "grad_norm": 0.015089421533048153, "learning_rate": 3.9547132927888795e-05, "loss": 0.0338, "step": 3850 }, { "epoch": 0.838401390095569, "grad_norm": 1.4259192943572998, "learning_rate": 3.951998262380539e-05, "loss": 0.0454, "step": 3860 }, { "epoch": 0.8405734144222415, "grad_norm": 0.6754148602485657, "learning_rate": 3.949283231972198e-05, "loss": 0.08, "step": 3870 }, { "epoch": 0.8427454387489139, "grad_norm": 0.045398175716400146, "learning_rate": 3.946568201563858e-05, "loss": 0.043, "step": 3880 }, { "epoch": 0.8449174630755865, "grad_norm": 0.023143045604228973, "learning_rate": 3.9438531711555174e-05, "loss": 0.0036, "step": 3890 }, { "epoch": 0.8470894874022589, "grad_norm": 0.015820972621440887, "learning_rate": 3.941138140747177e-05, "loss": 0.0062, "step": 3900 }, { "epoch": 0.8492615117289314, "grad_norm": 0.014995508827269077, "learning_rate": 3.938423110338836e-05, "loss": 0.0318, "step": 3910 }, { "epoch": 0.8514335360556038, "grad_norm": 0.01640624739229679, "learning_rate": 3.935708079930495e-05, "loss": 0.04, "step": 3920 }, { "epoch": 0.8536055603822763, "grad_norm": 0.09035991877317429, "learning_rate": 3.932993049522155e-05, "loss": 0.0473, "step": 3930 }, { "epoch": 0.8557775847089487, "grad_norm": 0.1076781302690506, "learning_rate": 3.9302780191138146e-05, "loss": 0.0227, "step": 3940 }, { "epoch": 0.8579496090356212, "grad_norm": 0.017277223989367485, "learning_rate": 3.927562988705474e-05, "loss": 0.0039, "step": 3950 }, { "epoch": 0.8601216333622936, "grad_norm": 0.01418287679553032, "learning_rate": 3.924847958297133e-05, "loss": 0.0016, "step": 3960 }, { "epoch": 0.8622936576889662, "grad_norm": 0.013834814541041851, "learning_rate": 3.9221329278887925e-05, "loss": 0.0017, "step": 3970 }, { "epoch": 0.8644656820156386, "grad_norm": 0.013640154153108597, "learning_rate": 3.919417897480452e-05, "loss": 0.0423, "step": 3980 }, { "epoch": 0.8666377063423111, "grad_norm": 0.09465904533863068, "learning_rate": 3.916702867072111e-05, "loss": 0.0823, "step": 3990 }, { "epoch": 0.8688097306689835, "grad_norm": 0.07072905451059341, "learning_rate": 3.9139878366637704e-05, "loss": 0.0544, "step": 4000 }, { "epoch": 0.870981754995656, "grad_norm": 0.04354293271899223, "learning_rate": 3.9112728062554304e-05, "loss": 0.0084, "step": 4010 }, { "epoch": 0.8731537793223284, "grad_norm": 0.017259210348129272, "learning_rate": 3.90855777584709e-05, "loss": 0.0019, "step": 4020 }, { "epoch": 0.8753258036490009, "grad_norm": 0.018902383744716644, "learning_rate": 3.905842745438749e-05, "loss": 0.0355, "step": 4030 }, { "epoch": 0.8774978279756733, "grad_norm": 0.022762592881917953, "learning_rate": 3.903127715030408e-05, "loss": 0.0376, "step": 4040 }, { "epoch": 0.8796698523023457, "grad_norm": 0.810174822807312, "learning_rate": 3.9004126846220676e-05, "loss": 0.1255, "step": 4050 }, { "epoch": 0.8818418766290183, "grad_norm": 0.024319645017385483, "learning_rate": 3.8976976542137276e-05, "loss": 0.0112, "step": 4060 }, { "epoch": 0.8840139009556907, "grad_norm": 0.01799897663295269, "learning_rate": 3.894982623805387e-05, "loss": 0.0022, "step": 4070 }, { "epoch": 0.8861859252823632, "grad_norm": 0.016442058607935905, "learning_rate": 3.892267593397046e-05, "loss": 0.0018, "step": 4080 }, { "epoch": 0.8883579496090356, "grad_norm": 0.015257969498634338, "learning_rate": 3.889552562988706e-05, "loss": 0.064, "step": 4090 }, { "epoch": 0.8905299739357081, "grad_norm": 0.03301286697387695, "learning_rate": 3.8868375325803654e-05, "loss": 0.0188, "step": 4100 }, { "epoch": 0.8927019982623805, "grad_norm": 0.09779565036296844, "learning_rate": 3.884122502172025e-05, "loss": 0.0401, "step": 4110 }, { "epoch": 0.894874022589053, "grad_norm": 0.10050684213638306, "learning_rate": 3.881407471763684e-05, "loss": 0.0068, "step": 4120 }, { "epoch": 0.8970460469157254, "grad_norm": 1.210329294204712, "learning_rate": 3.8786924413553433e-05, "loss": 0.0995, "step": 4130 }, { "epoch": 0.8992180712423979, "grad_norm": 2.245310068130493, "learning_rate": 3.876248913987837e-05, "loss": 0.1687, "step": 4140 }, { "epoch": 0.9013900955690703, "grad_norm": 0.03958917781710625, "learning_rate": 3.873533883579497e-05, "loss": 0.0064, "step": 4150 }, { "epoch": 0.9035621198957429, "grad_norm": 0.018245557323098183, "learning_rate": 3.870818853171156e-05, "loss": 0.03, "step": 4160 }, { "epoch": 0.9057341442224153, "grad_norm": 0.017429698258638382, "learning_rate": 3.868103822762815e-05, "loss": 0.0024, "step": 4170 }, { "epoch": 0.9079061685490878, "grad_norm": 0.029029618948698044, "learning_rate": 3.8653887923544746e-05, "loss": 0.0026, "step": 4180 }, { "epoch": 0.9100781928757602, "grad_norm": 0.034047931432724, "learning_rate": 3.862673761946134e-05, "loss": 0.0189, "step": 4190 }, { "epoch": 0.9122502172024327, "grad_norm": 0.06103040277957916, "learning_rate": 3.859958731537793e-05, "loss": 0.0151, "step": 4200 }, { "epoch": 0.9144222415291051, "grad_norm": 0.018298335373401642, "learning_rate": 3.8572437011294525e-05, "loss": 0.0399, "step": 4210 }, { "epoch": 0.9165942658557776, "grad_norm": 0.018746716901659966, "learning_rate": 3.854528670721112e-05, "loss": 0.0136, "step": 4220 }, { "epoch": 0.91876629018245, "grad_norm": 2.7724406719207764, "learning_rate": 3.851813640312772e-05, "loss": 0.0163, "step": 4230 }, { "epoch": 0.9209383145091226, "grad_norm": 0.03073030896484852, "learning_rate": 3.849098609904431e-05, "loss": 0.0025, "step": 4240 }, { "epoch": 0.923110338835795, "grad_norm": 0.06461982429027557, "learning_rate": 3.8463835794960904e-05, "loss": 0.0488, "step": 4250 }, { "epoch": 0.9252823631624674, "grad_norm": 0.023927874863147736, "learning_rate": 3.84366854908775e-05, "loss": 0.0078, "step": 4260 }, { "epoch": 0.9274543874891399, "grad_norm": 0.022562723606824875, "learning_rate": 3.840953518679409e-05, "loss": 0.0357, "step": 4270 }, { "epoch": 0.9296264118158123, "grad_norm": 0.10135874897241592, "learning_rate": 3.838238488271069e-05, "loss": 0.0118, "step": 4280 }, { "epoch": 0.9317984361424848, "grad_norm": 0.014547540806233883, "learning_rate": 3.835523457862728e-05, "loss": 0.022, "step": 4290 }, { "epoch": 0.9339704604691572, "grad_norm": 0.013702181167900562, "learning_rate": 3.8328084274543876e-05, "loss": 0.0181, "step": 4300 }, { "epoch": 0.9361424847958297, "grad_norm": 0.022185347974300385, "learning_rate": 3.8300933970460476e-05, "loss": 0.0714, "step": 4310 }, { "epoch": 0.9383145091225021, "grad_norm": 0.04213215410709381, "learning_rate": 3.827378366637707e-05, "loss": 0.0432, "step": 4320 }, { "epoch": 0.9404865334491747, "grad_norm": 1.9515026807785034, "learning_rate": 3.824663336229366e-05, "loss": 0.0362, "step": 4330 }, { "epoch": 0.9426585577758471, "grad_norm": 0.03761598840355873, "learning_rate": 3.8219483058210255e-05, "loss": 0.0458, "step": 4340 }, { "epoch": 0.9448305821025196, "grad_norm": 0.0594487339258194, "learning_rate": 3.819233275412685e-05, "loss": 0.0042, "step": 4350 }, { "epoch": 0.947002606429192, "grad_norm": 0.01828021928668022, "learning_rate": 3.816518245004344e-05, "loss": 0.0374, "step": 4360 }, { "epoch": 0.9491746307558645, "grad_norm": 0.01723085157573223, "learning_rate": 3.8138032145960034e-05, "loss": 0.0033, "step": 4370 }, { "epoch": 0.9513466550825369, "grad_norm": 1.6202086210250854, "learning_rate": 3.8110881841876633e-05, "loss": 0.0281, "step": 4380 }, { "epoch": 0.9535186794092094, "grad_norm": 1.0708816051483154, "learning_rate": 3.8083731537793227e-05, "loss": 0.0467, "step": 4390 }, { "epoch": 0.9556907037358818, "grad_norm": 0.08704803138971329, "learning_rate": 3.805658123370982e-05, "loss": 0.0152, "step": 4400 }, { "epoch": 0.9578627280625543, "grad_norm": 0.014155888929963112, "learning_rate": 3.802943092962641e-05, "loss": 0.0148, "step": 4410 }, { "epoch": 0.9600347523892268, "grad_norm": 0.02981710433959961, "learning_rate": 3.8002280625543006e-05, "loss": 0.0496, "step": 4420 }, { "epoch": 0.9622067767158993, "grad_norm": 0.05970924347639084, "learning_rate": 3.79751303214596e-05, "loss": 0.0223, "step": 4430 }, { "epoch": 0.9643788010425717, "grad_norm": 0.03441289812326431, "learning_rate": 3.794798001737619e-05, "loss": 0.0024, "step": 4440 }, { "epoch": 0.9665508253692441, "grad_norm": 0.024355070665478706, "learning_rate": 3.792082971329279e-05, "loss": 0.0502, "step": 4450 }, { "epoch": 0.9687228496959166, "grad_norm": 0.10018374770879745, "learning_rate": 3.7893679409209384e-05, "loss": 0.0919, "step": 4460 }, { "epoch": 0.970894874022589, "grad_norm": 0.0919993445277214, "learning_rate": 3.786652910512598e-05, "loss": 0.0154, "step": 4470 }, { "epoch": 0.9730668983492615, "grad_norm": 0.03763913735747337, "learning_rate": 3.783937880104258e-05, "loss": 0.0172, "step": 4480 }, { "epoch": 0.9752389226759339, "grad_norm": 0.17443352937698364, "learning_rate": 3.781222849695917e-05, "loss": 0.0353, "step": 4490 }, { "epoch": 0.9774109470026064, "grad_norm": 0.08582145720720291, "learning_rate": 3.778507819287576e-05, "loss": 0.0328, "step": 4500 }, { "epoch": 0.9795829713292789, "grad_norm": 0.017119983211159706, "learning_rate": 3.7757927888792356e-05, "loss": 0.0056, "step": 4510 }, { "epoch": 0.9817549956559514, "grad_norm": 0.014104608446359634, "learning_rate": 3.773077758470895e-05, "loss": 0.0532, "step": 4520 }, { "epoch": 0.9839270199826238, "grad_norm": 0.13239271938800812, "learning_rate": 3.770362728062555e-05, "loss": 0.0027, "step": 4530 }, { "epoch": 0.9860990443092963, "grad_norm": 0.020031528547406197, "learning_rate": 3.767647697654214e-05, "loss": 0.0028, "step": 4540 }, { "epoch": 0.9882710686359687, "grad_norm": 0.014798794873058796, "learning_rate": 3.7649326672458735e-05, "loss": 0.0385, "step": 4550 }, { "epoch": 0.9904430929626412, "grad_norm": 0.16992108523845673, "learning_rate": 3.762217636837533e-05, "loss": 0.0496, "step": 4560 }, { "epoch": 0.9926151172893136, "grad_norm": 0.06289473921060562, "learning_rate": 3.759502606429192e-05, "loss": 0.0077, "step": 4570 }, { "epoch": 0.9947871416159861, "grad_norm": 0.03464280068874359, "learning_rate": 3.7567875760208514e-05, "loss": 0.0296, "step": 4580 }, { "epoch": 0.9969591659426585, "grad_norm": 0.01265657227486372, "learning_rate": 3.754072545612511e-05, "loss": 0.0051, "step": 4590 }, { "epoch": 0.9991311902693311, "grad_norm": 0.8480960130691528, "learning_rate": 3.75135751520417e-05, "loss": 0.0462, "step": 4600 }, { "epoch": 1.0, "eval_f1": 0.4793388429752066, "eval_loss": 0.05696646869182587, "eval_runtime": 83.2949, "eval_samples_per_second": 119.755, "eval_steps_per_second": 7.491, "step": 4604 }, { "epoch": 1.0013032145960035, "grad_norm": 2.0061440467834473, "learning_rate": 3.74864248479583e-05, "loss": 0.0313, "step": 4610 }, { "epoch": 1.003475238922676, "grad_norm": 0.05946578085422516, "learning_rate": 3.745927454387489e-05, "loss": 0.0033, "step": 4620 }, { "epoch": 1.0056472632493485, "grad_norm": 0.04357343912124634, "learning_rate": 3.7432124239791486e-05, "loss": 0.0493, "step": 4630 }, { "epoch": 1.0078192875760208, "grad_norm": 0.03947281464934349, "learning_rate": 3.740497393570808e-05, "loss": 0.0024, "step": 4640 }, { "epoch": 1.0099913119026933, "grad_norm": 0.016838541254401207, "learning_rate": 3.737782363162468e-05, "loss": 0.0163, "step": 4650 }, { "epoch": 1.0121633362293658, "grad_norm": 0.06586115807294846, "learning_rate": 3.735067332754127e-05, "loss": 0.0603, "step": 4660 }, { "epoch": 1.0143353605560381, "grad_norm": 1.1860322952270508, "learning_rate": 3.7323523023457865e-05, "loss": 0.0339, "step": 4670 }, { "epoch": 1.0165073848827106, "grad_norm": 0.16159577667713165, "learning_rate": 3.729637271937446e-05, "loss": 0.0216, "step": 4680 }, { "epoch": 1.0186794092093832, "grad_norm": 0.01223788969218731, "learning_rate": 3.726922241529106e-05, "loss": 0.0045, "step": 4690 }, { "epoch": 1.0208514335360557, "grad_norm": 0.011866359040141106, "learning_rate": 3.724207211120765e-05, "loss": 0.0014, "step": 4700 }, { "epoch": 1.023023457862728, "grad_norm": 0.011750188656151295, "learning_rate": 3.7214921807124244e-05, "loss": 0.0295, "step": 4710 }, { "epoch": 1.0251954821894005, "grad_norm": 0.028903665021061897, "learning_rate": 3.718777150304084e-05, "loss": 0.0051, "step": 4720 }, { "epoch": 1.027367506516073, "grad_norm": 0.034531235694885254, "learning_rate": 3.716062119895743e-05, "loss": 0.0242, "step": 4730 }, { "epoch": 1.0295395308427455, "grad_norm": 0.078121617436409, "learning_rate": 3.713347089487402e-05, "loss": 0.0477, "step": 4740 }, { "epoch": 1.0317115551694178, "grad_norm": 0.06223325431346893, "learning_rate": 3.7106320590790616e-05, "loss": 0.0186, "step": 4750 }, { "epoch": 1.0338835794960903, "grad_norm": 0.052579279989004135, "learning_rate": 3.7079170286707216e-05, "loss": 0.0125, "step": 4760 }, { "epoch": 1.0360556038227628, "grad_norm": 1.9749809503555298, "learning_rate": 3.705201998262381e-05, "loss": 0.0553, "step": 4770 }, { "epoch": 1.0382276281494354, "grad_norm": 0.20051714777946472, "learning_rate": 3.70248696785404e-05, "loss": 0.0504, "step": 4780 }, { "epoch": 1.0403996524761077, "grad_norm": 0.011600709520280361, "learning_rate": 3.6997719374456995e-05, "loss": 0.0343, "step": 4790 }, { "epoch": 1.0425716768027802, "grad_norm": 4.0308122634887695, "learning_rate": 3.697056907037359e-05, "loss": 0.0265, "step": 4800 }, { "epoch": 1.0447437011294527, "grad_norm": 1.1246380805969238, "learning_rate": 3.694341876629018e-05, "loss": 0.0407, "step": 4810 }, { "epoch": 1.0469157254561252, "grad_norm": 0.07232671976089478, "learning_rate": 3.6916268462206774e-05, "loss": 0.0088, "step": 4820 }, { "epoch": 1.0490877497827975, "grad_norm": 0.03161951154470444, "learning_rate": 3.6889118158123373e-05, "loss": 0.038, "step": 4830 }, { "epoch": 1.05125977410947, "grad_norm": 0.08089262247085571, "learning_rate": 3.6861967854039967e-05, "loss": 0.0022, "step": 4840 }, { "epoch": 1.0534317984361425, "grad_norm": 0.019513120874762535, "learning_rate": 3.683481754995656e-05, "loss": 0.0115, "step": 4850 }, { "epoch": 1.0556038227628148, "grad_norm": 0.010743933729827404, "learning_rate": 3.680766724587316e-05, "loss": 0.0013, "step": 4860 }, { "epoch": 1.0577758470894874, "grad_norm": 0.010847543366253376, "learning_rate": 3.678051694178975e-05, "loss": 0.0013, "step": 4870 }, { "epoch": 1.0599478714161599, "grad_norm": 0.010532297194004059, "learning_rate": 3.6753366637706345e-05, "loss": 0.0092, "step": 4880 }, { "epoch": 1.0621198957428324, "grad_norm": 0.010408210568130016, "learning_rate": 3.672621633362294e-05, "loss": 0.0424, "step": 4890 }, { "epoch": 1.0642919200695047, "grad_norm": 0.055000320076942444, "learning_rate": 3.669906602953953e-05, "loss": 0.0496, "step": 4900 }, { "epoch": 1.0664639443961772, "grad_norm": 0.03006312996149063, "learning_rate": 3.667191572545613e-05, "loss": 0.0073, "step": 4910 }, { "epoch": 1.0686359687228497, "grad_norm": 0.02324208803474903, "learning_rate": 3.6644765421372724e-05, "loss": 0.0053, "step": 4920 }, { "epoch": 1.0708079930495222, "grad_norm": 0.015525688417255878, "learning_rate": 3.661761511728932e-05, "loss": 0.052, "step": 4930 }, { "epoch": 1.0729800173761945, "grad_norm": 0.5552154779434204, "learning_rate": 3.659046481320591e-05, "loss": 0.0397, "step": 4940 }, { "epoch": 1.075152041702867, "grad_norm": 0.14947287738323212, "learning_rate": 3.65633145091225e-05, "loss": 0.0036, "step": 4950 }, { "epoch": 1.0773240660295396, "grad_norm": 0.018405791372060776, "learning_rate": 3.6536164205039096e-05, "loss": 0.0019, "step": 4960 }, { "epoch": 1.079496090356212, "grad_norm": 0.012576512061059475, "learning_rate": 3.650901390095569e-05, "loss": 0.0031, "step": 4970 }, { "epoch": 1.0816681146828844, "grad_norm": 0.4258500933647156, "learning_rate": 3.648186359687228e-05, "loss": 0.0888, "step": 4980 }, { "epoch": 1.083840139009557, "grad_norm": 0.11915474385023117, "learning_rate": 3.645471329278888e-05, "loss": 0.0081, "step": 4990 }, { "epoch": 1.0860121633362294, "grad_norm": 0.011587638407945633, "learning_rate": 3.6427562988705475e-05, "loss": 0.0148, "step": 5000 }, { "epoch": 1.088184187662902, "grad_norm": 0.014459837228059769, "learning_rate": 3.640041268462207e-05, "loss": 0.0026, "step": 5010 }, { "epoch": 1.0903562119895742, "grad_norm": 0.010975486598908901, "learning_rate": 3.637326238053866e-05, "loss": 0.0292, "step": 5020 }, { "epoch": 1.0925282363162467, "grad_norm": 0.1021094024181366, "learning_rate": 3.634611207645526e-05, "loss": 0.0045, "step": 5030 }, { "epoch": 1.0947002606429193, "grad_norm": 0.030722634866833687, "learning_rate": 3.6318961772371854e-05, "loss": 0.0021, "step": 5040 }, { "epoch": 1.0968722849695918, "grad_norm": 0.010209214873611927, "learning_rate": 3.629181146828845e-05, "loss": 0.0021, "step": 5050 }, { "epoch": 1.099044309296264, "grad_norm": 0.009853394702076912, "learning_rate": 3.626466116420504e-05, "loss": 0.0017, "step": 5060 }, { "epoch": 1.1012163336229366, "grad_norm": 0.009726290591061115, "learning_rate": 3.623751086012164e-05, "loss": 0.0319, "step": 5070 }, { "epoch": 1.103388357949609, "grad_norm": 0.010656571947038174, "learning_rate": 3.621036055603823e-05, "loss": 0.0017, "step": 5080 }, { "epoch": 1.1055603822762814, "grad_norm": 0.009740895591676235, "learning_rate": 3.6183210251954826e-05, "loss": 0.0016, "step": 5090 }, { "epoch": 1.107732406602954, "grad_norm": 0.009546713903546333, "learning_rate": 3.615605994787142e-05, "loss": 0.0027, "step": 5100 }, { "epoch": 1.1099044309296264, "grad_norm": 0.11545804142951965, "learning_rate": 3.612890964378801e-05, "loss": 0.032, "step": 5110 }, { "epoch": 1.112076455256299, "grad_norm": 0.23652075231075287, "learning_rate": 3.6101759339704605e-05, "loss": 0.0037, "step": 5120 }, { "epoch": 1.1142484795829712, "grad_norm": 0.009880350902676582, "learning_rate": 3.60746090356212e-05, "loss": 0.0279, "step": 5130 }, { "epoch": 1.1164205039096438, "grad_norm": 0.057632774114608765, "learning_rate": 3.60474587315378e-05, "loss": 0.0849, "step": 5140 }, { "epoch": 1.1185925282363163, "grad_norm": 0.07699901610612869, "learning_rate": 3.602030842745439e-05, "loss": 0.005, "step": 5150 }, { "epoch": 1.1207645525629888, "grad_norm": 0.5865737795829773, "learning_rate": 3.5993158123370984e-05, "loss": 0.0401, "step": 5160 }, { "epoch": 1.122936576889661, "grad_norm": 0.03473491966724396, "learning_rate": 3.596600781928758e-05, "loss": 0.0057, "step": 5170 }, { "epoch": 1.1251086012163336, "grad_norm": 3.234090805053711, "learning_rate": 3.593885751520417e-05, "loss": 0.045, "step": 5180 }, { "epoch": 1.1272806255430061, "grad_norm": 0.038007259368896484, "learning_rate": 3.591170721112076e-05, "loss": 0.0038, "step": 5190 }, { "epoch": 1.1294526498696786, "grad_norm": 8.481440544128418, "learning_rate": 3.5884556907037356e-05, "loss": 0.0237, "step": 5200 }, { "epoch": 1.131624674196351, "grad_norm": 0.010371362790465355, "learning_rate": 3.5857406602953956e-05, "loss": 0.0243, "step": 5210 }, { "epoch": 1.1337966985230234, "grad_norm": 0.013176986016333103, "learning_rate": 3.583025629887055e-05, "loss": 0.032, "step": 5220 }, { "epoch": 1.135968722849696, "grad_norm": 0.0369015671312809, "learning_rate": 3.580310599478715e-05, "loss": 0.0279, "step": 5230 }, { "epoch": 1.1381407471763683, "grad_norm": 0.18637622892856598, "learning_rate": 3.577595569070374e-05, "loss": 0.0033, "step": 5240 }, { "epoch": 1.1403127715030408, "grad_norm": 0.05641024932265282, "learning_rate": 3.5748805386620334e-05, "loss": 0.014, "step": 5250 }, { "epoch": 1.1424847958297133, "grad_norm": 0.19416458904743195, "learning_rate": 3.572165508253693e-05, "loss": 0.004, "step": 5260 }, { "epoch": 1.1446568201563858, "grad_norm": 0.009675233624875546, "learning_rate": 3.569450477845352e-05, "loss": 0.0154, "step": 5270 }, { "epoch": 1.1468288444830583, "grad_norm": 0.019023172557353973, "learning_rate": 3.5667354474370113e-05, "loss": 0.0136, "step": 5280 }, { "epoch": 1.1490008688097306, "grad_norm": 0.02198546566069126, "learning_rate": 3.5640204170286706e-05, "loss": 0.0453, "step": 5290 }, { "epoch": 1.1511728931364031, "grad_norm": 0.47042351961135864, "learning_rate": 3.5613053866203306e-05, "loss": 0.0341, "step": 5300 }, { "epoch": 1.1533449174630757, "grad_norm": 0.008669150061905384, "learning_rate": 3.55859035621199e-05, "loss": 0.0225, "step": 5310 }, { "epoch": 1.155516941789748, "grad_norm": 0.008939978666603565, "learning_rate": 3.555875325803649e-05, "loss": 0.0277, "step": 5320 }, { "epoch": 1.1576889661164205, "grad_norm": 2.583348512649536, "learning_rate": 3.5531602953953085e-05, "loss": 0.147, "step": 5330 }, { "epoch": 1.159860990443093, "grad_norm": 0.8401560187339783, "learning_rate": 3.550445264986968e-05, "loss": 0.0502, "step": 5340 }, { "epoch": 1.1620330147697655, "grad_norm": 0.07709191739559174, "learning_rate": 3.547730234578627e-05, "loss": 0.0124, "step": 5350 }, { "epoch": 1.1642050390964378, "grad_norm": 0.010382450185716152, "learning_rate": 3.5450152041702864e-05, "loss": 0.0038, "step": 5360 }, { "epoch": 1.1663770634231103, "grad_norm": 0.2649940252304077, "learning_rate": 3.5423001737619464e-05, "loss": 0.0034, "step": 5370 }, { "epoch": 1.1685490877497828, "grad_norm": 0.013010102324187756, "learning_rate": 3.539585143353606e-05, "loss": 0.0011, "step": 5380 }, { "epoch": 1.1707211120764554, "grad_norm": 0.014443314634263515, "learning_rate": 3.536870112945265e-05, "loss": 0.0625, "step": 5390 }, { "epoch": 1.1728931364031276, "grad_norm": 1.5826534032821655, "learning_rate": 3.534155082536924e-05, "loss": 0.0399, "step": 5400 }, { "epoch": 1.1750651607298002, "grad_norm": 0.1101546585559845, "learning_rate": 3.531440052128584e-05, "loss": 0.0501, "step": 5410 }, { "epoch": 1.1772371850564727, "grad_norm": 0.4018704891204834, "learning_rate": 3.5287250217202436e-05, "loss": 0.0336, "step": 5420 }, { "epoch": 1.1794092093831452, "grad_norm": 0.009711096063256264, "learning_rate": 3.526009991311903e-05, "loss": 0.005, "step": 5430 }, { "epoch": 1.1815812337098175, "grad_norm": 0.01917942240834236, "learning_rate": 3.523294960903562e-05, "loss": 0.1212, "step": 5440 }, { "epoch": 1.18375325803649, "grad_norm": 0.02508840151131153, "learning_rate": 3.520579930495222e-05, "loss": 0.0028, "step": 5450 }, { "epoch": 1.1859252823631625, "grad_norm": 0.0388546884059906, "learning_rate": 3.5178649000868815e-05, "loss": 0.0312, "step": 5460 }, { "epoch": 1.1880973066898348, "grad_norm": 0.06240135803818703, "learning_rate": 3.515149869678541e-05, "loss": 0.0353, "step": 5470 }, { "epoch": 1.1902693310165073, "grad_norm": 0.061889566481113434, "learning_rate": 3.5124348392702e-05, "loss": 0.0255, "step": 5480 }, { "epoch": 1.1924413553431799, "grad_norm": 0.07585006207227707, "learning_rate": 3.5097198088618594e-05, "loss": 0.004, "step": 5490 }, { "epoch": 1.1946133796698524, "grad_norm": 0.039220329374074936, "learning_rate": 3.507004778453519e-05, "loss": 0.024, "step": 5500 }, { "epoch": 1.1967854039965247, "grad_norm": 0.17422033846378326, "learning_rate": 3.504289748045178e-05, "loss": 0.0257, "step": 5510 }, { "epoch": 1.1989574283231972, "grad_norm": 0.02887544222176075, "learning_rate": 3.501574717636838e-05, "loss": 0.0038, "step": 5520 }, { "epoch": 1.2011294526498697, "grad_norm": 0.044750556349754333, "learning_rate": 3.498859687228497e-05, "loss": 0.0329, "step": 5530 }, { "epoch": 1.2033014769765422, "grad_norm": 0.00962216965854168, "learning_rate": 3.4961446568201566e-05, "loss": 0.0027, "step": 5540 }, { "epoch": 1.2054735013032145, "grad_norm": 0.008415351621806622, "learning_rate": 3.493429626411816e-05, "loss": 0.0022, "step": 5550 }, { "epoch": 1.207645525629887, "grad_norm": 0.1523517519235611, "learning_rate": 3.490714596003475e-05, "loss": 0.0168, "step": 5560 }, { "epoch": 1.2098175499565595, "grad_norm": 0.008230285719037056, "learning_rate": 3.4879995655951345e-05, "loss": 0.0202, "step": 5570 }, { "epoch": 1.211989574283232, "grad_norm": 0.008194214664399624, "learning_rate": 3.485284535186794e-05, "loss": 0.0053, "step": 5580 }, { "epoch": 1.2141615986099044, "grad_norm": 0.008157436735928059, "learning_rate": 3.482569504778454e-05, "loss": 0.008, "step": 5590 }, { "epoch": 1.2163336229365769, "grad_norm": 0.008164693601429462, "learning_rate": 3.479854474370113e-05, "loss": 0.0326, "step": 5600 }, { "epoch": 1.2185056472632494, "grad_norm": 0.01076839491724968, "learning_rate": 3.477139443961773e-05, "loss": 0.0011, "step": 5610 }, { "epoch": 1.2206776715899217, "grad_norm": 0.007989328354597092, "learning_rate": 3.4744244135534323e-05, "loss": 0.0058, "step": 5620 }, { "epoch": 1.2228496959165942, "grad_norm": 0.08358601480722427, "learning_rate": 3.4717093831450917e-05, "loss": 0.045, "step": 5630 }, { "epoch": 1.2250217202432667, "grad_norm": 0.08348576724529266, "learning_rate": 3.468994352736751e-05, "loss": 0.0024, "step": 5640 }, { "epoch": 1.2271937445699392, "grad_norm": 0.008693045936524868, "learning_rate": 3.46627932232841e-05, "loss": 0.0026, "step": 5650 }, { "epoch": 1.2293657688966118, "grad_norm": 0.007211147341877222, "learning_rate": 3.4635642919200696e-05, "loss": 0.0096, "step": 5660 }, { "epoch": 1.231537793223284, "grad_norm": 0.007140511646866798, "learning_rate": 3.460849261511729e-05, "loss": 0.0008, "step": 5670 }, { "epoch": 1.2337098175499566, "grad_norm": 0.8093301653862, "learning_rate": 3.458134231103389e-05, "loss": 0.0773, "step": 5680 }, { "epoch": 1.235881841876629, "grad_norm": 0.04519034922122955, "learning_rate": 3.455419200695048e-05, "loss": 0.0012, "step": 5690 }, { "epoch": 1.2380538662033014, "grad_norm": 1.5662466287612915, "learning_rate": 3.4527041702867074e-05, "loss": 0.0354, "step": 5700 }, { "epoch": 1.240225890529974, "grad_norm": 0.051535408943891525, "learning_rate": 3.449989139878367e-05, "loss": 0.0272, "step": 5710 }, { "epoch": 1.2423979148566464, "grad_norm": 0.008189026266336441, "learning_rate": 3.447274109470026e-05, "loss": 0.0048, "step": 5720 }, { "epoch": 1.244569939183319, "grad_norm": 0.007900476455688477, "learning_rate": 3.4445590790616853e-05, "loss": 0.0009, "step": 5730 }, { "epoch": 1.2467419635099912, "grad_norm": 0.00759408064186573, "learning_rate": 3.4418440486533446e-05, "loss": 0.0008, "step": 5740 }, { "epoch": 1.2489139878366637, "grad_norm": 0.7654576301574707, "learning_rate": 3.4391290182450046e-05, "loss": 0.0588, "step": 5750 }, { "epoch": 1.2510860121633363, "grad_norm": 0.008799172006547451, "learning_rate": 3.436413987836664e-05, "loss": 0.0014, "step": 5760 }, { "epoch": 1.2532580364900086, "grad_norm": 0.011267498135566711, "learning_rate": 3.433698957428323e-05, "loss": 0.0523, "step": 5770 }, { "epoch": 1.255430060816681, "grad_norm": 0.029857400804758072, "learning_rate": 3.4309839270199825e-05, "loss": 0.0369, "step": 5780 }, { "epoch": 1.2576020851433536, "grad_norm": 0.024623876437544823, "learning_rate": 3.4282688966116425e-05, "loss": 0.0054, "step": 5790 }, { "epoch": 1.259774109470026, "grad_norm": 0.012604706920683384, "learning_rate": 3.425553866203302e-05, "loss": 0.0031, "step": 5800 }, { "epoch": 1.2619461337966986, "grad_norm": 0.009479483589529991, "learning_rate": 3.422838835794961e-05, "loss": 0.0117, "step": 5810 }, { "epoch": 1.264118158123371, "grad_norm": 0.018399232998490334, "learning_rate": 3.4201238053866204e-05, "loss": 0.0335, "step": 5820 }, { "epoch": 1.2662901824500434, "grad_norm": 0.024806447327136993, "learning_rate": 3.4174087749782804e-05, "loss": 0.0021, "step": 5830 }, { "epoch": 1.268462206776716, "grad_norm": 0.01572875864803791, "learning_rate": 3.41469374456994e-05, "loss": 0.0192, "step": 5840 }, { "epoch": 1.2706342311033882, "grad_norm": 0.009287680499255657, "learning_rate": 3.411978714161599e-05, "loss": 0.0046, "step": 5850 }, { "epoch": 1.2728062554300608, "grad_norm": 3.3354904651641846, "learning_rate": 3.409263683753258e-05, "loss": 0.0236, "step": 5860 }, { "epoch": 1.2749782797567333, "grad_norm": 0.01085092592984438, "learning_rate": 3.4065486533449176e-05, "loss": 0.0758, "step": 5870 }, { "epoch": 1.2771503040834058, "grad_norm": 0.06153455376625061, "learning_rate": 3.403833622936577e-05, "loss": 0.0023, "step": 5880 }, { "epoch": 1.2793223284100783, "grad_norm": 0.08027364313602448, "learning_rate": 3.401118592528236e-05, "loss": 0.0242, "step": 5890 }, { "epoch": 1.2814943527367506, "grad_norm": 0.026004912331700325, "learning_rate": 3.3984035621198955e-05, "loss": 0.0135, "step": 5900 }, { "epoch": 1.2836663770634231, "grad_norm": 0.007019513752311468, "learning_rate": 3.3956885317115555e-05, "loss": 0.0018, "step": 5910 }, { "epoch": 1.2858384013900956, "grad_norm": 0.007902990095317364, "learning_rate": 3.392973501303215e-05, "loss": 0.0461, "step": 5920 }, { "epoch": 1.288010425716768, "grad_norm": 0.008273580111563206, "learning_rate": 3.390258470894874e-05, "loss": 0.0015, "step": 5930 }, { "epoch": 1.2901824500434405, "grad_norm": 0.008654528297483921, "learning_rate": 3.3875434404865334e-05, "loss": 0.0637, "step": 5940 }, { "epoch": 1.292354474370113, "grad_norm": 0.3597409129142761, "learning_rate": 3.384828410078193e-05, "loss": 0.0082, "step": 5950 }, { "epoch": 1.2945264986967855, "grad_norm": 0.014476552605628967, "learning_rate": 3.382113379669852e-05, "loss": 0.0055, "step": 5960 }, { "epoch": 1.2966985230234578, "grad_norm": 3.3513331413269043, "learning_rate": 3.379398349261512e-05, "loss": 0.0815, "step": 5970 }, { "epoch": 1.2988705473501303, "grad_norm": 0.008756415918469429, "learning_rate": 3.376683318853171e-05, "loss": 0.0009, "step": 5980 }, { "epoch": 1.3010425716768028, "grad_norm": 0.01109595037996769, "learning_rate": 3.373968288444831e-05, "loss": 0.0193, "step": 5990 }, { "epoch": 1.3032145960034751, "grad_norm": 0.05595744401216507, "learning_rate": 3.3712532580364906e-05, "loss": 0.0018, "step": 6000 }, { "epoch": 1.3053866203301476, "grad_norm": 0.008088390342891216, "learning_rate": 3.36853822762815e-05, "loss": 0.0552, "step": 6010 }, { "epoch": 1.3075586446568201, "grad_norm": 0.012108515948057175, "learning_rate": 3.365823197219809e-05, "loss": 0.0019, "step": 6020 }, { "epoch": 1.3097306689834927, "grad_norm": 4.250258922576904, "learning_rate": 3.3631081668114685e-05, "loss": 0.0069, "step": 6030 }, { "epoch": 1.3119026933101652, "grad_norm": 0.018538329750299454, "learning_rate": 3.360393136403128e-05, "loss": 0.0014, "step": 6040 }, { "epoch": 1.3140747176368375, "grad_norm": 0.05477520078420639, "learning_rate": 3.357678105994787e-05, "loss": 0.0172, "step": 6050 }, { "epoch": 1.31624674196351, "grad_norm": 0.4852977991104126, "learning_rate": 3.354963075586447e-05, "loss": 0.0555, "step": 6060 }, { "epoch": 1.3184187662901825, "grad_norm": 0.013744533993303776, "learning_rate": 3.3522480451781063e-05, "loss": 0.0072, "step": 6070 }, { "epoch": 1.3205907906168548, "grad_norm": 0.008594054728746414, "learning_rate": 3.3495330147697656e-05, "loss": 0.0135, "step": 6080 }, { "epoch": 1.3227628149435273, "grad_norm": 0.008579927496612072, "learning_rate": 3.346817984361425e-05, "loss": 0.0484, "step": 6090 }, { "epoch": 1.3249348392701998, "grad_norm": 0.02037675306200981, "learning_rate": 3.344102953953084e-05, "loss": 0.0017, "step": 6100 }, { "epoch": 1.3271068635968724, "grad_norm": 0.040782492607831955, "learning_rate": 3.3413879235447436e-05, "loss": 0.014, "step": 6110 }, { "epoch": 1.3292788879235449, "grad_norm": 0.04753238335251808, "learning_rate": 3.338672893136403e-05, "loss": 0.0246, "step": 6120 }, { "epoch": 1.3314509122502172, "grad_norm": 0.049100227653980255, "learning_rate": 3.335957862728063e-05, "loss": 0.0279, "step": 6130 }, { "epoch": 1.3336229365768897, "grad_norm": 0.008865290321409702, "learning_rate": 3.333242832319722e-05, "loss": 0.0015, "step": 6140 }, { "epoch": 1.3357949609035622, "grad_norm": 0.4136160910129547, "learning_rate": 3.3305278019113814e-05, "loss": 0.0028, "step": 6150 }, { "epoch": 1.3379669852302345, "grad_norm": 0.9577689170837402, "learning_rate": 3.327812771503041e-05, "loss": 0.0394, "step": 6160 }, { "epoch": 1.340139009556907, "grad_norm": 0.04995536804199219, "learning_rate": 3.325097741094701e-05, "loss": 0.0231, "step": 6170 }, { "epoch": 1.3423110338835795, "grad_norm": 0.03455106168985367, "learning_rate": 3.32238271068636e-05, "loss": 0.0118, "step": 6180 }, { "epoch": 1.344483058210252, "grad_norm": 0.4952455163002014, "learning_rate": 3.319667680278019e-05, "loss": 0.0239, "step": 6190 }, { "epoch": 1.3466550825369243, "grad_norm": 0.006735064554959536, "learning_rate": 3.3169526498696786e-05, "loss": 0.0028, "step": 6200 }, { "epoch": 1.3488271068635969, "grad_norm": 0.0067639597691595554, "learning_rate": 3.3142376194613386e-05, "loss": 0.0008, "step": 6210 }, { "epoch": 1.3509991311902694, "grad_norm": 0.0066012111492455006, "learning_rate": 3.311522589052998e-05, "loss": 0.0506, "step": 6220 }, { "epoch": 1.3531711555169417, "grad_norm": 1.9521390199661255, "learning_rate": 3.308807558644657e-05, "loss": 0.0134, "step": 6230 }, { "epoch": 1.3553431798436142, "grad_norm": 0.346284419298172, "learning_rate": 3.3060925282363165e-05, "loss": 0.0114, "step": 6240 }, { "epoch": 1.3575152041702867, "grad_norm": 0.006885781418532133, "learning_rate": 3.303377497827976e-05, "loss": 0.0011, "step": 6250 }, { "epoch": 1.3596872284969592, "grad_norm": 0.006113228388130665, "learning_rate": 3.300662467419635e-05, "loss": 0.0017, "step": 6260 }, { "epoch": 1.3618592528236317, "grad_norm": 0.006051701493561268, "learning_rate": 3.2979474370112944e-05, "loss": 0.0032, "step": 6270 }, { "epoch": 1.364031277150304, "grad_norm": 0.0061963628977537155, "learning_rate": 3.295232406602954e-05, "loss": 0.0423, "step": 6280 }, { "epoch": 1.3662033014769766, "grad_norm": 0.006955439690500498, "learning_rate": 3.292517376194614e-05, "loss": 0.0011, "step": 6290 }, { "epoch": 1.368375325803649, "grad_norm": 0.04997705668210983, "learning_rate": 3.289802345786273e-05, "loss": 0.0041, "step": 6300 }, { "epoch": 1.3705473501303214, "grad_norm": 2.2008578777313232, "learning_rate": 3.287087315377932e-05, "loss": 0.017, "step": 6310 }, { "epoch": 1.3727193744569939, "grad_norm": 0.046700820326805115, "learning_rate": 3.2843722849695916e-05, "loss": 0.0021, "step": 6320 }, { "epoch": 1.3748913987836664, "grad_norm": 0.005715570878237486, "learning_rate": 3.281657254561251e-05, "loss": 0.0402, "step": 6330 }, { "epoch": 1.377063423110339, "grad_norm": 0.7199205160140991, "learning_rate": 3.27894222415291e-05, "loss": 0.0022, "step": 6340 }, { "epoch": 1.3792354474370114, "grad_norm": 0.007373593281954527, "learning_rate": 3.27622719374457e-05, "loss": 0.023, "step": 6350 }, { "epoch": 1.3814074717636837, "grad_norm": 0.020932350307703018, "learning_rate": 3.2735121633362295e-05, "loss": 0.0097, "step": 6360 }, { "epoch": 1.3835794960903562, "grad_norm": 0.010742595419287682, "learning_rate": 3.2707971329278895e-05, "loss": 0.0029, "step": 6370 }, { "epoch": 1.3857515204170285, "grad_norm": 0.006945365574210882, "learning_rate": 3.268082102519549e-05, "loss": 0.0162, "step": 6380 }, { "epoch": 1.387923544743701, "grad_norm": 0.048244886100292206, "learning_rate": 3.265367072111208e-05, "loss": 0.0015, "step": 6390 }, { "epoch": 1.3900955690703736, "grad_norm": 0.005674061365425587, "learning_rate": 3.2626520417028674e-05, "loss": 0.0245, "step": 6400 }, { "epoch": 1.392267593397046, "grad_norm": 0.06562032550573349, "learning_rate": 3.259937011294527e-05, "loss": 0.0474, "step": 6410 }, { "epoch": 1.3944396177237186, "grad_norm": 0.07147029787302017, "learning_rate": 3.257221980886186e-05, "loss": 0.0056, "step": 6420 }, { "epoch": 1.396611642050391, "grad_norm": 0.009265787899494171, "learning_rate": 3.254506950477845e-05, "loss": 0.0161, "step": 6430 }, { "epoch": 1.3987836663770634, "grad_norm": 0.011842530220746994, "learning_rate": 3.252063423110339e-05, "loss": 0.0094, "step": 6440 }, { "epoch": 1.400955690703736, "grad_norm": 6.259805679321289, "learning_rate": 3.2493483927019986e-05, "loss": 0.0427, "step": 6450 }, { "epoch": 1.4031277150304082, "grad_norm": 0.017940033227205276, "learning_rate": 3.246633362293658e-05, "loss": 0.0049, "step": 6460 }, { "epoch": 1.4052997393570807, "grad_norm": 0.05736970901489258, "learning_rate": 3.243918331885317e-05, "loss": 0.0303, "step": 6470 }, { "epoch": 1.4074717636837533, "grad_norm": 0.006364389322698116, "learning_rate": 3.2412033014769765e-05, "loss": 0.0033, "step": 6480 }, { "epoch": 1.4096437880104258, "grad_norm": 0.006534558720886707, "learning_rate": 3.238488271068636e-05, "loss": 0.0095, "step": 6490 }, { "epoch": 1.4118158123370983, "grad_norm": 0.01259972807019949, "learning_rate": 3.235773240660295e-05, "loss": 0.036, "step": 6500 }, { "epoch": 1.4139878366637706, "grad_norm": 0.08793149143457413, "learning_rate": 3.233058210251955e-05, "loss": 0.0258, "step": 6510 }, { "epoch": 1.416159860990443, "grad_norm": 0.007453892845660448, "learning_rate": 3.2303431798436144e-05, "loss": 0.0432, "step": 6520 }, { "epoch": 1.4183318853171156, "grad_norm": 0.008104286156594753, "learning_rate": 3.227628149435274e-05, "loss": 0.0372, "step": 6530 }, { "epoch": 1.420503909643788, "grad_norm": 0.16239531338214874, "learning_rate": 3.224913119026933e-05, "loss": 0.0201, "step": 6540 }, { "epoch": 1.4226759339704604, "grad_norm": 0.014620975591242313, "learning_rate": 3.222198088618592e-05, "loss": 0.0251, "step": 6550 }, { "epoch": 1.424847958297133, "grad_norm": 0.006428881548345089, "learning_rate": 3.219483058210252e-05, "loss": 0.0058, "step": 6560 }, { "epoch": 1.4270199826238055, "grad_norm": 0.006015344522893429, "learning_rate": 3.2167680278019116e-05, "loss": 0.0249, "step": 6570 }, { "epoch": 1.4291920069504778, "grad_norm": 0.00819337647408247, "learning_rate": 3.214052997393571e-05, "loss": 0.0389, "step": 6580 }, { "epoch": 1.4313640312771503, "grad_norm": 0.0471440814435482, "learning_rate": 3.211337966985231e-05, "loss": 0.0275, "step": 6590 }, { "epoch": 1.4335360556038228, "grad_norm": 0.005635848734527826, "learning_rate": 3.20862293657689e-05, "loss": 0.0028, "step": 6600 }, { "epoch": 1.435708079930495, "grad_norm": 0.005153193604201078, "learning_rate": 3.2059079061685495e-05, "loss": 0.0015, "step": 6610 }, { "epoch": 1.4378801042571676, "grad_norm": 0.0051053185015916824, "learning_rate": 3.203192875760209e-05, "loss": 0.0015, "step": 6620 }, { "epoch": 1.4400521285838401, "grad_norm": 0.005034744273871183, "learning_rate": 3.200477845351868e-05, "loss": 0.0006, "step": 6630 }, { "epoch": 1.4422241529105126, "grad_norm": 0.6263902187347412, "learning_rate": 3.1977628149435274e-05, "loss": 0.0472, "step": 6640 }, { "epoch": 1.4443961772371852, "grad_norm": 0.009957981295883656, "learning_rate": 3.195047784535187e-05, "loss": 0.0027, "step": 6650 }, { "epoch": 1.4465682015638575, "grad_norm": 0.007074551656842232, "learning_rate": 3.192332754126847e-05, "loss": 0.0009, "step": 6660 }, { "epoch": 1.44874022589053, "grad_norm": 0.01237443182617426, "learning_rate": 3.189617723718506e-05, "loss": 0.0315, "step": 6670 }, { "epoch": 1.4509122502172025, "grad_norm": 0.020478179678320885, "learning_rate": 3.1871741963509993e-05, "loss": 0.0585, "step": 6680 }, { "epoch": 1.4530842745438748, "grad_norm": 0.06119263172149658, "learning_rate": 3.1844591659426586e-05, "loss": 0.0347, "step": 6690 }, { "epoch": 1.4552562988705473, "grad_norm": 0.20599764585494995, "learning_rate": 3.181744135534318e-05, "loss": 0.0266, "step": 6700 }, { "epoch": 1.4574283231972198, "grad_norm": 0.024101588875055313, "learning_rate": 3.179029105125977e-05, "loss": 0.013, "step": 6710 }, { "epoch": 1.4596003475238923, "grad_norm": 1.3978750705718994, "learning_rate": 3.1763140747176366e-05, "loss": 0.0416, "step": 6720 }, { "epoch": 1.4617723718505649, "grad_norm": 0.008877950720489025, "learning_rate": 3.1735990443092965e-05, "loss": 0.0457, "step": 6730 }, { "epoch": 1.4639443961772372, "grad_norm": 0.026043567806482315, "learning_rate": 3.170884013900956e-05, "loss": 0.0263, "step": 6740 }, { "epoch": 1.4661164205039097, "grad_norm": 0.0366000272333622, "learning_rate": 3.168168983492615e-05, "loss": 0.0043, "step": 6750 }, { "epoch": 1.4682884448305822, "grad_norm": 0.02416500821709633, "learning_rate": 3.1654539530842744e-05, "loss": 0.0028, "step": 6760 }, { "epoch": 1.4704604691572545, "grad_norm": 0.12652349472045898, "learning_rate": 3.1627389226759344e-05, "loss": 0.0322, "step": 6770 }, { "epoch": 1.472632493483927, "grad_norm": 0.017712270841002464, "learning_rate": 3.160023892267594e-05, "loss": 0.0192, "step": 6780 }, { "epoch": 1.4748045178105995, "grad_norm": 2.6074209213256836, "learning_rate": 3.157308861859253e-05, "loss": 0.0178, "step": 6790 }, { "epoch": 1.476976542137272, "grad_norm": 0.03420431539416313, "learning_rate": 3.154593831450912e-05, "loss": 0.0362, "step": 6800 }, { "epoch": 1.4791485664639443, "grad_norm": 0.028371965512633324, "learning_rate": 3.151878801042572e-05, "loss": 0.0052, "step": 6810 }, { "epoch": 1.4813205907906168, "grad_norm": 0.019446449354290962, "learning_rate": 3.1491637706342316e-05, "loss": 0.001, "step": 6820 }, { "epoch": 1.4834926151172894, "grad_norm": 0.022431597113609314, "learning_rate": 3.146448740225891e-05, "loss": 0.015, "step": 6830 }, { "epoch": 1.4856646394439617, "grad_norm": 0.0063674296252429485, "learning_rate": 3.14373370981755e-05, "loss": 0.0399, "step": 6840 }, { "epoch": 1.4878366637706342, "grad_norm": 0.02433244325220585, "learning_rate": 3.1410186794092095e-05, "loss": 0.0308, "step": 6850 }, { "epoch": 1.4900086880973067, "grad_norm": 0.12433426082134247, "learning_rate": 3.138303649000869e-05, "loss": 0.0901, "step": 6860 }, { "epoch": 1.4921807124239792, "grad_norm": 0.027635198086500168, "learning_rate": 3.135588618592528e-05, "loss": 0.0095, "step": 6870 }, { "epoch": 1.4943527367506517, "grad_norm": 0.01609298586845398, "learning_rate": 3.132873588184188e-05, "loss": 0.021, "step": 6880 }, { "epoch": 1.496524761077324, "grad_norm": 0.005982758477330208, "learning_rate": 3.1301585577758474e-05, "loss": 0.0007, "step": 6890 }, { "epoch": 1.4986967854039965, "grad_norm": 0.12338759750127792, "learning_rate": 3.127443527367507e-05, "loss": 0.0042, "step": 6900 }, { "epoch": 1.5008688097306688, "grad_norm": 0.009839468635618687, "learning_rate": 3.124728496959166e-05, "loss": 0.0007, "step": 6910 }, { "epoch": 1.5030408340573413, "grad_norm": 0.00818830356001854, "learning_rate": 3.122013466550825e-05, "loss": 0.0014, "step": 6920 }, { "epoch": 1.5052128583840139, "grad_norm": 0.0049653262831270695, "learning_rate": 3.1192984361424846e-05, "loss": 0.0248, "step": 6930 }, { "epoch": 1.5073848827106864, "grad_norm": 0.017730310559272766, "learning_rate": 3.116583405734144e-05, "loss": 0.0364, "step": 6940 }, { "epoch": 1.509556907037359, "grad_norm": 0.07770511507987976, "learning_rate": 3.113868375325804e-05, "loss": 0.0154, "step": 6950 }, { "epoch": 1.5117289313640314, "grad_norm": 0.00605663051828742, "learning_rate": 3.111153344917463e-05, "loss": 0.0015, "step": 6960 }, { "epoch": 1.5139009556907037, "grad_norm": 0.01187584176659584, "learning_rate": 3.1084383145091225e-05, "loss": 0.0025, "step": 6970 }, { "epoch": 1.5160729800173762, "grad_norm": 0.004929904360324144, "learning_rate": 3.1057232841007825e-05, "loss": 0.0097, "step": 6980 }, { "epoch": 1.5182450043440485, "grad_norm": 0.0053964219987392426, "learning_rate": 3.103008253692442e-05, "loss": 0.0145, "step": 6990 }, { "epoch": 1.520417028670721, "grad_norm": 0.004978001583367586, "learning_rate": 3.100293223284101e-05, "loss": 0.046, "step": 7000 }, { "epoch": 1.5225890529973936, "grad_norm": 0.09007082879543304, "learning_rate": 3.0975781928757604e-05, "loss": 0.0418, "step": 7010 }, { "epoch": 1.524761077324066, "grad_norm": 0.11491013318300247, "learning_rate": 3.09486316246742e-05, "loss": 0.03, "step": 7020 }, { "epoch": 1.5269331016507386, "grad_norm": 0.005492928437888622, "learning_rate": 3.092148132059079e-05, "loss": 0.0038, "step": 7030 }, { "epoch": 1.529105125977411, "grad_norm": 0.005056523717939854, "learning_rate": 3.089433101650739e-05, "loss": 0.0238, "step": 7040 }, { "epoch": 1.5312771503040834, "grad_norm": 0.006425573956221342, "learning_rate": 3.086718071242398e-05, "loss": 0.0015, "step": 7050 }, { "epoch": 1.533449174630756, "grad_norm": 0.005666010081768036, "learning_rate": 3.0840030408340576e-05, "loss": 0.0023, "step": 7060 }, { "epoch": 1.5356211989574282, "grad_norm": 0.004847542382776737, "learning_rate": 3.081288010425717e-05, "loss": 0.0033, "step": 7070 }, { "epoch": 1.5377932232841007, "grad_norm": 0.00474773533642292, "learning_rate": 3.078572980017376e-05, "loss": 0.0306, "step": 7080 }, { "epoch": 1.5399652476107732, "grad_norm": 0.11169460415840149, "learning_rate": 3.0758579496090355e-05, "loss": 0.0159, "step": 7090 }, { "epoch": 1.5421372719374458, "grad_norm": 4.95255708694458, "learning_rate": 3.073142919200695e-05, "loss": 0.0364, "step": 7100 }, { "epoch": 1.5443092962641183, "grad_norm": 0.26123908162117004, "learning_rate": 3.070427888792355e-05, "loss": 0.0052, "step": 7110 }, { "epoch": 1.5464813205907906, "grad_norm": 0.004803699441254139, "learning_rate": 3.067712858384014e-05, "loss": 0.0012, "step": 7120 }, { "epoch": 1.548653344917463, "grad_norm": 0.004582292400300503, "learning_rate": 3.0649978279756733e-05, "loss": 0.0005, "step": 7130 }, { "epoch": 1.5508253692441354, "grad_norm": 0.004967282060533762, "learning_rate": 3.0622827975673326e-05, "loss": 0.0453, "step": 7140 }, { "epoch": 1.552997393570808, "grad_norm": 3.779393196105957, "learning_rate": 3.0595677671589926e-05, "loss": 0.0148, "step": 7150 }, { "epoch": 1.5551694178974804, "grad_norm": 0.08057697117328644, "learning_rate": 3.056852736750652e-05, "loss": 0.0015, "step": 7160 }, { "epoch": 1.557341442224153, "grad_norm": 0.020298315212130547, "learning_rate": 3.054137706342311e-05, "loss": 0.0022, "step": 7170 }, { "epoch": 1.5595134665508255, "grad_norm": 0.005744563415646553, "learning_rate": 3.0514226759339702e-05, "loss": 0.0013, "step": 7180 }, { "epoch": 1.561685490877498, "grad_norm": 0.005050037521868944, "learning_rate": 3.0487076455256302e-05, "loss": 0.0365, "step": 7190 }, { "epoch": 1.5638575152041703, "grad_norm": 0.02648780681192875, "learning_rate": 3.0459926151172895e-05, "loss": 0.0017, "step": 7200 }, { "epoch": 1.5660295395308428, "grad_norm": 1.0146911144256592, "learning_rate": 3.043277584708949e-05, "loss": 0.0024, "step": 7210 }, { "epoch": 1.568201563857515, "grad_norm": 0.005274607799947262, "learning_rate": 3.0405625543006084e-05, "loss": 0.0475, "step": 7220 }, { "epoch": 1.5703735881841876, "grad_norm": 0.013579404912889004, "learning_rate": 3.0378475238922677e-05, "loss": 0.0108, "step": 7230 }, { "epoch": 1.5725456125108601, "grad_norm": 0.015852799639105797, "learning_rate": 3.035132493483927e-05, "loss": 0.0227, "step": 7240 }, { "epoch": 1.5747176368375326, "grad_norm": 0.014161293394863605, "learning_rate": 3.0324174630755863e-05, "loss": 0.0028, "step": 7250 }, { "epoch": 1.5768896611642051, "grad_norm": 0.005616712383925915, "learning_rate": 3.0297024326672463e-05, "loss": 0.0012, "step": 7260 }, { "epoch": 1.5790616854908774, "grad_norm": 0.008326984010636806, "learning_rate": 3.0269874022589056e-05, "loss": 0.001, "step": 7270 }, { "epoch": 1.58123370981755, "grad_norm": 0.016356853768229485, "learning_rate": 3.024272371850565e-05, "loss": 0.0765, "step": 7280 }, { "epoch": 1.5834057341442223, "grad_norm": 0.020525842905044556, "learning_rate": 3.0215573414422242e-05, "loss": 0.0195, "step": 7290 }, { "epoch": 1.5855777584708948, "grad_norm": 0.012340064160525799, "learning_rate": 3.018842311033884e-05, "loss": 0.003, "step": 7300 }, { "epoch": 1.5877497827975673, "grad_norm": 0.008134052157402039, "learning_rate": 3.016127280625543e-05, "loss": 0.0016, "step": 7310 }, { "epoch": 1.5899218071242398, "grad_norm": 0.8095653057098389, "learning_rate": 3.0134122502172024e-05, "loss": 0.0154, "step": 7320 }, { "epoch": 1.5920938314509123, "grad_norm": 0.007166721858084202, "learning_rate": 3.0106972198088617e-05, "loss": 0.0307, "step": 7330 }, { "epoch": 1.5942658557775848, "grad_norm": 0.036194682121276855, "learning_rate": 3.0079821894005217e-05, "loss": 0.0414, "step": 7340 }, { "epoch": 1.5964378801042571, "grad_norm": 1.6515989303588867, "learning_rate": 3.005267158992181e-05, "loss": 0.0297, "step": 7350 }, { "epoch": 1.5986099044309297, "grad_norm": 0.015606805682182312, "learning_rate": 3.0025521285838403e-05, "loss": 0.0033, "step": 7360 }, { "epoch": 1.600781928757602, "grad_norm": 0.010313029401004314, "learning_rate": 2.9998370981754996e-05, "loss": 0.0069, "step": 7370 }, { "epoch": 1.6029539530842745, "grad_norm": 0.2324070781469345, "learning_rate": 2.997122067767159e-05, "loss": 0.0293, "step": 7380 }, { "epoch": 1.605125977410947, "grad_norm": 2.7912869453430176, "learning_rate": 2.9944070373588186e-05, "loss": 0.0525, "step": 7390 }, { "epoch": 1.6072980017376195, "grad_norm": 0.10823327302932739, "learning_rate": 2.991692006950478e-05, "loss": 0.0319, "step": 7400 }, { "epoch": 1.609470026064292, "grad_norm": 0.3346640169620514, "learning_rate": 2.9889769765421372e-05, "loss": 0.0528, "step": 7410 }, { "epoch": 1.6116420503909645, "grad_norm": 0.4158894121646881, "learning_rate": 2.986261946133797e-05, "loss": 0.031, "step": 7420 }, { "epoch": 1.6138140747176368, "grad_norm": 0.005024017300456762, "learning_rate": 2.9835469157254565e-05, "loss": 0.0017, "step": 7430 }, { "epoch": 1.6159860990443093, "grad_norm": 0.00963117741048336, "learning_rate": 2.9808318853171158e-05, "loss": 0.0482, "step": 7440 }, { "epoch": 1.6181581233709816, "grad_norm": 0.011439714580774307, "learning_rate": 2.978116854908775e-05, "loss": 0.0035, "step": 7450 }, { "epoch": 1.6203301476976542, "grad_norm": 0.004979605786502361, "learning_rate": 2.9754018245004344e-05, "loss": 0.0055, "step": 7460 }, { "epoch": 1.6225021720243267, "grad_norm": 0.005157648120075464, "learning_rate": 2.9726867940920937e-05, "loss": 0.0006, "step": 7470 }, { "epoch": 1.6246741963509992, "grad_norm": 0.004680620972067118, "learning_rate": 2.9699717636837533e-05, "loss": 0.0241, "step": 7480 }, { "epoch": 1.6268462206776717, "grad_norm": 0.004479921422898769, "learning_rate": 2.967256733275413e-05, "loss": 0.0006, "step": 7490 }, { "epoch": 1.629018245004344, "grad_norm": 0.00438233558088541, "learning_rate": 2.9645417028670726e-05, "loss": 0.0464, "step": 7500 }, { "epoch": 1.6311902693310165, "grad_norm": 0.29184991121292114, "learning_rate": 2.961826672458732e-05, "loss": 0.0013, "step": 7510 }, { "epoch": 1.6333622936576888, "grad_norm": 3.4082252979278564, "learning_rate": 2.9591116420503912e-05, "loss": 0.0357, "step": 7520 }, { "epoch": 1.6355343179843613, "grad_norm": 0.11537332832813263, "learning_rate": 2.9563966116420505e-05, "loss": 0.003, "step": 7530 }, { "epoch": 1.6377063423110338, "grad_norm": 0.08221070468425751, "learning_rate": 2.9536815812337098e-05, "loss": 0.0031, "step": 7540 }, { "epoch": 1.6398783666377064, "grad_norm": 0.00624883221462369, "learning_rate": 2.950966550825369e-05, "loss": 0.0018, "step": 7550 }, { "epoch": 1.6420503909643789, "grad_norm": 0.004528459627181292, "learning_rate": 2.9482515204170284e-05, "loss": 0.0006, "step": 7560 }, { "epoch": 1.6442224152910514, "grad_norm": 0.007416573353111744, "learning_rate": 2.9455364900086884e-05, "loss": 0.0349, "step": 7570 }, { "epoch": 1.6463944396177237, "grad_norm": 0.020044928416609764, "learning_rate": 2.9428214596003477e-05, "loss": 0.004, "step": 7580 }, { "epoch": 1.6485664639443962, "grad_norm": 0.01522949431091547, "learning_rate": 2.9401064291920073e-05, "loss": 0.0012, "step": 7590 }, { "epoch": 1.6507384882710685, "grad_norm": 0.012193024158477783, "learning_rate": 2.9373913987836666e-05, "loss": 0.0382, "step": 7600 }, { "epoch": 1.652910512597741, "grad_norm": 0.004249626770615578, "learning_rate": 2.934676368375326e-05, "loss": 0.0019, "step": 7610 }, { "epoch": 1.6550825369244135, "grad_norm": 0.0253335889428854, "learning_rate": 2.9319613379669852e-05, "loss": 0.0213, "step": 7620 }, { "epoch": 1.657254561251086, "grad_norm": 0.0044834488071501255, "learning_rate": 2.9292463075586445e-05, "loss": 0.0022, "step": 7630 }, { "epoch": 1.6594265855777586, "grad_norm": 0.1263313889503479, "learning_rate": 2.9265312771503038e-05, "loss": 0.0162, "step": 7640 }, { "epoch": 1.661598609904431, "grad_norm": 0.46258336305618286, "learning_rate": 2.9238162467419638e-05, "loss": 0.0038, "step": 7650 }, { "epoch": 1.6637706342311034, "grad_norm": 2.930629014968872, "learning_rate": 2.921101216333623e-05, "loss": 0.0198, "step": 7660 }, { "epoch": 1.665942658557776, "grad_norm": 0.0038381244521588087, "learning_rate": 2.9183861859252824e-05, "loss": 0.0004, "step": 7670 }, { "epoch": 1.6681146828844482, "grad_norm": 0.013386134058237076, "learning_rate": 2.915671155516942e-05, "loss": 0.0656, "step": 7680 }, { "epoch": 1.6702867072111207, "grad_norm": 0.00613776408135891, "learning_rate": 2.9129561251086014e-05, "loss": 0.0009, "step": 7690 }, { "epoch": 1.6724587315377932, "grad_norm": 0.005761744920164347, "learning_rate": 2.9102410947002607e-05, "loss": 0.0017, "step": 7700 }, { "epoch": 1.6746307558644657, "grad_norm": 0.01169886626303196, "learning_rate": 2.90752606429192e-05, "loss": 0.0161, "step": 7710 }, { "epoch": 1.6768027801911383, "grad_norm": 0.005424773786216974, "learning_rate": 2.90481103388358e-05, "loss": 0.0016, "step": 7720 }, { "epoch": 1.6789748045178106, "grad_norm": 0.004224838223308325, "learning_rate": 2.9020960034752392e-05, "loss": 0.0016, "step": 7730 }, { "epoch": 1.681146828844483, "grad_norm": 0.004218948073685169, "learning_rate": 2.8993809730668985e-05, "loss": 0.0005, "step": 7740 }, { "epoch": 1.6833188531711554, "grad_norm": 0.030858902260661125, "learning_rate": 2.896665942658558e-05, "loss": 0.0009, "step": 7750 }, { "epoch": 1.6854908774978279, "grad_norm": 0.0144006023183465, "learning_rate": 2.893950912250217e-05, "loss": 0.0419, "step": 7760 }, { "epoch": 1.6876629018245004, "grad_norm": 0.00441219424828887, "learning_rate": 2.8912358818418768e-05, "loss": 0.0011, "step": 7770 }, { "epoch": 1.689834926151173, "grad_norm": 0.004708564840257168, "learning_rate": 2.888520851433536e-05, "loss": 0.0049, "step": 7780 }, { "epoch": 1.6920069504778454, "grad_norm": 0.004145478829741478, "learning_rate": 2.8858058210251954e-05, "loss": 0.0154, "step": 7790 }, { "epoch": 1.694178974804518, "grad_norm": 0.012138472869992256, "learning_rate": 2.8830907906168554e-05, "loss": 0.0174, "step": 7800 }, { "epoch": 1.6963509991311903, "grad_norm": 0.03401601314544678, "learning_rate": 2.8803757602085147e-05, "loss": 0.0013, "step": 7810 }, { "epoch": 1.6985230234578628, "grad_norm": 0.7554841041564941, "learning_rate": 2.877660729800174e-05, "loss": 0.0083, "step": 7820 }, { "epoch": 1.700695047784535, "grad_norm": 0.0041222646832466125, "learning_rate": 2.8749456993918333e-05, "loss": 0.0012, "step": 7830 }, { "epoch": 1.7028670721112076, "grad_norm": 0.003780083265155554, "learning_rate": 2.8722306689834926e-05, "loss": 0.0007, "step": 7840 }, { "epoch": 1.70503909643788, "grad_norm": 4.778973579406738, "learning_rate": 2.869515638575152e-05, "loss": 0.0362, "step": 7850 }, { "epoch": 1.7072111207645526, "grad_norm": 0.005503606982529163, "learning_rate": 2.8668006081668115e-05, "loss": 0.0258, "step": 7860 }, { "epoch": 1.7093831450912251, "grad_norm": 0.003981790505349636, "learning_rate": 2.864085577758471e-05, "loss": 0.0149, "step": 7870 }, { "epoch": 1.7115551694178974, "grad_norm": 0.003801483428105712, "learning_rate": 2.8613705473501308e-05, "loss": 0.0013, "step": 7880 }, { "epoch": 1.71372719374457, "grad_norm": 0.0036825397983193398, "learning_rate": 2.85865551694179e-05, "loss": 0.0068, "step": 7890 }, { "epoch": 1.7158992180712422, "grad_norm": 0.004739957861602306, "learning_rate": 2.8559404865334494e-05, "loss": 0.0004, "step": 7900 }, { "epoch": 1.7180712423979148, "grad_norm": 8.044977188110352, "learning_rate": 2.8532254561251087e-05, "loss": 0.0233, "step": 7910 }, { "epoch": 1.7202432667245873, "grad_norm": 3.087222099304199, "learning_rate": 2.850510425716768e-05, "loss": 0.0223, "step": 7920 }, { "epoch": 1.7224152910512598, "grad_norm": 0.003672607010230422, "learning_rate": 2.8477953953084273e-05, "loss": 0.0361, "step": 7930 }, { "epoch": 1.7245873153779323, "grad_norm": 0.004437604453414679, "learning_rate": 2.845080364900087e-05, "loss": 0.0008, "step": 7940 }, { "epoch": 1.7267593397046048, "grad_norm": 0.006330874748528004, "learning_rate": 2.8423653344917466e-05, "loss": 0.0191, "step": 7950 }, { "epoch": 1.7289313640312771, "grad_norm": 0.3795784115791321, "learning_rate": 2.839650304083406e-05, "loss": 0.0023, "step": 7960 }, { "epoch": 1.7311033883579496, "grad_norm": 0.00351201300509274, "learning_rate": 2.8369352736750655e-05, "loss": 0.0163, "step": 7970 }, { "epoch": 1.733275412684622, "grad_norm": 1.7485036849975586, "learning_rate": 2.834220243266725e-05, "loss": 0.0028, "step": 7980 }, { "epoch": 1.7354474370112944, "grad_norm": 0.0035417363978922367, "learning_rate": 2.831505212858384e-05, "loss": 0.0199, "step": 7990 }, { "epoch": 1.737619461337967, "grad_norm": 0.0039270068518817425, "learning_rate": 2.8287901824500434e-05, "loss": 0.0013, "step": 8000 }, { "epoch": 1.7397914856646395, "grad_norm": 0.004426254890859127, "learning_rate": 2.8260751520417027e-05, "loss": 0.0074, "step": 8010 }, { "epoch": 1.741963509991312, "grad_norm": 0.0035433934535831213, "learning_rate": 2.823360121633362e-05, "loss": 0.0154, "step": 8020 }, { "epoch": 1.7441355343179845, "grad_norm": 0.003466078545898199, "learning_rate": 2.820645091225022e-05, "loss": 0.011, "step": 8030 }, { "epoch": 1.7463075586446568, "grad_norm": 0.1723119467496872, "learning_rate": 2.8179300608166813e-05, "loss": 0.0241, "step": 8040 }, { "epoch": 1.7484795829713293, "grad_norm": 0.005867532454431057, "learning_rate": 2.8152150304083406e-05, "loss": 0.0022, "step": 8050 }, { "epoch": 1.7506516072980016, "grad_norm": 0.005134823732078075, "learning_rate": 2.8125000000000003e-05, "loss": 0.0006, "step": 8060 }, { "epoch": 1.7528236316246741, "grad_norm": 0.003376134904101491, "learning_rate": 2.8097849695916596e-05, "loss": 0.0021, "step": 8070 }, { "epoch": 1.7549956559513467, "grad_norm": 0.007784237619489431, "learning_rate": 2.807069939183319e-05, "loss": 0.0128, "step": 8080 }, { "epoch": 1.7571676802780192, "grad_norm": 0.5563734173774719, "learning_rate": 2.804354908774978e-05, "loss": 0.0192, "step": 8090 }, { "epoch": 1.7593397046046917, "grad_norm": 0.0036883733700960875, "learning_rate": 2.801639878366638e-05, "loss": 0.0062, "step": 8100 }, { "epoch": 1.761511728931364, "grad_norm": 1.4399293661117554, "learning_rate": 2.7989248479582974e-05, "loss": 0.0084, "step": 8110 }, { "epoch": 1.7636837532580365, "grad_norm": 0.003304542973637581, "learning_rate": 2.7962098175499567e-05, "loss": 0.0004, "step": 8120 }, { "epoch": 1.7658557775847088, "grad_norm": 0.003243118291720748, "learning_rate": 2.793494787141616e-05, "loss": 0.0088, "step": 8130 }, { "epoch": 1.7680278019113813, "grad_norm": 0.12104818224906921, "learning_rate": 2.7907797567332754e-05, "loss": 0.0015, "step": 8140 }, { "epoch": 1.7701998262380538, "grad_norm": 0.0031961267814040184, "learning_rate": 2.788064726324935e-05, "loss": 0.0205, "step": 8150 }, { "epoch": 1.7723718505647263, "grad_norm": 0.03292842581868172, "learning_rate": 2.7853496959165943e-05, "loss": 0.0036, "step": 8160 }, { "epoch": 1.7745438748913989, "grad_norm": 0.003530114656314254, "learning_rate": 2.7826346655082536e-05, "loss": 0.0016, "step": 8170 }, { "epoch": 1.7767158992180714, "grad_norm": 0.012900624424219131, "learning_rate": 2.7799196350999136e-05, "loss": 0.0022, "step": 8180 }, { "epoch": 1.7788879235447437, "grad_norm": 0.003065924858674407, "learning_rate": 2.777204604691573e-05, "loss": 0.0013, "step": 8190 }, { "epoch": 1.7810599478714162, "grad_norm": 0.0030597923323512077, "learning_rate": 2.7744895742832322e-05, "loss": 0.0009, "step": 8200 }, { "epoch": 1.7832319721980885, "grad_norm": 0.0034443363547325134, "learning_rate": 2.7717745438748915e-05, "loss": 0.0933, "step": 8210 }, { "epoch": 1.785403996524761, "grad_norm": 0.00810242909938097, "learning_rate": 2.7690595134665508e-05, "loss": 0.0394, "step": 8220 }, { "epoch": 1.7875760208514335, "grad_norm": 2.509730339050293, "learning_rate": 2.7663444830582104e-05, "loss": 0.0056, "step": 8230 }, { "epoch": 1.789748045178106, "grad_norm": 0.024317113682627678, "learning_rate": 2.7636294526498697e-05, "loss": 0.0033, "step": 8240 }, { "epoch": 1.7919200695047786, "grad_norm": 0.18328578770160675, "learning_rate": 2.760914422241529e-05, "loss": 0.0531, "step": 8250 }, { "epoch": 1.794092093831451, "grad_norm": 0.008703135885298252, "learning_rate": 2.758199391833189e-05, "loss": 0.0014, "step": 8260 }, { "epoch": 1.7962641181581234, "grad_norm": 0.005929219536483288, "learning_rate": 2.7554843614248483e-05, "loss": 0.0204, "step": 8270 }, { "epoch": 1.7984361424847957, "grad_norm": 0.11236939579248428, "learning_rate": 2.7527693310165076e-05, "loss": 0.0121, "step": 8280 }, { "epoch": 1.8006081668114682, "grad_norm": 0.00475434260442853, "learning_rate": 2.750054300608167e-05, "loss": 0.01, "step": 8290 }, { "epoch": 1.8027801911381407, "grad_norm": 0.004544104915112257, "learning_rate": 2.7473392701998262e-05, "loss": 0.0007, "step": 8300 }, { "epoch": 1.8049522154648132, "grad_norm": 0.0046235802583396435, "learning_rate": 2.7446242397914855e-05, "loss": 0.0018, "step": 8310 }, { "epoch": 1.8071242397914857, "grad_norm": 4.546655654907227, "learning_rate": 2.741909209383145e-05, "loss": 0.0212, "step": 8320 }, { "epoch": 1.8092962641181582, "grad_norm": 0.25990429520606995, "learning_rate": 2.7391941789748048e-05, "loss": 0.0025, "step": 8330 }, { "epoch": 1.8114682884448305, "grad_norm": 0.003434494836255908, "learning_rate": 2.736479148566464e-05, "loss": 0.0004, "step": 8340 }, { "epoch": 1.813640312771503, "grad_norm": 0.0053975642658770084, "learning_rate": 2.7337641181581237e-05, "loss": 0.0004, "step": 8350 }, { "epoch": 1.8158123370981754, "grad_norm": 0.003612579545006156, "learning_rate": 2.731049087749783e-05, "loss": 0.0004, "step": 8360 }, { "epoch": 1.8179843614248479, "grad_norm": 0.004297258798032999, "learning_rate": 2.7283340573414423e-05, "loss": 0.0133, "step": 8370 }, { "epoch": 1.8201563857515204, "grad_norm": 0.00486018368974328, "learning_rate": 2.7256190269331016e-05, "loss": 0.0007, "step": 8380 }, { "epoch": 1.822328410078193, "grad_norm": 0.00850472692400217, "learning_rate": 2.722903996524761e-05, "loss": 0.0006, "step": 8390 }, { "epoch": 1.8245004344048654, "grad_norm": 4.226654529571533, "learning_rate": 2.7201889661164202e-05, "loss": 0.0146, "step": 8400 }, { "epoch": 1.826672458731538, "grad_norm": 0.0532936193048954, "learning_rate": 2.7174739357080802e-05, "loss": 0.0008, "step": 8410 }, { "epoch": 1.8288444830582102, "grad_norm": 0.0031467361841350794, "learning_rate": 2.7147589052997395e-05, "loss": 0.002, "step": 8420 }, { "epoch": 1.8310165073848828, "grad_norm": 0.0035019817296415567, "learning_rate": 2.7120438748913988e-05, "loss": 0.0013, "step": 8430 }, { "epoch": 1.833188531711555, "grad_norm": 0.15777327120304108, "learning_rate": 2.7093288444830585e-05, "loss": 0.0053, "step": 8440 }, { "epoch": 1.8353605560382276, "grad_norm": 0.0041242060251533985, "learning_rate": 2.7066138140747178e-05, "loss": 0.0009, "step": 8450 }, { "epoch": 1.8375325803649, "grad_norm": 0.004727715160697699, "learning_rate": 2.703898783666377e-05, "loss": 0.0372, "step": 8460 }, { "epoch": 1.8397046046915726, "grad_norm": 0.2560582160949707, "learning_rate": 2.7011837532580364e-05, "loss": 0.0013, "step": 8470 }, { "epoch": 1.8418766290182451, "grad_norm": 0.002973441733047366, "learning_rate": 2.6984687228496964e-05, "loss": 0.0075, "step": 8480 }, { "epoch": 1.8440486533449174, "grad_norm": 0.03613729402422905, "learning_rate": 2.6957536924413557e-05, "loss": 0.0368, "step": 8490 }, { "epoch": 1.84622067767159, "grad_norm": 0.006184692494571209, "learning_rate": 2.693038662033015e-05, "loss": 0.0405, "step": 8500 }, { "epoch": 1.8483927019982622, "grad_norm": 0.0032991603948175907, "learning_rate": 2.6903236316246743e-05, "loss": 0.0159, "step": 8510 }, { "epoch": 1.8505647263249347, "grad_norm": 0.0034411856904625893, "learning_rate": 2.687608601216334e-05, "loss": 0.0012, "step": 8520 }, { "epoch": 1.8527367506516073, "grad_norm": 0.016189776360988617, "learning_rate": 2.6848935708079932e-05, "loss": 0.1094, "step": 8530 }, { "epoch": 1.8549087749782798, "grad_norm": 0.45711269974708557, "learning_rate": 2.6821785403996525e-05, "loss": 0.0633, "step": 8540 }, { "epoch": 1.8570807993049523, "grad_norm": 2.4776110649108887, "learning_rate": 2.6794635099913118e-05, "loss": 0.0258, "step": 8550 }, { "epoch": 1.8592528236316248, "grad_norm": 0.008575936779379845, "learning_rate": 2.6767484795829718e-05, "loss": 0.0021, "step": 8560 }, { "epoch": 1.861424847958297, "grad_norm": 1.326846957206726, "learning_rate": 2.674033449174631e-05, "loss": 0.0428, "step": 8570 }, { "epoch": 1.8635968722849696, "grad_norm": 0.011363858357071877, "learning_rate": 2.6713184187662904e-05, "loss": 0.0029, "step": 8580 }, { "epoch": 1.865768896611642, "grad_norm": 0.015298294834792614, "learning_rate": 2.6686033883579497e-05, "loss": 0.0035, "step": 8590 }, { "epoch": 1.8679409209383144, "grad_norm": 0.014568958431482315, "learning_rate": 2.665888357949609e-05, "loss": 0.001, "step": 8600 }, { "epoch": 1.870112945264987, "grad_norm": 1.8582741022109985, "learning_rate": 2.6631733275412686e-05, "loss": 0.048, "step": 8610 }, { "epoch": 1.8722849695916595, "grad_norm": 0.08303183317184448, "learning_rate": 2.660458297132928e-05, "loss": 0.0029, "step": 8620 }, { "epoch": 1.874456993918332, "grad_norm": 1.4877798557281494, "learning_rate": 2.6577432667245872e-05, "loss": 0.0439, "step": 8630 }, { "epoch": 1.8766290182450045, "grad_norm": 0.13910454511642456, "learning_rate": 2.6550282363162472e-05, "loss": 0.0043, "step": 8640 }, { "epoch": 1.8788010425716768, "grad_norm": 0.07800116389989853, "learning_rate": 2.6523132059079065e-05, "loss": 0.0185, "step": 8650 }, { "epoch": 1.8809730668983493, "grad_norm": 1.8513163328170776, "learning_rate": 2.6495981754995658e-05, "loss": 0.0057, "step": 8660 }, { "epoch": 1.8831450912250216, "grad_norm": 0.006727566011250019, "learning_rate": 2.646883145091225e-05, "loss": 0.0011, "step": 8670 }, { "epoch": 1.8853171155516941, "grad_norm": 0.004973508417606354, "learning_rate": 2.6441681146828844e-05, "loss": 0.0011, "step": 8680 }, { "epoch": 1.8874891398783666, "grad_norm": 0.011856785044074059, "learning_rate": 2.6414530842745437e-05, "loss": 0.0356, "step": 8690 }, { "epoch": 1.8896611642050392, "grad_norm": 0.025869742035865784, "learning_rate": 2.6387380538662034e-05, "loss": 0.0203, "step": 8700 }, { "epoch": 1.8918331885317117, "grad_norm": 0.010730310343205929, "learning_rate": 2.636023023457863e-05, "loss": 0.018, "step": 8710 }, { "epoch": 1.894005212858384, "grad_norm": 0.009472482837736607, "learning_rate": 2.6333079930495223e-05, "loss": 0.0025, "step": 8720 }, { "epoch": 1.8961772371850565, "grad_norm": 0.0053679742850363255, "learning_rate": 2.630592962641182e-05, "loss": 0.0017, "step": 8730 }, { "epoch": 1.8983492615117288, "grad_norm": 0.004012218210846186, "learning_rate": 2.6278779322328412e-05, "loss": 0.0005, "step": 8740 }, { "epoch": 1.9005212858384013, "grad_norm": 0.019520413130521774, "learning_rate": 2.6251629018245006e-05, "loss": 0.0337, "step": 8750 }, { "epoch": 1.9026933101650738, "grad_norm": 0.014791909605264664, "learning_rate": 2.62244787141616e-05, "loss": 0.0047, "step": 8760 }, { "epoch": 1.9048653344917463, "grad_norm": 0.00672262255102396, "learning_rate": 2.619732841007819e-05, "loss": 0.0015, "step": 8770 }, { "epoch": 1.9070373588184188, "grad_norm": 0.005134627688676119, "learning_rate": 2.6170178105994785e-05, "loss": 0.0008, "step": 8780 }, { "epoch": 1.9092093831450914, "grad_norm": 0.009598666802048683, "learning_rate": 2.6143027801911384e-05, "loss": 0.0689, "step": 8790 }, { "epoch": 1.9113814074717637, "grad_norm": 0.010584644973278046, "learning_rate": 2.6115877497827977e-05, "loss": 0.0013, "step": 8800 }, { "epoch": 1.9135534317984362, "grad_norm": 0.003948741592466831, "learning_rate": 2.6088727193744574e-05, "loss": 0.0026, "step": 8810 }, { "epoch": 1.9157254561251085, "grad_norm": 0.02011152356863022, "learning_rate": 2.6061576889661167e-05, "loss": 0.017, "step": 8820 }, { "epoch": 1.917897480451781, "grad_norm": 0.013354657217860222, "learning_rate": 2.603442658557776e-05, "loss": 0.0027, "step": 8830 }, { "epoch": 1.9200695047784535, "grad_norm": 0.003748238319531083, "learning_rate": 2.6007276281494353e-05, "loss": 0.0152, "step": 8840 }, { "epoch": 1.922241529105126, "grad_norm": 0.01269373670220375, "learning_rate": 2.5980125977410946e-05, "loss": 0.0008, "step": 8850 }, { "epoch": 1.9244135534317985, "grad_norm": 0.00882689282298088, "learning_rate": 2.595297567332754e-05, "loss": 0.0025, "step": 8860 }, { "epoch": 1.926585577758471, "grad_norm": 0.005238520447164774, "learning_rate": 2.592582536924414e-05, "loss": 0.004, "step": 8870 }, { "epoch": 1.9287576020851434, "grad_norm": 0.06422706693410873, "learning_rate": 2.589867506516073e-05, "loss": 0.0325, "step": 8880 }, { "epoch": 1.9309296264118156, "grad_norm": 0.0035638187546283007, "learning_rate": 2.5871524761077325e-05, "loss": 0.0205, "step": 8890 }, { "epoch": 1.9331016507384882, "grad_norm": 0.1594795435667038, "learning_rate": 2.584437445699392e-05, "loss": 0.0606, "step": 8900 }, { "epoch": 1.9352736750651607, "grad_norm": 0.012897428125143051, "learning_rate": 2.5817224152910514e-05, "loss": 0.0529, "step": 8910 }, { "epoch": 1.9374456993918332, "grad_norm": 0.00483663659542799, "learning_rate": 2.5790073848827107e-05, "loss": 0.004, "step": 8920 }, { "epoch": 1.9396177237185057, "grad_norm": 0.004800234921276569, "learning_rate": 2.57629235447437e-05, "loss": 0.0017, "step": 8930 }, { "epoch": 1.9417897480451782, "grad_norm": 0.09830110520124435, "learning_rate": 2.57357732406603e-05, "loss": 0.0016, "step": 8940 }, { "epoch": 1.9439617723718505, "grad_norm": 0.0042363316752016544, "learning_rate": 2.5708622936576893e-05, "loss": 0.0442, "step": 8950 }, { "epoch": 1.946133796698523, "grad_norm": 0.014568965882062912, "learning_rate": 2.5681472632493486e-05, "loss": 0.0379, "step": 8960 }, { "epoch": 1.9483058210251953, "grad_norm": 0.010344590991735458, "learning_rate": 2.565432232841008e-05, "loss": 0.0207, "step": 8970 }, { "epoch": 1.9504778453518679, "grad_norm": 0.017466790974140167, "learning_rate": 2.5627172024326672e-05, "loss": 0.0023, "step": 8980 }, { "epoch": 1.9526498696785404, "grad_norm": 0.027372797951102257, "learning_rate": 2.560002172024327e-05, "loss": 0.0014, "step": 8990 }, { "epoch": 1.954821894005213, "grad_norm": 0.049418918788433075, "learning_rate": 2.557287141615986e-05, "loss": 0.0149, "step": 9000 }, { "epoch": 1.9569939183318854, "grad_norm": 0.002828997327014804, "learning_rate": 2.5545721112076454e-05, "loss": 0.0004, "step": 9010 }, { "epoch": 1.959165942658558, "grad_norm": 0.0028039535973221064, "learning_rate": 2.5518570807993054e-05, "loss": 0.0168, "step": 9020 }, { "epoch": 1.9613379669852302, "grad_norm": 0.01572626270353794, "learning_rate": 2.5491420503909647e-05, "loss": 0.0564, "step": 9030 }, { "epoch": 1.9635099913119027, "grad_norm": 0.08528812974691391, "learning_rate": 2.546427019982624e-05, "loss": 0.0128, "step": 9040 }, { "epoch": 1.965682015638575, "grad_norm": 0.018535811454057693, "learning_rate": 2.5437119895742833e-05, "loss": 0.0102, "step": 9050 }, { "epoch": 1.9678540399652475, "grad_norm": 0.007433717139065266, "learning_rate": 2.5409969591659426e-05, "loss": 0.0039, "step": 9060 }, { "epoch": 1.97002606429192, "grad_norm": 0.0035428928676992655, "learning_rate": 2.538281928757602e-05, "loss": 0.0226, "step": 9070 }, { "epoch": 1.9721980886185926, "grad_norm": 0.011540411040186882, "learning_rate": 2.5355668983492616e-05, "loss": 0.0093, "step": 9080 }, { "epoch": 1.974370112945265, "grad_norm": 1.2338663339614868, "learning_rate": 2.5328518679409212e-05, "loss": 0.0123, "step": 9090 }, { "epoch": 1.9765421372719374, "grad_norm": 0.003352933330461383, "learning_rate": 2.5301368375325805e-05, "loss": 0.0008, "step": 9100 }, { "epoch": 1.97871416159861, "grad_norm": 0.0044760508462786674, "learning_rate": 2.52742180712424e-05, "loss": 0.0374, "step": 9110 }, { "epoch": 1.9808861859252822, "grad_norm": 0.0031131410505622625, "learning_rate": 2.5247067767158995e-05, "loss": 0.0009, "step": 9120 }, { "epoch": 1.9830582102519547, "grad_norm": 0.1342082917690277, "learning_rate": 2.5219917463075588e-05, "loss": 0.0052, "step": 9130 }, { "epoch": 1.9852302345786272, "grad_norm": 0.005118147935718298, "learning_rate": 2.519276715899218e-05, "loss": 0.0006, "step": 9140 }, { "epoch": 1.9874022589052998, "grad_norm": 0.0030676417518407106, "learning_rate": 2.5165616854908774e-05, "loss": 0.0006, "step": 9150 }, { "epoch": 1.9895742832319723, "grad_norm": 0.005789658520370722, "learning_rate": 2.5138466550825367e-05, "loss": 0.0044, "step": 9160 }, { "epoch": 1.9917463075586448, "grad_norm": 0.025706937536597252, "learning_rate": 2.5111316246741966e-05, "loss": 0.0048, "step": 9170 }, { "epoch": 1.993918331885317, "grad_norm": 0.002730116480961442, "learning_rate": 2.508416594265856e-05, "loss": 0.0008, "step": 9180 }, { "epoch": 1.9960903562119896, "grad_norm": 0.0026551971677690744, "learning_rate": 2.5057015638575156e-05, "loss": 0.001, "step": 9190 }, { "epoch": 1.998262380538662, "grad_norm": 0.0026213659439235926, "learning_rate": 2.502986533449175e-05, "loss": 0.0006, "step": 9200 }, { "epoch": 2.0, "eval_f1": 0.4260869565217391, "eval_loss": 0.0783366709947586, "eval_runtime": 82.7104, "eval_samples_per_second": 120.601, "eval_steps_per_second": 7.544, "step": 9208 }, { "epoch": 2.0004344048653344, "grad_norm": 0.0034962480422109365, "learning_rate": 2.5002715030408342e-05, "loss": 0.0416, "step": 9210 }, { "epoch": 2.002606429192007, "grad_norm": 0.0033830376341938972, "learning_rate": 2.4975564726324935e-05, "loss": 0.0013, "step": 9220 }, { "epoch": 2.0047784535186794, "grad_norm": 0.020555773749947548, "learning_rate": 2.494841442224153e-05, "loss": 0.0007, "step": 9230 }, { "epoch": 2.006950477845352, "grad_norm": 0.00492148706689477, "learning_rate": 2.4921264118158124e-05, "loss": 0.0037, "step": 9240 }, { "epoch": 2.0091225021720245, "grad_norm": 0.06383500248193741, "learning_rate": 2.4894113814074717e-05, "loss": 0.0013, "step": 9250 }, { "epoch": 2.011294526498697, "grad_norm": 0.007955756969749928, "learning_rate": 2.486696350999131e-05, "loss": 0.0005, "step": 9260 }, { "epoch": 2.013466550825369, "grad_norm": 0.002853860380128026, "learning_rate": 2.4839813205907907e-05, "loss": 0.0223, "step": 9270 }, { "epoch": 2.0156385751520416, "grad_norm": 0.0032983573619276285, "learning_rate": 2.4812662901824503e-05, "loss": 0.0005, "step": 9280 }, { "epoch": 2.017810599478714, "grad_norm": 0.015170352533459663, "learning_rate": 2.4785512597741096e-05, "loss": 0.0023, "step": 9290 }, { "epoch": 2.0199826238053866, "grad_norm": 0.014421436935663223, "learning_rate": 2.4758362293657693e-05, "loss": 0.001, "step": 9300 }, { "epoch": 2.022154648132059, "grad_norm": 0.7703703045845032, "learning_rate": 2.4731211989574286e-05, "loss": 0.0185, "step": 9310 }, { "epoch": 2.0243266724587317, "grad_norm": 0.003181320382282138, "learning_rate": 2.470406168549088e-05, "loss": 0.0006, "step": 9320 }, { "epoch": 2.026498696785404, "grad_norm": 0.004758354276418686, "learning_rate": 2.467691138140747e-05, "loss": 0.0023, "step": 9330 }, { "epoch": 2.0286707211120762, "grad_norm": 0.02517046220600605, "learning_rate": 2.4649761077324068e-05, "loss": 0.0263, "step": 9340 }, { "epoch": 2.0308427454387488, "grad_norm": 0.012879346497356892, "learning_rate": 2.462261077324066e-05, "loss": 0.0165, "step": 9350 }, { "epoch": 2.0330147697654213, "grad_norm": 0.06726440042257309, "learning_rate": 2.4595460469157254e-05, "loss": 0.0007, "step": 9360 }, { "epoch": 2.035186794092094, "grad_norm": 0.0029317401349544525, "learning_rate": 2.456831016507385e-05, "loss": 0.0176, "step": 9370 }, { "epoch": 2.0373588184187663, "grad_norm": 0.0026387099642306566, "learning_rate": 2.4541159860990447e-05, "loss": 0.001, "step": 9380 }, { "epoch": 2.039530842745439, "grad_norm": 12.957719802856445, "learning_rate": 2.451400955690704e-05, "loss": 0.0049, "step": 9390 }, { "epoch": 2.0417028670721113, "grad_norm": 0.012641023844480515, "learning_rate": 2.4486859252823633e-05, "loss": 0.0017, "step": 9400 }, { "epoch": 2.043874891398784, "grad_norm": 0.004745169542729855, "learning_rate": 2.4459708948740226e-05, "loss": 0.0157, "step": 9410 }, { "epoch": 2.046046915725456, "grad_norm": 0.008116445504128933, "learning_rate": 2.4432558644656822e-05, "loss": 0.0014, "step": 9420 }, { "epoch": 2.0482189400521285, "grad_norm": 0.005568632390350103, "learning_rate": 2.4405408340573415e-05, "loss": 0.0014, "step": 9430 }, { "epoch": 2.050390964378801, "grad_norm": 0.01662755198776722, "learning_rate": 2.437825803649001e-05, "loss": 0.0308, "step": 9440 }, { "epoch": 2.0525629887054735, "grad_norm": 0.46532508730888367, "learning_rate": 2.43511077324066e-05, "loss": 0.0008, "step": 9450 }, { "epoch": 2.054735013032146, "grad_norm": 0.05777544528245926, "learning_rate": 2.4323957428323198e-05, "loss": 0.0006, "step": 9460 }, { "epoch": 2.0569070373588185, "grad_norm": 0.003521420992910862, "learning_rate": 2.4296807124239794e-05, "loss": 0.0006, "step": 9470 }, { "epoch": 2.059079061685491, "grad_norm": 0.0035868764389306307, "learning_rate": 2.4269656820156387e-05, "loss": 0.0012, "step": 9480 }, { "epoch": 2.061251086012163, "grad_norm": 0.002437378978356719, "learning_rate": 2.4242506516072984e-05, "loss": 0.0023, "step": 9490 }, { "epoch": 2.0634231103388356, "grad_norm": 0.0028121236246079206, "learning_rate": 2.4215356211989577e-05, "loss": 0.0259, "step": 9500 }, { "epoch": 2.065595134665508, "grad_norm": 0.0023697256110608578, "learning_rate": 2.418820590790617e-05, "loss": 0.0139, "step": 9510 }, { "epoch": 2.0677671589921807, "grad_norm": 0.003545396961271763, "learning_rate": 2.4161055603822763e-05, "loss": 0.0173, "step": 9520 }, { "epoch": 2.069939183318853, "grad_norm": 0.0023652813397347927, "learning_rate": 2.413390529973936e-05, "loss": 0.0002, "step": 9530 }, { "epoch": 2.0721112076455257, "grad_norm": 0.0023803082294762135, "learning_rate": 2.4106754995655952e-05, "loss": 0.0003, "step": 9540 }, { "epoch": 2.074283231972198, "grad_norm": 3.721370220184326, "learning_rate": 2.4079604691572545e-05, "loss": 0.0289, "step": 9550 }, { "epoch": 2.0764552562988707, "grad_norm": 0.0023493345361202955, "learning_rate": 2.405245438748914e-05, "loss": 0.0017, "step": 9560 }, { "epoch": 2.078627280625543, "grad_norm": 0.0024111897218972445, "learning_rate": 2.4025304083405738e-05, "loss": 0.0007, "step": 9570 }, { "epoch": 2.0807993049522153, "grad_norm": 0.00238398858346045, "learning_rate": 2.399815377932233e-05, "loss": 0.0007, "step": 9580 }, { "epoch": 2.082971329278888, "grad_norm": 0.0024116358254104853, "learning_rate": 2.3971003475238924e-05, "loss": 0.0009, "step": 9590 }, { "epoch": 2.0851433536055604, "grad_norm": 0.0063408599235117435, "learning_rate": 2.3943853171155517e-05, "loss": 0.0205, "step": 9600 }, { "epoch": 2.087315377932233, "grad_norm": 0.21969419717788696, "learning_rate": 2.3916702867072113e-05, "loss": 0.0012, "step": 9610 }, { "epoch": 2.0894874022589054, "grad_norm": 0.003895159810781479, "learning_rate": 2.3889552562988706e-05, "loss": 0.0006, "step": 9620 }, { "epoch": 2.091659426585578, "grad_norm": 0.005609441548585892, "learning_rate": 2.386511728931364e-05, "loss": 0.0084, "step": 9630 }, { "epoch": 2.0938314509122504, "grad_norm": 0.002843148773536086, "learning_rate": 2.3837966985230237e-05, "loss": 0.0007, "step": 9640 }, { "epoch": 2.0960034752389225, "grad_norm": 0.0022862793412059546, "learning_rate": 2.381081668114683e-05, "loss": 0.0018, "step": 9650 }, { "epoch": 2.098175499565595, "grad_norm": 0.007815422490239143, "learning_rate": 2.3783666377063423e-05, "loss": 0.0011, "step": 9660 }, { "epoch": 2.1003475238922675, "grad_norm": 0.0026114368811249733, "learning_rate": 2.375651607298002e-05, "loss": 0.0004, "step": 9670 }, { "epoch": 2.10251954821894, "grad_norm": 0.002247220603749156, "learning_rate": 2.3729365768896612e-05, "loss": 0.0276, "step": 9680 }, { "epoch": 2.1046915725456126, "grad_norm": 0.0022397038992494345, "learning_rate": 2.370221546481321e-05, "loss": 0.0016, "step": 9690 }, { "epoch": 2.106863596872285, "grad_norm": 0.003445403417572379, "learning_rate": 2.36750651607298e-05, "loss": 0.0244, "step": 9700 }, { "epoch": 2.1090356211989576, "grad_norm": 0.002695605391636491, "learning_rate": 2.3647914856646394e-05, "loss": 0.0002, "step": 9710 }, { "epoch": 2.1112076455256297, "grad_norm": 0.00220641796477139, "learning_rate": 2.362076455256299e-05, "loss": 0.042, "step": 9720 }, { "epoch": 2.113379669852302, "grad_norm": 0.0028647775761783123, "learning_rate": 2.3593614248479584e-05, "loss": 0.0146, "step": 9730 }, { "epoch": 2.1155516941789747, "grad_norm": 0.00231398968026042, "learning_rate": 2.3566463944396177e-05, "loss": 0.0007, "step": 9740 }, { "epoch": 2.1177237185056472, "grad_norm": 0.003436851780861616, "learning_rate": 2.3539313640312773e-05, "loss": 0.0083, "step": 9750 }, { "epoch": 2.1198957428323197, "grad_norm": 0.01339609082788229, "learning_rate": 2.3512163336229366e-05, "loss": 0.0007, "step": 9760 }, { "epoch": 2.1220677671589923, "grad_norm": 0.035412754863500595, "learning_rate": 2.3485013032145963e-05, "loss": 0.0165, "step": 9770 }, { "epoch": 2.1242397914856648, "grad_norm": 0.012655205093324184, "learning_rate": 2.3457862728062556e-05, "loss": 0.0003, "step": 9780 }, { "epoch": 2.1264118158123373, "grad_norm": 0.10211930423974991, "learning_rate": 2.3430712423979152e-05, "loss": 0.0015, "step": 9790 }, { "epoch": 2.1285838401390094, "grad_norm": 0.11602195352315903, "learning_rate": 2.3403562119895745e-05, "loss": 0.0007, "step": 9800 }, { "epoch": 2.130755864465682, "grad_norm": 0.0032904541585594416, "learning_rate": 2.3376411815812338e-05, "loss": 0.0307, "step": 9810 }, { "epoch": 2.1329278887923544, "grad_norm": 0.0022344952449202538, "learning_rate": 2.334926151172893e-05, "loss": 0.0494, "step": 9820 }, { "epoch": 2.135099913119027, "grad_norm": 0.008065508678555489, "learning_rate": 2.3322111207645528e-05, "loss": 0.0079, "step": 9830 }, { "epoch": 2.1372719374456994, "grad_norm": 0.0043975287117064, "learning_rate": 2.329496090356212e-05, "loss": 0.0031, "step": 9840 }, { "epoch": 2.139443961772372, "grad_norm": 0.003410003613680601, "learning_rate": 2.3267810599478714e-05, "loss": 0.0006, "step": 9850 }, { "epoch": 2.1416159860990445, "grad_norm": 0.005292691756039858, "learning_rate": 2.324066029539531e-05, "loss": 0.0144, "step": 9860 }, { "epoch": 2.143788010425717, "grad_norm": 0.04600781202316284, "learning_rate": 2.3213509991311903e-05, "loss": 0.0011, "step": 9870 }, { "epoch": 2.145960034752389, "grad_norm": 0.06467035412788391, "learning_rate": 2.31863596872285e-05, "loss": 0.0013, "step": 9880 }, { "epoch": 2.1481320590790616, "grad_norm": 0.0022334170062094927, "learning_rate": 2.3159209383145092e-05, "loss": 0.0015, "step": 9890 }, { "epoch": 2.150304083405734, "grad_norm": 0.0021759674418717623, "learning_rate": 2.3132059079061685e-05, "loss": 0.0004, "step": 9900 }, { "epoch": 2.1524761077324066, "grad_norm": 0.002184486947953701, "learning_rate": 2.3104908774978282e-05, "loss": 0.0003, "step": 9910 }, { "epoch": 2.154648132059079, "grad_norm": 0.02917756699025631, "learning_rate": 2.3077758470894875e-05, "loss": 0.0203, "step": 9920 }, { "epoch": 2.1568201563857516, "grad_norm": 0.005450095981359482, "learning_rate": 2.3050608166811468e-05, "loss": 0.0004, "step": 9930 }, { "epoch": 2.158992180712424, "grad_norm": 0.002119156066328287, "learning_rate": 2.3023457862728064e-05, "loss": 0.0077, "step": 9940 }, { "epoch": 2.1611642050390962, "grad_norm": 0.0021557000000029802, "learning_rate": 2.2996307558644657e-05, "loss": 0.0003, "step": 9950 }, { "epoch": 2.1633362293657687, "grad_norm": 0.0028954967856407166, "learning_rate": 2.2969157254561254e-05, "loss": 0.0337, "step": 9960 }, { "epoch": 2.1655082536924413, "grad_norm": 0.0021395536605268717, "learning_rate": 2.2942006950477847e-05, "loss": 0.0006, "step": 9970 }, { "epoch": 2.167680278019114, "grad_norm": 0.005348121747374535, "learning_rate": 2.2914856646394443e-05, "loss": 0.0235, "step": 9980 }, { "epoch": 2.1698523023457863, "grad_norm": 0.05934173986315727, "learning_rate": 2.2887706342311036e-05, "loss": 0.0008, "step": 9990 }, { "epoch": 2.172024326672459, "grad_norm": 0.00287908548489213, "learning_rate": 2.286327106863597e-05, "loss": 0.026, "step": 10000 }, { "epoch": 2.1741963509991313, "grad_norm": 0.0036688735708594322, "learning_rate": 2.2836120764552566e-05, "loss": 0.0004, "step": 10010 }, { "epoch": 2.176368375325804, "grad_norm": 0.002232051221653819, "learning_rate": 2.280897046046916e-05, "loss": 0.0028, "step": 10020 }, { "epoch": 2.178540399652476, "grad_norm": 0.0030915099196135998, "learning_rate": 2.2781820156385752e-05, "loss": 0.0045, "step": 10030 }, { "epoch": 2.1807124239791484, "grad_norm": 0.019997352734208107, "learning_rate": 2.2754669852302345e-05, "loss": 0.0019, "step": 10040 }, { "epoch": 2.182884448305821, "grad_norm": 0.010031803511083126, "learning_rate": 2.2727519548218942e-05, "loss": 0.0034, "step": 10050 }, { "epoch": 2.1850564726324935, "grad_norm": 0.007232017815113068, "learning_rate": 2.2700369244135535e-05, "loss": 0.0005, "step": 10060 }, { "epoch": 2.187228496959166, "grad_norm": 0.0020421240478754044, "learning_rate": 2.2673218940052128e-05, "loss": 0.0002, "step": 10070 }, { "epoch": 2.1894005212858385, "grad_norm": 0.00201141694560647, "learning_rate": 2.2646068635968724e-05, "loss": 0.0003, "step": 10080 }, { "epoch": 2.191572545612511, "grad_norm": 0.0020108462776988745, "learning_rate": 2.261891833188532e-05, "loss": 0.0002, "step": 10090 }, { "epoch": 2.1937445699391835, "grad_norm": 0.010289808735251427, "learning_rate": 2.2591768027801914e-05, "loss": 0.0355, "step": 10100 }, { "epoch": 2.1959165942658556, "grad_norm": 0.0021487479098141193, "learning_rate": 2.2564617723718507e-05, "loss": 0.0006, "step": 10110 }, { "epoch": 2.198088618592528, "grad_norm": 0.0020929924212396145, "learning_rate": 2.25374674196351e-05, "loss": 0.0004, "step": 10120 }, { "epoch": 2.2002606429192006, "grad_norm": 0.0024244049564003944, "learning_rate": 2.2510317115551696e-05, "loss": 0.0004, "step": 10130 }, { "epoch": 2.202432667245873, "grad_norm": 0.007566337939351797, "learning_rate": 2.248316681146829e-05, "loss": 0.0008, "step": 10140 }, { "epoch": 2.2046046915725457, "grad_norm": 0.0022506555542349815, "learning_rate": 2.2456016507384882e-05, "loss": 0.0002, "step": 10150 }, { "epoch": 2.206776715899218, "grad_norm": 0.002210445236414671, "learning_rate": 2.2428866203301475e-05, "loss": 0.0006, "step": 10160 }, { "epoch": 2.2089487402258907, "grad_norm": 0.0020521217957139015, "learning_rate": 2.240171589921807e-05, "loss": 0.0005, "step": 10170 }, { "epoch": 2.211120764552563, "grad_norm": 0.015064552426338196, "learning_rate": 2.2374565595134668e-05, "loss": 0.0154, "step": 10180 }, { "epoch": 2.2132927888792353, "grad_norm": 0.0021721182856708765, "learning_rate": 2.234741529105126e-05, "loss": 0.0005, "step": 10190 }, { "epoch": 2.215464813205908, "grad_norm": 0.001961242873221636, "learning_rate": 2.2320264986967854e-05, "loss": 0.0004, "step": 10200 }, { "epoch": 2.2176368375325803, "grad_norm": 0.005401493050158024, "learning_rate": 2.229311468288445e-05, "loss": 0.0003, "step": 10210 }, { "epoch": 2.219808861859253, "grad_norm": 0.004179791547358036, "learning_rate": 2.2265964378801043e-05, "loss": 0.0005, "step": 10220 }, { "epoch": 2.2219808861859254, "grad_norm": 0.003323676297441125, "learning_rate": 2.2238814074717636e-05, "loss": 0.0003, "step": 10230 }, { "epoch": 2.224152910512598, "grad_norm": 0.01833084411919117, "learning_rate": 2.2211663770634233e-05, "loss": 0.0269, "step": 10240 }, { "epoch": 2.22632493483927, "grad_norm": 0.018702253699302673, "learning_rate": 2.2184513466550826e-05, "loss": 0.0007, "step": 10250 }, { "epoch": 2.2284969591659425, "grad_norm": 0.04174269735813141, "learning_rate": 2.215736316246742e-05, "loss": 0.0005, "step": 10260 }, { "epoch": 2.230668983492615, "grad_norm": 0.005739922169595957, "learning_rate": 2.2130212858384015e-05, "loss": 0.0023, "step": 10270 }, { "epoch": 2.2328410078192875, "grad_norm": 0.019641762599349022, "learning_rate": 2.210306255430061e-05, "loss": 0.0068, "step": 10280 }, { "epoch": 2.23501303214596, "grad_norm": 0.0034746015444397926, "learning_rate": 2.2075912250217205e-05, "loss": 0.0003, "step": 10290 }, { "epoch": 2.2371850564726325, "grad_norm": 0.008811332285404205, "learning_rate": 2.2048761946133798e-05, "loss": 0.0291, "step": 10300 }, { "epoch": 2.239357080799305, "grad_norm": 0.014716439880430698, "learning_rate": 2.202161164205039e-05, "loss": 0.0004, "step": 10310 }, { "epoch": 2.2415291051259776, "grad_norm": 0.013416060246527195, "learning_rate": 2.1994461337966987e-05, "loss": 0.001, "step": 10320 }, { "epoch": 2.24370112945265, "grad_norm": 0.0019183940021321177, "learning_rate": 2.196731103388358e-05, "loss": 0.0003, "step": 10330 }, { "epoch": 2.245873153779322, "grad_norm": 0.002004083478823304, "learning_rate": 2.1940160729800173e-05, "loss": 0.0004, "step": 10340 }, { "epoch": 2.2480451781059947, "grad_norm": 0.01202855259180069, "learning_rate": 2.191301042571677e-05, "loss": 0.0005, "step": 10350 }, { "epoch": 2.250217202432667, "grad_norm": 0.0019037555903196335, "learning_rate": 2.1885860121633363e-05, "loss": 0.0006, "step": 10360 }, { "epoch": 2.2523892267593397, "grad_norm": 0.0023449785076081753, "learning_rate": 2.185870981754996e-05, "loss": 0.0003, "step": 10370 }, { "epoch": 2.2545612510860122, "grad_norm": 0.007250937633216381, "learning_rate": 2.1831559513466552e-05, "loss": 0.0004, "step": 10380 }, { "epoch": 2.2567332754126848, "grad_norm": 0.0018250870052725077, "learning_rate": 2.1804409209383145e-05, "loss": 0.0004, "step": 10390 }, { "epoch": 2.2589052997393573, "grad_norm": 0.00181575957685709, "learning_rate": 2.177725890529974e-05, "loss": 0.0002, "step": 10400 }, { "epoch": 2.2610773240660293, "grad_norm": 0.0019296440295875072, "learning_rate": 2.1750108601216334e-05, "loss": 0.0102, "step": 10410 }, { "epoch": 2.263249348392702, "grad_norm": 0.008913841098546982, "learning_rate": 2.1722958297132927e-05, "loss": 0.0157, "step": 10420 }, { "epoch": 2.2654213727193744, "grad_norm": 0.001792456954717636, "learning_rate": 2.1695807993049524e-05, "loss": 0.0002, "step": 10430 }, { "epoch": 2.267593397046047, "grad_norm": 0.0018146372167393565, "learning_rate": 2.1668657688966117e-05, "loss": 0.0106, "step": 10440 }, { "epoch": 2.2697654213727194, "grad_norm": 0.53955078125, "learning_rate": 2.164150738488271e-05, "loss": 0.0019, "step": 10450 }, { "epoch": 2.271937445699392, "grad_norm": 0.0025254616048187017, "learning_rate": 2.1614357080799306e-05, "loss": 0.0327, "step": 10460 }, { "epoch": 2.2741094700260645, "grad_norm": 0.05029534921050072, "learning_rate": 2.1587206776715903e-05, "loss": 0.0236, "step": 10470 }, { "epoch": 2.2762814943527365, "grad_norm": 0.006896906066685915, "learning_rate": 2.1560056472632496e-05, "loss": 0.0014, "step": 10480 }, { "epoch": 2.278453518679409, "grad_norm": 0.007253032643347979, "learning_rate": 2.153290616854909e-05, "loss": 0.0004, "step": 10490 }, { "epoch": 2.2806255430060816, "grad_norm": 0.002225355012342334, "learning_rate": 2.1505755864465682e-05, "loss": 0.0007, "step": 10500 }, { "epoch": 2.282797567332754, "grad_norm": 0.0017743059433996677, "learning_rate": 2.1478605560382278e-05, "loss": 0.0295, "step": 10510 }, { "epoch": 2.2849695916594266, "grad_norm": 0.001814755261875689, "learning_rate": 2.145145525629887e-05, "loss": 0.0002, "step": 10520 }, { "epoch": 2.287141615986099, "grad_norm": 0.002265684073790908, "learning_rate": 2.1424304952215464e-05, "loss": 0.0332, "step": 10530 }, { "epoch": 2.2893136403127716, "grad_norm": 0.004274342674762011, "learning_rate": 2.139715464813206e-05, "loss": 0.0009, "step": 10540 }, { "epoch": 2.291485664639444, "grad_norm": 0.0074489060789346695, "learning_rate": 2.1370004344048654e-05, "loss": 0.0005, "step": 10550 }, { "epoch": 2.2936576889661167, "grad_norm": 0.0019445127109065652, "learning_rate": 2.134285403996525e-05, "loss": 0.0006, "step": 10560 }, { "epoch": 2.2958297132927887, "grad_norm": 0.0019618631340563297, "learning_rate": 2.1315703735881843e-05, "loss": 0.0421, "step": 10570 }, { "epoch": 2.2980017376194612, "grad_norm": 0.001958042150363326, "learning_rate": 2.1288553431798436e-05, "loss": 0.0002, "step": 10580 }, { "epoch": 2.3001737619461338, "grad_norm": 2.4066147804260254, "learning_rate": 2.1261403127715032e-05, "loss": 0.0734, "step": 10590 }, { "epoch": 2.3023457862728063, "grad_norm": 0.020740212872624397, "learning_rate": 2.1234252823631625e-05, "loss": 0.0007, "step": 10600 }, { "epoch": 2.304517810599479, "grad_norm": 0.20123733580112457, "learning_rate": 2.120710251954822e-05, "loss": 0.0017, "step": 10610 }, { "epoch": 2.3066898349261513, "grad_norm": 0.5305845141410828, "learning_rate": 2.1179952215464815e-05, "loss": 0.0378, "step": 10620 }, { "epoch": 2.308861859252824, "grad_norm": 0.007456798106431961, "learning_rate": 2.1152801911381408e-05, "loss": 0.0003, "step": 10630 }, { "epoch": 2.311033883579496, "grad_norm": 0.0020253192633390427, "learning_rate": 2.1125651607298004e-05, "loss": 0.0047, "step": 10640 }, { "epoch": 2.3132059079061684, "grad_norm": 0.0039650010876357555, "learning_rate": 2.1098501303214597e-05, "loss": 0.0023, "step": 10650 }, { "epoch": 2.315377932232841, "grad_norm": 0.015204093419015408, "learning_rate": 2.1071350999131194e-05, "loss": 0.0017, "step": 10660 }, { "epoch": 2.3175499565595135, "grad_norm": 0.001984816510230303, "learning_rate": 2.1044200695047787e-05, "loss": 0.0015, "step": 10670 }, { "epoch": 2.319721980886186, "grad_norm": 0.0019000971224159002, "learning_rate": 2.101705039096438e-05, "loss": 0.0012, "step": 10680 }, { "epoch": 2.3218940052128585, "grad_norm": 0.0021752913016825914, "learning_rate": 2.0989900086880973e-05, "loss": 0.0027, "step": 10690 }, { "epoch": 2.324066029539531, "grad_norm": 0.003734230063855648, "learning_rate": 2.096274978279757e-05, "loss": 0.0017, "step": 10700 }, { "epoch": 2.326238053866203, "grad_norm": 0.007717654574662447, "learning_rate": 2.0935599478714162e-05, "loss": 0.0165, "step": 10710 }, { "epoch": 2.3284100781928756, "grad_norm": 0.12917552888393402, "learning_rate": 2.0908449174630755e-05, "loss": 0.0012, "step": 10720 }, { "epoch": 2.330582102519548, "grad_norm": 0.0019150119042024016, "learning_rate": 2.088129887054735e-05, "loss": 0.005, "step": 10730 }, { "epoch": 2.3327541268462206, "grad_norm": 0.0018117213621735573, "learning_rate": 2.0854148566463945e-05, "loss": 0.0003, "step": 10740 }, { "epoch": 2.334926151172893, "grad_norm": 0.003375353990122676, "learning_rate": 2.082699826238054e-05, "loss": 0.0007, "step": 10750 }, { "epoch": 2.3370981754995657, "grad_norm": 0.00251543871127069, "learning_rate": 2.0799847958297134e-05, "loss": 0.0011, "step": 10760 }, { "epoch": 2.339270199826238, "grad_norm": 0.0018188374815508723, "learning_rate": 2.0772697654213727e-05, "loss": 0.0002, "step": 10770 }, { "epoch": 2.3414422241529107, "grad_norm": 0.0017927787266671658, "learning_rate": 2.0745547350130324e-05, "loss": 0.0113, "step": 10780 }, { "epoch": 2.343614248479583, "grad_norm": 0.15189605951309204, "learning_rate": 2.0718397046046917e-05, "loss": 0.0196, "step": 10790 }, { "epoch": 2.3457862728062553, "grad_norm": 0.0038990580942481756, "learning_rate": 2.069124674196351e-05, "loss": 0.0374, "step": 10800 }, { "epoch": 2.347958297132928, "grad_norm": 0.07599227130413055, "learning_rate": 2.0664096437880106e-05, "loss": 0.0011, "step": 10810 }, { "epoch": 2.3501303214596003, "grad_norm": 0.3914039731025696, "learning_rate": 2.06369461337967e-05, "loss": 0.0038, "step": 10820 }, { "epoch": 2.352302345786273, "grad_norm": 0.0017740110633894801, "learning_rate": 2.0609795829713295e-05, "loss": 0.0292, "step": 10830 }, { "epoch": 2.3544743701129454, "grad_norm": 0.047251638025045395, "learning_rate": 2.058264552562989e-05, "loss": 0.0012, "step": 10840 }, { "epoch": 2.356646394439618, "grad_norm": 0.0017566693713888526, "learning_rate": 2.0555495221546485e-05, "loss": 0.0008, "step": 10850 }, { "epoch": 2.3588184187662904, "grad_norm": 0.006062482949346304, "learning_rate": 2.0528344917463078e-05, "loss": 0.0021, "step": 10860 }, { "epoch": 2.3609904430929625, "grad_norm": 0.0026041665114462376, "learning_rate": 2.050119461337967e-05, "loss": 0.0009, "step": 10870 }, { "epoch": 2.363162467419635, "grad_norm": 0.0018312680767849088, "learning_rate": 2.0474044309296264e-05, "loss": 0.0007, "step": 10880 }, { "epoch": 2.3653344917463075, "grad_norm": 0.0016842596232891083, "learning_rate": 2.044689400521286e-05, "loss": 0.0002, "step": 10890 }, { "epoch": 2.36750651607298, "grad_norm": 9.34298038482666, "learning_rate": 2.0419743701129453e-05, "loss": 0.0413, "step": 10900 }, { "epoch": 2.3696785403996525, "grad_norm": 0.6906639337539673, "learning_rate": 2.0392593397046046e-05, "loss": 0.0139, "step": 10910 }, { "epoch": 2.371850564726325, "grad_norm": 0.019118599593639374, "learning_rate": 2.0365443092962643e-05, "loss": 0.0012, "step": 10920 }, { "epoch": 2.3740225890529976, "grad_norm": 0.0017927911831066012, "learning_rate": 2.0338292788879236e-05, "loss": 0.0018, "step": 10930 }, { "epoch": 2.3761946133796696, "grad_norm": 0.0017185697797685862, "learning_rate": 2.0311142484795832e-05, "loss": 0.0003, "step": 10940 }, { "epoch": 2.378366637706342, "grad_norm": 0.0017069017048925161, "learning_rate": 2.0283992180712425e-05, "loss": 0.0005, "step": 10950 }, { "epoch": 2.3805386620330147, "grad_norm": 0.0016620549140498042, "learning_rate": 2.0256841876629018e-05, "loss": 0.0006, "step": 10960 }, { "epoch": 2.382710686359687, "grad_norm": 0.013684243895113468, "learning_rate": 2.0229691572545615e-05, "loss": 0.0007, "step": 10970 }, { "epoch": 2.3848827106863597, "grad_norm": 0.013956604525446892, "learning_rate": 2.0202541268462208e-05, "loss": 0.0016, "step": 10980 }, { "epoch": 2.3870547350130322, "grad_norm": 0.003464424517005682, "learning_rate": 2.01753909643788e-05, "loss": 0.0065, "step": 10990 }, { "epoch": 2.3892267593397047, "grad_norm": 0.0017165833851322532, "learning_rate": 2.0148240660295394e-05, "loss": 0.0028, "step": 11000 }, { "epoch": 2.391398783666377, "grad_norm": 0.0019415366696193814, "learning_rate": 2.012109035621199e-05, "loss": 0.0002, "step": 11010 }, { "epoch": 2.3935708079930493, "grad_norm": 0.022299442440271378, "learning_rate": 2.0093940052128586e-05, "loss": 0.0004, "step": 11020 }, { "epoch": 2.395742832319722, "grad_norm": 0.0016049507539719343, "learning_rate": 2.006678974804518e-05, "loss": 0.0002, "step": 11030 }, { "epoch": 2.3979148566463944, "grad_norm": 0.012664725072681904, "learning_rate": 2.0039639443961776e-05, "loss": 0.0019, "step": 11040 }, { "epoch": 2.400086880973067, "grad_norm": 0.010130131617188454, "learning_rate": 2.001248913987837e-05, "loss": 0.0004, "step": 11050 }, { "epoch": 2.4022589052997394, "grad_norm": 0.049954500049352646, "learning_rate": 1.9985338835794962e-05, "loss": 0.003, "step": 11060 }, { "epoch": 2.404430929626412, "grad_norm": 0.01277677807956934, "learning_rate": 1.9958188531711555e-05, "loss": 0.0014, "step": 11070 }, { "epoch": 2.4066029539530844, "grad_norm": 0.0015594464493915439, "learning_rate": 1.993103822762815e-05, "loss": 0.0002, "step": 11080 }, { "epoch": 2.408774978279757, "grad_norm": 0.0015805740840733051, "learning_rate": 1.9903887923544744e-05, "loss": 0.0005, "step": 11090 }, { "epoch": 2.410947002606429, "grad_norm": 0.001530050183646381, "learning_rate": 1.9876737619461337e-05, "loss": 0.0002, "step": 11100 }, { "epoch": 2.4131190269331015, "grad_norm": 0.001565085374750197, "learning_rate": 1.9849587315377934e-05, "loss": 0.0485, "step": 11110 }, { "epoch": 2.415291051259774, "grad_norm": 0.0015722399111837149, "learning_rate": 1.982243701129453e-05, "loss": 0.0003, "step": 11120 }, { "epoch": 2.4174630755864466, "grad_norm": 0.019924765452742577, "learning_rate": 1.9795286707211123e-05, "loss": 0.015, "step": 11130 }, { "epoch": 2.419635099913119, "grad_norm": 0.001612838706932962, "learning_rate": 1.9768136403127716e-05, "loss": 0.0002, "step": 11140 }, { "epoch": 2.4218071242397916, "grad_norm": 0.00153868249617517, "learning_rate": 1.974098609904431e-05, "loss": 0.0002, "step": 11150 }, { "epoch": 2.423979148566464, "grad_norm": 2.775703191757202, "learning_rate": 1.9713835794960906e-05, "loss": 0.046, "step": 11160 }, { "epoch": 2.426151172893136, "grad_norm": 0.012296248227357864, "learning_rate": 1.96866854908775e-05, "loss": 0.0005, "step": 11170 }, { "epoch": 2.4283231972198087, "grad_norm": 0.0023132723290473223, "learning_rate": 1.965953518679409e-05, "loss": 0.0003, "step": 11180 }, { "epoch": 2.4304952215464812, "grad_norm": 0.0025435942225158215, "learning_rate": 1.9632384882710685e-05, "loss": 0.0003, "step": 11190 }, { "epoch": 2.4326672458731537, "grad_norm": 0.0021016327664256096, "learning_rate": 1.960523457862728e-05, "loss": 0.0177, "step": 11200 }, { "epoch": 2.4348392701998263, "grad_norm": 0.002213849686086178, "learning_rate": 1.9578084274543877e-05, "loss": 0.0006, "step": 11210 }, { "epoch": 2.437011294526499, "grad_norm": 0.0018159413011744618, "learning_rate": 1.955093397046047e-05, "loss": 0.0002, "step": 11220 }, { "epoch": 2.4391833188531713, "grad_norm": 0.001797963515855372, "learning_rate": 1.9523783666377067e-05, "loss": 0.0005, "step": 11230 }, { "epoch": 2.4413553431798434, "grad_norm": 0.0016879733884707093, "learning_rate": 1.949663336229366e-05, "loss": 0.0002, "step": 11240 }, { "epoch": 2.443527367506516, "grad_norm": 0.0022847780492156744, "learning_rate": 1.9469483058210253e-05, "loss": 0.0244, "step": 11250 }, { "epoch": 2.4456993918331884, "grad_norm": 0.001833285903558135, "learning_rate": 1.9442332754126846e-05, "loss": 0.019, "step": 11260 }, { "epoch": 2.447871416159861, "grad_norm": 0.04164701700210571, "learning_rate": 1.9415182450043442e-05, "loss": 0.0115, "step": 11270 }, { "epoch": 2.4500434404865334, "grad_norm": 0.001846460741944611, "learning_rate": 1.9388032145960035e-05, "loss": 0.0224, "step": 11280 }, { "epoch": 2.452215464813206, "grad_norm": 0.012611893005669117, "learning_rate": 1.936088184187663e-05, "loss": 0.0024, "step": 11290 }, { "epoch": 2.4543874891398785, "grad_norm": 0.0033035138621926308, "learning_rate": 1.9333731537793225e-05, "loss": 0.0013, "step": 11300 }, { "epoch": 2.456559513466551, "grad_norm": 0.0016401956090703607, "learning_rate": 1.930658123370982e-05, "loss": 0.0008, "step": 11310 }, { "epoch": 2.4587315377932235, "grad_norm": 0.046950288116931915, "learning_rate": 1.9279430929626414e-05, "loss": 0.0022, "step": 11320 }, { "epoch": 2.4609035621198956, "grad_norm": 0.004596000071614981, "learning_rate": 1.9252280625543007e-05, "loss": 0.0018, "step": 11330 }, { "epoch": 2.463075586446568, "grad_norm": 0.0016806930070742965, "learning_rate": 1.92251303214596e-05, "loss": 0.0022, "step": 11340 }, { "epoch": 2.4652476107732406, "grad_norm": 0.001617814414203167, "learning_rate": 1.9197980017376197e-05, "loss": 0.0022, "step": 11350 }, { "epoch": 2.467419635099913, "grad_norm": 0.004318607039749622, "learning_rate": 1.917082971329279e-05, "loss": 0.0017, "step": 11360 }, { "epoch": 2.4695916594265857, "grad_norm": 0.0050076707266271114, "learning_rate": 1.9143679409209383e-05, "loss": 0.0012, "step": 11370 }, { "epoch": 2.471763683753258, "grad_norm": 0.21344320476055145, "learning_rate": 1.9116529105125976e-05, "loss": 0.0012, "step": 11380 }, { "epoch": 2.4739357080799307, "grad_norm": 0.0015156455338001251, "learning_rate": 1.9089378801042572e-05, "loss": 0.0006, "step": 11390 }, { "epoch": 2.4761077324066028, "grad_norm": 0.02697795256972313, "learning_rate": 1.906222849695917e-05, "loss": 0.0007, "step": 11400 }, { "epoch": 2.4782797567332753, "grad_norm": 0.00153002655133605, "learning_rate": 1.903507819287576e-05, "loss": 0.0007, "step": 11410 }, { "epoch": 2.480451781059948, "grad_norm": 0.0015049789799377322, "learning_rate": 1.9007927888792358e-05, "loss": 0.0015, "step": 11420 }, { "epoch": 2.4826238053866203, "grad_norm": 0.024290762841701508, "learning_rate": 1.898077758470895e-05, "loss": 0.002, "step": 11430 }, { "epoch": 2.484795829713293, "grad_norm": 0.0015076440759003162, "learning_rate": 1.8953627280625544e-05, "loss": 0.0003, "step": 11440 }, { "epoch": 2.4869678540399653, "grad_norm": 0.0014670953387394547, "learning_rate": 1.8926476976542137e-05, "loss": 0.0365, "step": 11450 }, { "epoch": 2.489139878366638, "grad_norm": 0.0019197690999135375, "learning_rate": 1.8899326672458733e-05, "loss": 0.0003, "step": 11460 }, { "epoch": 2.49131190269331, "grad_norm": 0.014209013432264328, "learning_rate": 1.8872176368375326e-05, "loss": 0.0036, "step": 11470 }, { "epoch": 2.4934839270199824, "grad_norm": 0.0019741549622267485, "learning_rate": 1.884502606429192e-05, "loss": 0.014, "step": 11480 }, { "epoch": 2.495655951346655, "grad_norm": 0.0015247397823259234, "learning_rate": 1.8817875760208516e-05, "loss": 0.0006, "step": 11490 }, { "epoch": 2.4978279756733275, "grad_norm": 0.0014463032130151987, "learning_rate": 1.8790725456125112e-05, "loss": 0.0004, "step": 11500 }, { "epoch": 2.5, "grad_norm": 7.1138739585876465, "learning_rate": 1.8763575152041705e-05, "loss": 0.0264, "step": 11510 }, { "epoch": 2.5021720243266725, "grad_norm": 0.009552753530442715, "learning_rate": 1.8736424847958298e-05, "loss": 0.0139, "step": 11520 }, { "epoch": 2.504344048653345, "grad_norm": 0.014069234021008015, "learning_rate": 1.870927454387489e-05, "loss": 0.0005, "step": 11530 }, { "epoch": 2.506516072980017, "grad_norm": 0.23029930889606476, "learning_rate": 1.8682124239791488e-05, "loss": 0.001, "step": 11540 }, { "epoch": 2.50868809730669, "grad_norm": 0.006573742721229792, "learning_rate": 1.865497393570808e-05, "loss": 0.0013, "step": 11550 }, { "epoch": 2.510860121633362, "grad_norm": 0.0014153916854411364, "learning_rate": 1.8627823631624674e-05, "loss": 0.0011, "step": 11560 }, { "epoch": 2.5130321459600347, "grad_norm": 0.0014535776572301984, "learning_rate": 1.8600673327541267e-05, "loss": 0.0344, "step": 11570 }, { "epoch": 2.515204170286707, "grad_norm": 0.002279409673064947, "learning_rate": 1.8573523023457863e-05, "loss": 0.008, "step": 11580 }, { "epoch": 2.5173761946133797, "grad_norm": 0.01143862958997488, "learning_rate": 1.854637271937446e-05, "loss": 0.004, "step": 11590 }, { "epoch": 2.519548218940052, "grad_norm": 0.0021647445391863585, "learning_rate": 1.8519222415291053e-05, "loss": 0.0281, "step": 11600 }, { "epoch": 2.5217202432667247, "grad_norm": 0.011500898748636246, "learning_rate": 1.8492072111207646e-05, "loss": 0.051, "step": 11610 }, { "epoch": 2.5238922675933972, "grad_norm": 0.07578958570957184, "learning_rate": 1.8464921807124242e-05, "loss": 0.0236, "step": 11620 }, { "epoch": 2.5260642919200693, "grad_norm": 0.002254678402096033, "learning_rate": 1.8437771503040835e-05, "loss": 0.0007, "step": 11630 }, { "epoch": 2.528236316246742, "grad_norm": 0.009960656054317951, "learning_rate": 1.8410621198957428e-05, "loss": 0.0019, "step": 11640 }, { "epoch": 2.5304083405734143, "grad_norm": 4.388426303863525, "learning_rate": 1.8383470894874024e-05, "loss": 0.0246, "step": 11650 }, { "epoch": 2.532580364900087, "grad_norm": 0.002279053907841444, "learning_rate": 1.8356320590790617e-05, "loss": 0.0014, "step": 11660 }, { "epoch": 2.5347523892267594, "grad_norm": 0.009489334188401699, "learning_rate": 1.832917028670721e-05, "loss": 0.0121, "step": 11670 }, { "epoch": 2.536924413553432, "grad_norm": 0.0027339665684849024, "learning_rate": 1.8302019982623807e-05, "loss": 0.0015, "step": 11680 }, { "epoch": 2.5390964378801044, "grad_norm": 0.003309717169031501, "learning_rate": 1.8274869678540403e-05, "loss": 0.0003, "step": 11690 }, { "epoch": 2.5412684622067765, "grad_norm": 0.0023564095608890057, "learning_rate": 1.8247719374456996e-05, "loss": 0.0009, "step": 11700 }, { "epoch": 2.5434404865334495, "grad_norm": 0.0017603716114535928, "learning_rate": 1.822056907037359e-05, "loss": 0.0005, "step": 11710 }, { "epoch": 2.5456125108601215, "grad_norm": 0.0017267238581553102, "learning_rate": 1.8193418766290182e-05, "loss": 0.0005, "step": 11720 }, { "epoch": 2.547784535186794, "grad_norm": 0.0017208755016326904, "learning_rate": 1.816626846220678e-05, "loss": 0.0004, "step": 11730 }, { "epoch": 2.5499565595134666, "grad_norm": 0.0018419224070385098, "learning_rate": 1.8139118158123372e-05, "loss": 0.0004, "step": 11740 }, { "epoch": 2.552128583840139, "grad_norm": 0.003102607326582074, "learning_rate": 1.8111967854039965e-05, "loss": 0.0292, "step": 11750 }, { "epoch": 2.5543006081668116, "grad_norm": 0.0016968741547316313, "learning_rate": 1.8084817549956558e-05, "loss": 0.0006, "step": 11760 }, { "epoch": 2.5564726324934837, "grad_norm": 0.016231173649430275, "learning_rate": 1.8057667245873154e-05, "loss": 0.001, "step": 11770 }, { "epoch": 2.5586446568201566, "grad_norm": 0.00396195612847805, "learning_rate": 1.803051694178975e-05, "loss": 0.0004, "step": 11780 }, { "epoch": 2.5608166811468287, "grad_norm": 0.017558127641677856, "learning_rate": 1.8003366637706344e-05, "loss": 0.0363, "step": 11790 }, { "epoch": 2.562988705473501, "grad_norm": 0.032345548272132874, "learning_rate": 1.7976216333622937e-05, "loss": 0.0136, "step": 11800 }, { "epoch": 2.5651607298001737, "grad_norm": 0.040637094527482986, "learning_rate": 1.7949066029539533e-05, "loss": 0.0043, "step": 11810 }, { "epoch": 2.5673327541268463, "grad_norm": 0.10168937593698502, "learning_rate": 1.7921915725456126e-05, "loss": 0.0263, "step": 11820 }, { "epoch": 2.5695047784535188, "grad_norm": 0.0077156987972557545, "learning_rate": 1.789476542137272e-05, "loss": 0.0072, "step": 11830 }, { "epoch": 2.5716768027801913, "grad_norm": 0.003105215495452285, "learning_rate": 1.7867615117289315e-05, "loss": 0.0009, "step": 11840 }, { "epoch": 2.573848827106864, "grad_norm": 0.0035061310045421124, "learning_rate": 1.784046481320591e-05, "loss": 0.0005, "step": 11850 }, { "epoch": 2.576020851433536, "grad_norm": 0.0113242631778121, "learning_rate": 1.78133145091225e-05, "loss": 0.0259, "step": 11860 }, { "epoch": 2.5781928757602084, "grad_norm": 0.0015477265696972609, "learning_rate": 1.7786164205039098e-05, "loss": 0.0239, "step": 11870 }, { "epoch": 2.580364900086881, "grad_norm": 0.004106955602765083, "learning_rate": 1.7759013900955694e-05, "loss": 0.001, "step": 11880 }, { "epoch": 2.5825369244135534, "grad_norm": 0.0014083506539463997, "learning_rate": 1.7731863596872287e-05, "loss": 0.0003, "step": 11890 }, { "epoch": 2.584708948740226, "grad_norm": 0.0013759738067165017, "learning_rate": 1.770471329278888e-05, "loss": 0.0057, "step": 11900 }, { "epoch": 2.5868809730668985, "grad_norm": 0.0014395932666957378, "learning_rate": 1.7677562988705473e-05, "loss": 0.0244, "step": 11910 }, { "epoch": 2.589052997393571, "grad_norm": 0.018697045743465424, "learning_rate": 1.765041268462207e-05, "loss": 0.0114, "step": 11920 }, { "epoch": 2.591225021720243, "grad_norm": 0.008916974999010563, "learning_rate": 1.7623262380538663e-05, "loss": 0.0015, "step": 11930 }, { "epoch": 2.5933970460469156, "grad_norm": 0.06151333078742027, "learning_rate": 1.7596112076455256e-05, "loss": 0.0012, "step": 11940 }, { "epoch": 2.595569070373588, "grad_norm": 0.011603694409132004, "learning_rate": 1.756896177237185e-05, "loss": 0.0009, "step": 11950 }, { "epoch": 2.5977410947002606, "grad_norm": 0.0036000856198370457, "learning_rate": 1.7541811468288445e-05, "loss": 0.0002, "step": 11960 }, { "epoch": 2.599913119026933, "grad_norm": 0.008364981971681118, "learning_rate": 1.751466116420504e-05, "loss": 0.0063, "step": 11970 }, { "epoch": 2.6020851433536056, "grad_norm": 0.0018292396562173963, "learning_rate": 1.7487510860121635e-05, "loss": 0.0014, "step": 11980 }, { "epoch": 2.604257167680278, "grad_norm": 0.0016355229308828712, "learning_rate": 1.7460360556038228e-05, "loss": 0.0004, "step": 11990 }, { "epoch": 2.6064291920069502, "grad_norm": 0.035449955612421036, "learning_rate": 1.7433210251954824e-05, "loss": 0.0015, "step": 12000 }, { "epoch": 2.608601216333623, "grad_norm": 0.0014802763471379876, "learning_rate": 1.7406059947871417e-05, "loss": 0.0003, "step": 12010 }, { "epoch": 2.6107732406602953, "grad_norm": 0.0014913080958649516, "learning_rate": 1.737890964378801e-05, "loss": 0.0003, "step": 12020 }, { "epoch": 2.6129452649869678, "grad_norm": 0.001743799657560885, "learning_rate": 1.7351759339704607e-05, "loss": 0.0006, "step": 12030 }, { "epoch": 2.6151172893136403, "grad_norm": 0.001439134357497096, "learning_rate": 1.73246090356212e-05, "loss": 0.0002, "step": 12040 }, { "epoch": 2.617289313640313, "grad_norm": 0.0013170883757993579, "learning_rate": 1.7297458731537793e-05, "loss": 0.0006, "step": 12050 }, { "epoch": 2.6194613379669853, "grad_norm": 0.0016408158699050546, "learning_rate": 1.727030842745439e-05, "loss": 0.0004, "step": 12060 }, { "epoch": 2.621633362293658, "grad_norm": 0.006257723551243544, "learning_rate": 1.7243158123370985e-05, "loss": 0.0002, "step": 12070 }, { "epoch": 2.6238053866203304, "grad_norm": 0.0013448239769786596, "learning_rate": 1.721600781928758e-05, "loss": 0.0003, "step": 12080 }, { "epoch": 2.6259774109470024, "grad_norm": 0.0014338934561237693, "learning_rate": 1.718885751520417e-05, "loss": 0.0002, "step": 12090 }, { "epoch": 2.628149435273675, "grad_norm": 0.0013018847675994039, "learning_rate": 1.7161707211120764e-05, "loss": 0.0002, "step": 12100 }, { "epoch": 2.6303214596003475, "grad_norm": 0.0013456381857395172, "learning_rate": 1.713455690703736e-05, "loss": 0.0003, "step": 12110 }, { "epoch": 2.63249348392702, "grad_norm": 0.0014625930925831199, "learning_rate": 1.7107406602953954e-05, "loss": 0.0076, "step": 12120 }, { "epoch": 2.6346655082536925, "grad_norm": 0.00126743467990309, "learning_rate": 1.7080256298870547e-05, "loss": 0.0005, "step": 12130 }, { "epoch": 2.636837532580365, "grad_norm": 0.0012669408461079001, "learning_rate": 1.705310599478714e-05, "loss": 0.0004, "step": 12140 }, { "epoch": 2.6390095569070375, "grad_norm": 0.001318922615610063, "learning_rate": 1.7025955690703736e-05, "loss": 0.0188, "step": 12150 }, { "epoch": 2.6411815812337096, "grad_norm": 0.09571494907140732, "learning_rate": 1.6998805386620333e-05, "loss": 0.0244, "step": 12160 }, { "epoch": 2.643353605560382, "grad_norm": 0.024946704506874084, "learning_rate": 1.6971655082536926e-05, "loss": 0.001, "step": 12170 }, { "epoch": 2.6455256298870546, "grad_norm": 0.001351698418147862, "learning_rate": 1.694450477845352e-05, "loss": 0.0003, "step": 12180 }, { "epoch": 2.647697654213727, "grad_norm": 6.923243522644043, "learning_rate": 1.6917354474370115e-05, "loss": 0.0254, "step": 12190 }, { "epoch": 2.6498696785403997, "grad_norm": 0.0013446049997583032, "learning_rate": 1.6890204170286708e-05, "loss": 0.0013, "step": 12200 }, { "epoch": 2.652041702867072, "grad_norm": 0.0017462641699239612, "learning_rate": 1.68630538662033e-05, "loss": 0.0004, "step": 12210 }, { "epoch": 2.6542137271937447, "grad_norm": 0.0650092363357544, "learning_rate": 1.6835903562119894e-05, "loss": 0.0002, "step": 12220 }, { "epoch": 2.656385751520417, "grad_norm": 0.0013285756576806307, "learning_rate": 1.680875325803649e-05, "loss": 0.0135, "step": 12230 }, { "epoch": 2.6585577758470897, "grad_norm": 0.001253599999472499, "learning_rate": 1.6781602953953084e-05, "loss": 0.0022, "step": 12240 }, { "epoch": 2.660729800173762, "grad_norm": 0.001297818380407989, "learning_rate": 1.675445264986968e-05, "loss": 0.0004, "step": 12250 }, { "epoch": 2.6629018245004343, "grad_norm": 2.883939266204834, "learning_rate": 1.6727302345786276e-05, "loss": 0.0135, "step": 12260 }, { "epoch": 2.665073848827107, "grad_norm": 0.001262718578800559, "learning_rate": 1.670015204170287e-05, "loss": 0.0003, "step": 12270 }, { "epoch": 2.6672458731537794, "grad_norm": 0.0012341596884652972, "learning_rate": 1.6673001737619462e-05, "loss": 0.0024, "step": 12280 }, { "epoch": 2.669417897480452, "grad_norm": 0.0012327926233410835, "learning_rate": 1.6645851433536055e-05, "loss": 0.039, "step": 12290 }, { "epoch": 2.6715899218071244, "grad_norm": 0.0012394741643220186, "learning_rate": 1.6618701129452652e-05, "loss": 0.0004, "step": 12300 }, { "epoch": 2.673761946133797, "grad_norm": 5.620744705200195, "learning_rate": 1.6591550825369245e-05, "loss": 0.0317, "step": 12310 }, { "epoch": 2.675933970460469, "grad_norm": 1.9395980834960938, "learning_rate": 1.6564400521285838e-05, "loss": 0.0132, "step": 12320 }, { "epoch": 2.6781059947871415, "grad_norm": 0.07889677584171295, "learning_rate": 1.653725021720243e-05, "loss": 0.0091, "step": 12330 }, { "epoch": 2.680278019113814, "grad_norm": 0.20382428169250488, "learning_rate": 1.6510099913119027e-05, "loss": 0.0017, "step": 12340 }, { "epoch": 2.6824500434404865, "grad_norm": 0.03647405281662941, "learning_rate": 1.6482949609035624e-05, "loss": 0.0042, "step": 12350 }, { "epoch": 2.684622067767159, "grad_norm": 0.002080442849546671, "learning_rate": 1.6455799304952217e-05, "loss": 0.0023, "step": 12360 }, { "epoch": 2.6867940920938316, "grad_norm": 0.001212194561958313, "learning_rate": 1.642864900086881e-05, "loss": 0.0004, "step": 12370 }, { "epoch": 2.688966116420504, "grad_norm": 0.0034903050400316715, "learning_rate": 1.6401498696785406e-05, "loss": 0.0497, "step": 12380 }, { "epoch": 2.691138140747176, "grad_norm": 0.004165531136095524, "learning_rate": 1.6374348392702e-05, "loss": 0.0005, "step": 12390 }, { "epoch": 2.6933101650738487, "grad_norm": 0.003915696870535612, "learning_rate": 1.6347198088618592e-05, "loss": 0.0005, "step": 12400 }, { "epoch": 2.695482189400521, "grad_norm": 0.07809191197156906, "learning_rate": 1.6320047784535185e-05, "loss": 0.0027, "step": 12410 }, { "epoch": 2.6976542137271937, "grad_norm": 0.007462525740265846, "learning_rate": 1.629289748045178e-05, "loss": 0.0018, "step": 12420 }, { "epoch": 2.6998262380538662, "grad_norm": 0.02216045930981636, "learning_rate": 1.6265747176368375e-05, "loss": 0.0028, "step": 12430 }, { "epoch": 2.7019982623805388, "grad_norm": 0.02276870794594288, "learning_rate": 1.623859687228497e-05, "loss": 0.0003, "step": 12440 }, { "epoch": 2.7041702867072113, "grad_norm": 0.002578067360445857, "learning_rate": 1.6211446568201567e-05, "loss": 0.0007, "step": 12450 }, { "epoch": 2.7063423110338833, "grad_norm": 0.001441303757019341, "learning_rate": 1.618429626411816e-05, "loss": 0.0186, "step": 12460 }, { "epoch": 2.7085143353605563, "grad_norm": 0.0013689674669876695, "learning_rate": 1.6157145960034753e-05, "loss": 0.0005, "step": 12470 }, { "epoch": 2.7106863596872284, "grad_norm": 0.0017949125031009316, "learning_rate": 1.6129995655951346e-05, "loss": 0.0002, "step": 12480 }, { "epoch": 2.712858384013901, "grad_norm": 0.0020256515126675367, "learning_rate": 1.6102845351867943e-05, "loss": 0.017, "step": 12490 }, { "epoch": 2.7150304083405734, "grad_norm": 0.0013759853318333626, "learning_rate": 1.6075695047784536e-05, "loss": 0.032, "step": 12500 }, { "epoch": 2.717202432667246, "grad_norm": 0.0013345404295250773, "learning_rate": 1.604854474370113e-05, "loss": 0.0009, "step": 12510 }, { "epoch": 2.7193744569939184, "grad_norm": 0.0013303733430802822, "learning_rate": 1.6021394439617722e-05, "loss": 0.0008, "step": 12520 }, { "epoch": 2.7215464813205905, "grad_norm": 0.0013412077678367496, "learning_rate": 1.599424413553432e-05, "loss": 0.0005, "step": 12530 }, { "epoch": 2.7237185056472635, "grad_norm": 0.0020882273092865944, "learning_rate": 1.5967093831450915e-05, "loss": 0.0004, "step": 12540 }, { "epoch": 2.7258905299739355, "grad_norm": 0.27230900526046753, "learning_rate": 1.5939943527367508e-05, "loss": 0.0013, "step": 12550 }, { "epoch": 2.728062554300608, "grad_norm": 0.0014832447050139308, "learning_rate": 1.59127932232841e-05, "loss": 0.0011, "step": 12560 }, { "epoch": 2.7302345786272806, "grad_norm": 0.0016118614003062248, "learning_rate": 1.5885642919200697e-05, "loss": 0.0008, "step": 12570 }, { "epoch": 2.732406602953953, "grad_norm": 0.02013046108186245, "learning_rate": 1.585849261511729e-05, "loss": 0.0127, "step": 12580 }, { "epoch": 2.7345786272806256, "grad_norm": 0.0024779075756669044, "learning_rate": 1.5831342311033883e-05, "loss": 0.0002, "step": 12590 }, { "epoch": 2.736750651607298, "grad_norm": 0.005045650061219931, "learning_rate": 1.5804192006950476e-05, "loss": 0.0002, "step": 12600 }, { "epoch": 2.7389226759339707, "grad_norm": 0.007579619996249676, "learning_rate": 1.5777041702867073e-05, "loss": 0.0003, "step": 12610 }, { "epoch": 2.7410947002606427, "grad_norm": 0.0014458505902439356, "learning_rate": 1.5749891398783666e-05, "loss": 0.0001, "step": 12620 }, { "epoch": 2.7432667245873152, "grad_norm": 0.0013349404325708747, "learning_rate": 1.5722741094700262e-05, "loss": 0.0001, "step": 12630 }, { "epoch": 2.7454387489139878, "grad_norm": 0.002168971812352538, "learning_rate": 1.569559079061686e-05, "loss": 0.0421, "step": 12640 }, { "epoch": 2.7476107732406603, "grad_norm": 0.01694057695567608, "learning_rate": 1.566844048653345e-05, "loss": 0.0023, "step": 12650 }, { "epoch": 2.749782797567333, "grad_norm": 0.004285480361431837, "learning_rate": 1.5641290182450045e-05, "loss": 0.0023, "step": 12660 }, { "epoch": 2.7519548218940053, "grad_norm": 0.00815389771014452, "learning_rate": 1.5614139878366638e-05, "loss": 0.0007, "step": 12670 }, { "epoch": 2.754126846220678, "grad_norm": 0.012979789637029171, "learning_rate": 1.5586989574283234e-05, "loss": 0.004, "step": 12680 }, { "epoch": 2.75629887054735, "grad_norm": 0.009059431962668896, "learning_rate": 1.5559839270199827e-05, "loss": 0.0068, "step": 12690 }, { "epoch": 2.758470894874023, "grad_norm": 0.007445912342518568, "learning_rate": 1.553268896611642e-05, "loss": 0.0046, "step": 12700 }, { "epoch": 2.760642919200695, "grad_norm": 0.0011629179352894425, "learning_rate": 1.5505538662033013e-05, "loss": 0.0298, "step": 12710 }, { "epoch": 2.7628149435273675, "grad_norm": 0.001427669427357614, "learning_rate": 1.547838835794961e-05, "loss": 0.0275, "step": 12720 }, { "epoch": 2.76498696785404, "grad_norm": 0.022871850058436394, "learning_rate": 1.5451238053866206e-05, "loss": 0.0004, "step": 12730 }, { "epoch": 2.7671589921807125, "grad_norm": 0.003048468613997102, "learning_rate": 1.54240877497828e-05, "loss": 0.0006, "step": 12740 }, { "epoch": 2.769331016507385, "grad_norm": 0.0029503628611564636, "learning_rate": 1.5396937445699392e-05, "loss": 0.0006, "step": 12750 }, { "epoch": 2.771503040834057, "grad_norm": 2.5482583045959473, "learning_rate": 1.5369787141615988e-05, "loss": 0.0159, "step": 12760 }, { "epoch": 2.77367506516073, "grad_norm": 0.003224568674340844, "learning_rate": 1.534263683753258e-05, "loss": 0.0105, "step": 12770 }, { "epoch": 2.775847089487402, "grad_norm": 0.03141826391220093, "learning_rate": 1.5315486533449174e-05, "loss": 0.0021, "step": 12780 }, { "epoch": 2.7780191138140746, "grad_norm": 0.010639526881277561, "learning_rate": 1.5288336229365767e-05, "loss": 0.0133, "step": 12790 }, { "epoch": 2.780191138140747, "grad_norm": 0.001194779179058969, "learning_rate": 1.5261185925282364e-05, "loss": 0.0143, "step": 12800 }, { "epoch": 2.7823631624674197, "grad_norm": 0.0038152916822582483, "learning_rate": 1.5234035621198958e-05, "loss": 0.0112, "step": 12810 }, { "epoch": 2.784535186794092, "grad_norm": 0.001374902785755694, "learning_rate": 1.5206885317115551e-05, "loss": 0.0003, "step": 12820 }, { "epoch": 2.7867072111207647, "grad_norm": 0.004674356430768967, "learning_rate": 1.5179735013032148e-05, "loss": 0.0007, "step": 12830 }, { "epoch": 2.788879235447437, "grad_norm": 0.011177301406860352, "learning_rate": 1.5152584708948741e-05, "loss": 0.0006, "step": 12840 }, { "epoch": 2.7910512597741093, "grad_norm": 0.006574731785804033, "learning_rate": 1.5125434404865336e-05, "loss": 0.0002, "step": 12850 }, { "epoch": 2.793223284100782, "grad_norm": 0.0032284893095493317, "learning_rate": 1.5098284100781929e-05, "loss": 0.0045, "step": 12860 }, { "epoch": 2.7953953084274543, "grad_norm": 0.00666830176487565, "learning_rate": 1.5071133796698525e-05, "loss": 0.0006, "step": 12870 }, { "epoch": 2.797567332754127, "grad_norm": 0.006095957476645708, "learning_rate": 1.5043983492615118e-05, "loss": 0.0139, "step": 12880 }, { "epoch": 2.7997393570807994, "grad_norm": 0.0036753893364220858, "learning_rate": 1.5016833188531713e-05, "loss": 0.006, "step": 12890 }, { "epoch": 2.801911381407472, "grad_norm": 0.0015879254788160324, "learning_rate": 1.4989682884448306e-05, "loss": 0.0004, "step": 12900 }, { "epoch": 2.8040834057341444, "grad_norm": 0.004430914297699928, "learning_rate": 1.4962532580364902e-05, "loss": 0.0003, "step": 12910 }, { "epoch": 2.8062554300608165, "grad_norm": 0.001315233763307333, "learning_rate": 1.4935382276281495e-05, "loss": 0.0002, "step": 12920 }, { "epoch": 2.808427454387489, "grad_norm": 0.23426829278469086, "learning_rate": 1.4908231972198088e-05, "loss": 0.0159, "step": 12930 }, { "epoch": 2.8105994787141615, "grad_norm": 0.001154038356617093, "learning_rate": 1.4881081668114683e-05, "loss": 0.0001, "step": 12940 }, { "epoch": 2.812771503040834, "grad_norm": 0.0013015108415856957, "learning_rate": 1.485393136403128e-05, "loss": 0.0004, "step": 12950 }, { "epoch": 2.8149435273675065, "grad_norm": 0.0011772330617532134, "learning_rate": 1.4826781059947872e-05, "loss": 0.0003, "step": 12960 }, { "epoch": 2.817115551694179, "grad_norm": 0.0015416668029502034, "learning_rate": 1.4799630755864465e-05, "loss": 0.0048, "step": 12970 }, { "epoch": 2.8192875760208516, "grad_norm": 0.07705456763505936, "learning_rate": 1.477248045178106e-05, "loss": 0.0054, "step": 12980 }, { "epoch": 2.8214596003475236, "grad_norm": 4.77557897567749, "learning_rate": 1.4745330147697656e-05, "loss": 0.0108, "step": 12990 }, { "epoch": 2.8236316246741966, "grad_norm": 0.0011944122379645705, "learning_rate": 1.471817984361425e-05, "loss": 0.0004, "step": 13000 }, { "epoch": 2.8258036490008687, "grad_norm": 0.004810866434127092, "learning_rate": 1.4691029539530842e-05, "loss": 0.0006, "step": 13010 }, { "epoch": 2.827975673327541, "grad_norm": 0.0011162413284182549, "learning_rate": 1.4663879235447437e-05, "loss": 0.0005, "step": 13020 }, { "epoch": 2.8301476976542137, "grad_norm": 0.0012644167291000485, "learning_rate": 1.4636728931364032e-05, "loss": 0.0001, "step": 13030 }, { "epoch": 2.832319721980886, "grad_norm": 0.050831083208322525, "learning_rate": 1.4609578627280627e-05, "loss": 0.0133, "step": 13040 }, { "epoch": 2.8344917463075587, "grad_norm": 0.0011091434862464666, "learning_rate": 1.458242832319722e-05, "loss": 0.0005, "step": 13050 }, { "epoch": 2.8366637706342313, "grad_norm": 0.0010839985916391015, "learning_rate": 1.4555278019113816e-05, "loss": 0.003, "step": 13060 }, { "epoch": 2.8388357949609038, "grad_norm": 0.0010779522126540542, "learning_rate": 1.4528127715030409e-05, "loss": 0.0284, "step": 13070 }, { "epoch": 2.841007819287576, "grad_norm": 0.0010752989910542965, "learning_rate": 1.4500977410947004e-05, "loss": 0.0001, "step": 13080 }, { "epoch": 2.8431798436142484, "grad_norm": 0.0011290594702586532, "learning_rate": 1.4473827106863597e-05, "loss": 0.0001, "step": 13090 }, { "epoch": 2.845351867940921, "grad_norm": 0.001103981863707304, "learning_rate": 1.4446676802780193e-05, "loss": 0.0002, "step": 13100 }, { "epoch": 2.8475238922675934, "grad_norm": 0.009349314495921135, "learning_rate": 1.4419526498696786e-05, "loss": 0.0002, "step": 13110 }, { "epoch": 2.849695916594266, "grad_norm": 0.0011064874706789851, "learning_rate": 1.439237619461338e-05, "loss": 0.0273, "step": 13120 }, { "epoch": 2.8518679409209384, "grad_norm": 0.001436484744772315, "learning_rate": 1.4365225890529974e-05, "loss": 0.0067, "step": 13130 }, { "epoch": 2.854039965247611, "grad_norm": 0.001617782050743699, "learning_rate": 1.433807558644657e-05, "loss": 0.0096, "step": 13140 }, { "epoch": 2.856211989574283, "grad_norm": 0.0010622652480378747, "learning_rate": 1.4310925282363163e-05, "loss": 0.0012, "step": 13150 }, { "epoch": 2.8583840139009555, "grad_norm": 0.001210720743983984, "learning_rate": 1.4283774978279756e-05, "loss": 0.0003, "step": 13160 }, { "epoch": 2.860556038227628, "grad_norm": 0.001125740702264011, "learning_rate": 1.4256624674196351e-05, "loss": 0.0006, "step": 13170 }, { "epoch": 2.8627280625543006, "grad_norm": 2.121816396713257, "learning_rate": 1.4229474370112947e-05, "loss": 0.0072, "step": 13180 }, { "epoch": 2.864900086880973, "grad_norm": 0.001073643914423883, "learning_rate": 1.420232406602954e-05, "loss": 0.0774, "step": 13190 }, { "epoch": 2.8670721112076456, "grad_norm": 0.001115081482566893, "learning_rate": 1.4175173761946134e-05, "loss": 0.0003, "step": 13200 }, { "epoch": 2.869244135534318, "grad_norm": 1.6094557046890259, "learning_rate": 1.4148023457862728e-05, "loss": 0.0354, "step": 13210 }, { "epoch": 2.87141615986099, "grad_norm": 0.0011969703482463956, "learning_rate": 1.4120873153779323e-05, "loss": 0.001, "step": 13220 }, { "epoch": 2.873588184187663, "grad_norm": 0.0011109106708317995, "learning_rate": 1.4093722849695918e-05, "loss": 0.0088, "step": 13230 }, { "epoch": 2.8757602085143352, "grad_norm": 0.01138853095471859, "learning_rate": 1.406657254561251e-05, "loss": 0.0008, "step": 13240 }, { "epoch": 2.8779322328410077, "grad_norm": 0.001634183689020574, "learning_rate": 1.4039422241529107e-05, "loss": 0.005, "step": 13250 }, { "epoch": 2.8801042571676803, "grad_norm": 0.003161991247907281, "learning_rate": 1.40122719374457e-05, "loss": 0.0153, "step": 13260 }, { "epoch": 2.8822762814943528, "grad_norm": 0.001149641931988299, "learning_rate": 1.3985121633362295e-05, "loss": 0.0003, "step": 13270 }, { "epoch": 2.8844483058210253, "grad_norm": 0.001772301853634417, "learning_rate": 1.3957971329278888e-05, "loss": 0.065, "step": 13280 }, { "epoch": 2.886620330147698, "grad_norm": 0.004322749096900225, "learning_rate": 1.3930821025195484e-05, "loss": 0.0025, "step": 13290 }, { "epoch": 2.8887923544743703, "grad_norm": 0.009627276100218296, "learning_rate": 1.3903670721112077e-05, "loss": 0.0009, "step": 13300 }, { "epoch": 2.8909643788010424, "grad_norm": 0.0011137340916320682, "learning_rate": 1.3876520417028672e-05, "loss": 0.0003, "step": 13310 }, { "epoch": 2.893136403127715, "grad_norm": 5.180811405181885, "learning_rate": 1.3849370112945265e-05, "loss": 0.0053, "step": 13320 }, { "epoch": 2.8953084274543874, "grad_norm": 0.0012574810534715652, "learning_rate": 1.3822219808861861e-05, "loss": 0.0007, "step": 13330 }, { "epoch": 2.89748045178106, "grad_norm": 0.11162279546260834, "learning_rate": 1.3795069504778454e-05, "loss": 0.0017, "step": 13340 }, { "epoch": 2.8996524761077325, "grad_norm": 0.0011239303275942802, "learning_rate": 1.3767919200695047e-05, "loss": 0.0002, "step": 13350 }, { "epoch": 2.901824500434405, "grad_norm": 0.0021542648319154978, "learning_rate": 1.3740768896611642e-05, "loss": 0.0287, "step": 13360 }, { "epoch": 2.9039965247610775, "grad_norm": 0.0015336337964981794, "learning_rate": 1.3713618592528239e-05, "loss": 0.0005, "step": 13370 }, { "epoch": 2.9061685490877496, "grad_norm": 0.28698936104774475, "learning_rate": 1.3686468288444832e-05, "loss": 0.036, "step": 13380 }, { "epoch": 2.908340573414422, "grad_norm": 0.0016295438399538398, "learning_rate": 1.3659317984361425e-05, "loss": 0.0305, "step": 13390 }, { "epoch": 2.9105125977410946, "grad_norm": 0.0020589695777744055, "learning_rate": 1.363216768027802e-05, "loss": 0.0005, "step": 13400 }, { "epoch": 2.912684622067767, "grad_norm": 0.0016789559740573168, "learning_rate": 1.3605017376194614e-05, "loss": 0.0033, "step": 13410 }, { "epoch": 2.9148566463944396, "grad_norm": 0.10769294947385788, "learning_rate": 1.3577867072111209e-05, "loss": 0.0008, "step": 13420 }, { "epoch": 2.917028670721112, "grad_norm": 0.005054687615483999, "learning_rate": 1.3550716768027802e-05, "loss": 0.0012, "step": 13430 }, { "epoch": 2.9192006950477847, "grad_norm": 1.0613359212875366, "learning_rate": 1.3523566463944398e-05, "loss": 0.0084, "step": 13440 }, { "epoch": 2.9213727193744568, "grad_norm": 0.0010838696034625173, "learning_rate": 1.3496416159860991e-05, "loss": 0.0002, "step": 13450 }, { "epoch": 2.9235447437011297, "grad_norm": 0.0011025476269423962, "learning_rate": 1.3469265855777586e-05, "loss": 0.0008, "step": 13460 }, { "epoch": 2.925716768027802, "grad_norm": 0.001397005165927112, "learning_rate": 1.3442115551694179e-05, "loss": 0.0004, "step": 13470 }, { "epoch": 2.9278887923544743, "grad_norm": 0.027524210512638092, "learning_rate": 1.3414965247610775e-05, "loss": 0.0003, "step": 13480 }, { "epoch": 2.930060816681147, "grad_norm": 0.0011223404435440898, "learning_rate": 1.3387814943527368e-05, "loss": 0.0004, "step": 13490 }, { "epoch": 2.9322328410078193, "grad_norm": 0.0010830480605363846, "learning_rate": 1.3360664639443963e-05, "loss": 0.0002, "step": 13500 }, { "epoch": 2.934404865334492, "grad_norm": 0.0011122282594442368, "learning_rate": 1.3333514335360556e-05, "loss": 0.0002, "step": 13510 }, { "epoch": 2.9365768896611644, "grad_norm": 0.0010628863237798214, "learning_rate": 1.3306364031277152e-05, "loss": 0.0002, "step": 13520 }, { "epoch": 2.938748913987837, "grad_norm": 0.0013200805988162756, "learning_rate": 1.3279213727193745e-05, "loss": 0.0518, "step": 13530 }, { "epoch": 2.940920938314509, "grad_norm": 0.0017253914847970009, "learning_rate": 1.3252063423110338e-05, "loss": 0.0002, "step": 13540 }, { "epoch": 2.9430929626411815, "grad_norm": 0.007514182478189468, "learning_rate": 1.3224913119026933e-05, "loss": 0.0004, "step": 13550 }, { "epoch": 2.945264986967854, "grad_norm": 0.038544125854969025, "learning_rate": 1.319776281494353e-05, "loss": 0.0003, "step": 13560 }, { "epoch": 2.9474370112945265, "grad_norm": 0.01065383106470108, "learning_rate": 1.3170612510860123e-05, "loss": 0.0232, "step": 13570 }, { "epoch": 2.949609035621199, "grad_norm": 0.027386289089918137, "learning_rate": 1.3143462206776716e-05, "loss": 0.0072, "step": 13580 }, { "epoch": 2.9517810599478715, "grad_norm": 0.00448612542822957, "learning_rate": 1.311631190269331e-05, "loss": 0.0043, "step": 13590 }, { "epoch": 2.953953084274544, "grad_norm": 0.004412383772432804, "learning_rate": 1.3089161598609907e-05, "loss": 0.0184, "step": 13600 }, { "epoch": 2.956125108601216, "grad_norm": 0.012439540587365627, "learning_rate": 1.30620112945265e-05, "loss": 0.0044, "step": 13610 }, { "epoch": 2.9582971329278887, "grad_norm": 0.008847391232848167, "learning_rate": 1.3034860990443093e-05, "loss": 0.0004, "step": 13620 }, { "epoch": 2.960469157254561, "grad_norm": 0.0029101003892719746, "learning_rate": 1.3007710686359686e-05, "loss": 0.0009, "step": 13630 }, { "epoch": 2.9626411815812337, "grad_norm": 0.0015134647255763412, "learning_rate": 1.2980560382276282e-05, "loss": 0.0006, "step": 13640 }, { "epoch": 2.964813205907906, "grad_norm": 0.0011704802745953202, "learning_rate": 1.2953410078192877e-05, "loss": 0.0002, "step": 13650 }, { "epoch": 2.9669852302345787, "grad_norm": 0.0011589336208999157, "learning_rate": 1.292625977410947e-05, "loss": 0.0003, "step": 13660 }, { "epoch": 2.9691572545612512, "grad_norm": 0.002994675189256668, "learning_rate": 1.2899109470026066e-05, "loss": 0.0002, "step": 13670 }, { "epoch": 2.9713292788879233, "grad_norm": 0.0010929458076134324, "learning_rate": 1.287195916594266e-05, "loss": 0.0003, "step": 13680 }, { "epoch": 2.9735013032145963, "grad_norm": 0.0018180428305640817, "learning_rate": 1.2844808861859254e-05, "loss": 0.0002, "step": 13690 }, { "epoch": 2.9756733275412683, "grad_norm": 0.0012238157214596868, "learning_rate": 1.2817658557775847e-05, "loss": 0.0002, "step": 13700 }, { "epoch": 2.977845351867941, "grad_norm": 0.0010512187145650387, "learning_rate": 1.2790508253692443e-05, "loss": 0.0002, "step": 13710 }, { "epoch": 2.9800173761946134, "grad_norm": 0.0022942163050174713, "learning_rate": 1.2763357949609036e-05, "loss": 0.0006, "step": 13720 }, { "epoch": 2.982189400521286, "grad_norm": 0.0010182593250647187, "learning_rate": 1.273620764552563e-05, "loss": 0.0334, "step": 13730 }, { "epoch": 2.9843614248479584, "grad_norm": 0.0010593519546091557, "learning_rate": 1.2709057341442224e-05, "loss": 0.0001, "step": 13740 }, { "epoch": 2.9865334491746305, "grad_norm": 0.001036479021422565, "learning_rate": 1.268190703735882e-05, "loss": 0.0007, "step": 13750 }, { "epoch": 2.9887054735013034, "grad_norm": 0.014912799000740051, "learning_rate": 1.2654756733275414e-05, "loss": 0.0003, "step": 13760 }, { "epoch": 2.9908774978279755, "grad_norm": 0.0010219586547464132, "learning_rate": 1.2627606429192007e-05, "loss": 0.0007, "step": 13770 }, { "epoch": 2.993049522154648, "grad_norm": 0.001014968380331993, "learning_rate": 1.2600456125108601e-05, "loss": 0.0153, "step": 13780 }, { "epoch": 2.9952215464813206, "grad_norm": 0.001060599577613175, "learning_rate": 1.2573305821025198e-05, "loss": 0.0003, "step": 13790 }, { "epoch": 2.997393570807993, "grad_norm": 0.005443928763270378, "learning_rate": 1.254615551694179e-05, "loss": 0.0002, "step": 13800 }, { "epoch": 2.9995655951346656, "grad_norm": 0.0010318290442228317, "learning_rate": 1.2519005212858384e-05, "loss": 0.0006, "step": 13810 }, { "epoch": 3.0, "eval_f1": 0.6779661016949152, "eval_loss": 0.04916631802916527, "eval_runtime": 82.7215, "eval_samples_per_second": 120.585, "eval_steps_per_second": 7.543, "step": 13812 }, { "epoch": 3.001737619461338, "grad_norm": 0.0010593549814075232, "learning_rate": 1.2491854908774979e-05, "loss": 0.0002, "step": 13820 }, { "epoch": 3.0039096437880106, "grad_norm": 0.06401921808719635, "learning_rate": 1.2464704604691573e-05, "loss": 0.0006, "step": 13830 }, { "epoch": 3.0060816681146827, "grad_norm": 0.0010438418248668313, "learning_rate": 1.2437554300608168e-05, "loss": 0.0004, "step": 13840 }, { "epoch": 3.008253692441355, "grad_norm": 0.003679616143926978, "learning_rate": 1.2410403996524763e-05, "loss": 0.0003, "step": 13850 }, { "epoch": 3.0104257167680277, "grad_norm": 0.044540684670209885, "learning_rate": 1.2383253692441356e-05, "loss": 0.0008, "step": 13860 }, { "epoch": 3.0125977410947002, "grad_norm": 0.002288431627675891, "learning_rate": 1.235610338835795e-05, "loss": 0.0001, "step": 13870 }, { "epoch": 3.0147697654213728, "grad_norm": 0.0010103103704750538, "learning_rate": 1.2328953084274545e-05, "loss": 0.0001, "step": 13880 }, { "epoch": 3.0169417897480453, "grad_norm": 0.001005678903311491, "learning_rate": 1.230180278019114e-05, "loss": 0.0003, "step": 13890 }, { "epoch": 3.019113814074718, "grad_norm": 0.000996905262582004, "learning_rate": 1.2274652476107733e-05, "loss": 0.0005, "step": 13900 }, { "epoch": 3.0212858384013903, "grad_norm": 0.0010406819637864828, "learning_rate": 1.2247502172024328e-05, "loss": 0.0005, "step": 13910 }, { "epoch": 3.0234578627280624, "grad_norm": 0.0011022110702469945, "learning_rate": 1.222035186794092e-05, "loss": 0.0003, "step": 13920 }, { "epoch": 3.025629887054735, "grad_norm": 0.0009952927939593792, "learning_rate": 1.2193201563857517e-05, "loss": 0.0003, "step": 13930 }, { "epoch": 3.0278019113814074, "grad_norm": 0.007508167065680027, "learning_rate": 1.216605125977411e-05, "loss": 0.0001, "step": 13940 }, { "epoch": 3.02997393570808, "grad_norm": 0.0009940717136487365, "learning_rate": 1.2138900955690705e-05, "loss": 0.0002, "step": 13950 }, { "epoch": 3.0321459600347525, "grad_norm": 0.009310873225331306, "learning_rate": 1.2111750651607298e-05, "loss": 0.0004, "step": 13960 }, { "epoch": 3.034317984361425, "grad_norm": 0.0009831355419009924, "learning_rate": 1.2084600347523892e-05, "loss": 0.0002, "step": 13970 }, { "epoch": 3.0364900086880975, "grad_norm": 0.001007686834782362, "learning_rate": 1.2057450043440487e-05, "loss": 0.0002, "step": 13980 }, { "epoch": 3.0386620330147696, "grad_norm": 0.0009837389225140214, "learning_rate": 1.2030299739357082e-05, "loss": 0.0001, "step": 13990 }, { "epoch": 3.040834057341442, "grad_norm": 0.0009638213668949902, "learning_rate": 1.2003149435273675e-05, "loss": 0.0001, "step": 14000 }, { "epoch": 3.0430060816681146, "grad_norm": 0.0009538141894154251, "learning_rate": 1.197599913119027e-05, "loss": 0.0005, "step": 14010 }, { "epoch": 3.045178105994787, "grad_norm": 0.0009638393530622125, "learning_rate": 1.1948848827106864e-05, "loss": 0.0002, "step": 14020 }, { "epoch": 3.0473501303214596, "grad_norm": 0.00662460969761014, "learning_rate": 1.1921698523023459e-05, "loss": 0.0003, "step": 14030 }, { "epoch": 3.049522154648132, "grad_norm": 0.0009571721311658621, "learning_rate": 1.1894548218940054e-05, "loss": 0.0002, "step": 14040 }, { "epoch": 3.0516941789748047, "grad_norm": 0.0009538009180687368, "learning_rate": 1.1867397914856647e-05, "loss": 0.0001, "step": 14050 }, { "epoch": 3.053866203301477, "grad_norm": 0.00587887316942215, "learning_rate": 1.1840247610773241e-05, "loss": 0.0003, "step": 14060 }, { "epoch": 3.0560382276281493, "grad_norm": 0.00492137623950839, "learning_rate": 1.1813097306689836e-05, "loss": 0.0003, "step": 14070 }, { "epoch": 3.0582102519548218, "grad_norm": 0.0009409641497768462, "learning_rate": 1.178594700260643e-05, "loss": 0.0002, "step": 14080 }, { "epoch": 3.0603822762814943, "grad_norm": 0.0009386486490257084, "learning_rate": 1.1758796698523024e-05, "loss": 0.0003, "step": 14090 }, { "epoch": 3.062554300608167, "grad_norm": 0.005429286975413561, "learning_rate": 1.1731646394439619e-05, "loss": 0.0002, "step": 14100 }, { "epoch": 3.0647263249348393, "grad_norm": 0.005310894921422005, "learning_rate": 1.1704496090356212e-05, "loss": 0.0002, "step": 14110 }, { "epoch": 3.066898349261512, "grad_norm": 0.38952526450157166, "learning_rate": 1.1677345786272808e-05, "loss": 0.0008, "step": 14120 }, { "epoch": 3.0690703735881844, "grad_norm": 0.0009406186873093247, "learning_rate": 1.1650195482189401e-05, "loss": 0.0001, "step": 14130 }, { "epoch": 3.0712423979148564, "grad_norm": 0.01129077933728695, "learning_rate": 1.1623045178105996e-05, "loss": 0.0023, "step": 14140 }, { "epoch": 3.073414422241529, "grad_norm": 0.0024124332703649998, "learning_rate": 1.1595894874022589e-05, "loss": 0.0002, "step": 14150 }, { "epoch": 3.0755864465682015, "grad_norm": 0.006113865878432989, "learning_rate": 1.1568744569939183e-05, "loss": 0.0004, "step": 14160 }, { "epoch": 3.077758470894874, "grad_norm": 0.0014659430598840117, "learning_rate": 1.1541594265855778e-05, "loss": 0.0002, "step": 14170 }, { "epoch": 3.0799304952215465, "grad_norm": 0.004594105761498213, "learning_rate": 1.1514443961772373e-05, "loss": 0.0003, "step": 14180 }, { "epoch": 3.082102519548219, "grad_norm": 0.0009500061278231442, "learning_rate": 1.1487293657688966e-05, "loss": 0.025, "step": 14190 }, { "epoch": 3.0842745438748915, "grad_norm": 0.0009136287262663245, "learning_rate": 1.146014335360556e-05, "loss": 0.0218, "step": 14200 }, { "epoch": 3.086446568201564, "grad_norm": 0.01176744606345892, "learning_rate": 1.1432993049522155e-05, "loss": 0.0002, "step": 14210 }, { "epoch": 3.088618592528236, "grad_norm": 0.0009232366573996842, "learning_rate": 1.140584274543875e-05, "loss": 0.026, "step": 14220 }, { "epoch": 3.0907906168549086, "grad_norm": 0.0009313057526014745, "learning_rate": 1.1378692441355343e-05, "loss": 0.0401, "step": 14230 }, { "epoch": 3.092962641181581, "grad_norm": 0.0009547146037220955, "learning_rate": 1.1351542137271938e-05, "loss": 0.0001, "step": 14240 }, { "epoch": 3.0951346655082537, "grad_norm": 0.0010204818099737167, "learning_rate": 1.1324391833188532e-05, "loss": 0.001, "step": 14250 }, { "epoch": 3.097306689834926, "grad_norm": 0.0015138749731704593, "learning_rate": 1.1297241529105127e-05, "loss": 0.0002, "step": 14260 }, { "epoch": 3.0994787141615987, "grad_norm": 0.0009715591440908611, "learning_rate": 1.1270091225021722e-05, "loss": 0.0002, "step": 14270 }, { "epoch": 3.101650738488271, "grad_norm": 0.001034111832268536, "learning_rate": 1.1242940920938315e-05, "loss": 0.0002, "step": 14280 }, { "epoch": 3.1038227628149437, "grad_norm": 0.09780330210924149, "learning_rate": 1.121579061685491e-05, "loss": 0.0008, "step": 14290 }, { "epoch": 3.105994787141616, "grad_norm": 0.012254135683178902, "learning_rate": 1.1188640312771503e-05, "loss": 0.0002, "step": 14300 }, { "epoch": 3.1081668114682883, "grad_norm": 0.0010856038425117731, "learning_rate": 1.1161490008688099e-05, "loss": 0.0002, "step": 14310 }, { "epoch": 3.110338835794961, "grad_norm": 0.0009265311527997255, "learning_rate": 1.1134339704604692e-05, "loss": 0.0003, "step": 14320 }, { "epoch": 3.1125108601216334, "grad_norm": 0.0013762598391622305, "learning_rate": 1.1107189400521287e-05, "loss": 0.0003, "step": 14330 }, { "epoch": 3.114682884448306, "grad_norm": 0.0012818133691325784, "learning_rate": 1.108003909643788e-05, "loss": 0.0037, "step": 14340 }, { "epoch": 3.1168549087749784, "grad_norm": 0.000965480983722955, "learning_rate": 1.1052888792354474e-05, "loss": 0.0198, "step": 14350 }, { "epoch": 3.119026933101651, "grad_norm": 0.0009158066241070628, "learning_rate": 1.102573848827107e-05, "loss": 0.013, "step": 14360 }, { "epoch": 3.121198957428323, "grad_norm": 0.005339405033737421, "learning_rate": 1.0998588184187664e-05, "loss": 0.0004, "step": 14370 }, { "epoch": 3.1233709817549955, "grad_norm": 0.000903356762137264, "learning_rate": 1.0971437880104257e-05, "loss": 0.0002, "step": 14380 }, { "epoch": 3.125543006081668, "grad_norm": 0.0009056212729774415, "learning_rate": 1.0944287576020852e-05, "loss": 0.0094, "step": 14390 }, { "epoch": 3.1277150304083405, "grad_norm": 0.004547704942524433, "learning_rate": 1.0917137271937446e-05, "loss": 0.0001, "step": 14400 }, { "epoch": 3.129887054735013, "grad_norm": 0.006993583869189024, "learning_rate": 1.0889986967854041e-05, "loss": 0.0014, "step": 14410 }, { "epoch": 3.1320590790616856, "grad_norm": 0.0009027125779539347, "learning_rate": 1.0862836663770634e-05, "loss": 0.0055, "step": 14420 }, { "epoch": 3.134231103388358, "grad_norm": 2.2096879482269287, "learning_rate": 1.0835686359687229e-05, "loss": 0.0412, "step": 14430 }, { "epoch": 3.1364031277150306, "grad_norm": 0.0009054460097104311, "learning_rate": 1.0808536055603822e-05, "loss": 0.0001, "step": 14440 }, { "epoch": 3.1385751520417027, "grad_norm": 0.0009118215530179441, "learning_rate": 1.0781385751520418e-05, "loss": 0.0004, "step": 14450 }, { "epoch": 3.140747176368375, "grad_norm": 0.03558327257633209, "learning_rate": 1.0754235447437013e-05, "loss": 0.0004, "step": 14460 }, { "epoch": 3.1429192006950477, "grad_norm": 0.0009161134366877377, "learning_rate": 1.0727085143353606e-05, "loss": 0.0002, "step": 14470 }, { "epoch": 3.1450912250217202, "grad_norm": 0.0009112968109548092, "learning_rate": 1.06999348392702e-05, "loss": 0.0001, "step": 14480 }, { "epoch": 3.1472632493483927, "grad_norm": 0.26400279998779297, "learning_rate": 1.0672784535186794e-05, "loss": 0.0054, "step": 14490 }, { "epoch": 3.1494352736750653, "grad_norm": 0.0011174866231158376, "learning_rate": 1.064563423110339e-05, "loss": 0.0062, "step": 14500 }, { "epoch": 3.151607298001738, "grad_norm": 0.0009248264250345528, "learning_rate": 1.0618483927019983e-05, "loss": 0.0044, "step": 14510 }, { "epoch": 3.1537793223284103, "grad_norm": 0.00093687983462587, "learning_rate": 1.0591333622936578e-05, "loss": 0.0021, "step": 14520 }, { "epoch": 3.1559513466550824, "grad_norm": 0.0012292963219806552, "learning_rate": 1.056418331885317e-05, "loss": 0.0016, "step": 14530 }, { "epoch": 3.158123370981755, "grad_norm": 0.1130121722817421, "learning_rate": 1.0537033014769766e-05, "loss": 0.0007, "step": 14540 }, { "epoch": 3.1602953953084274, "grad_norm": 0.001622872776351869, "learning_rate": 1.0512597741094701e-05, "loss": 0.0102, "step": 14550 }, { "epoch": 3.1624674196351, "grad_norm": 0.01409598533064127, "learning_rate": 1.0485447437011296e-05, "loss": 0.0116, "step": 14560 }, { "epoch": 3.1646394439617724, "grad_norm": 0.000908426649402827, "learning_rate": 1.045829713292789e-05, "loss": 0.0182, "step": 14570 }, { "epoch": 3.166811468288445, "grad_norm": 0.0009166007512249053, "learning_rate": 1.0431146828844483e-05, "loss": 0.0003, "step": 14580 }, { "epoch": 3.1689834926151175, "grad_norm": 0.0008866732241585851, "learning_rate": 1.0403996524761078e-05, "loss": 0.0005, "step": 14590 }, { "epoch": 3.1711555169417895, "grad_norm": 0.0011180889559909701, "learning_rate": 1.0376846220677671e-05, "loss": 0.0127, "step": 14600 }, { "epoch": 3.173327541268462, "grad_norm": 0.011834468692541122, "learning_rate": 1.0349695916594268e-05, "loss": 0.0003, "step": 14610 }, { "epoch": 3.1754995655951346, "grad_norm": 0.0009702285169623792, "learning_rate": 1.032254561251086e-05, "loss": 0.0097, "step": 14620 }, { "epoch": 3.177671589921807, "grad_norm": 0.000917663041036576, "learning_rate": 1.0295395308427455e-05, "loss": 0.0001, "step": 14630 }, { "epoch": 3.1798436142484796, "grad_norm": 0.0103968670591712, "learning_rate": 1.0268245004344048e-05, "loss": 0.0003, "step": 14640 }, { "epoch": 3.182015638575152, "grad_norm": 0.0010382416658103466, "learning_rate": 1.0241094700260643e-05, "loss": 0.0019, "step": 14650 }, { "epoch": 3.1841876629018246, "grad_norm": 0.009599311277270317, "learning_rate": 1.0213944396177238e-05, "loss": 0.0009, "step": 14660 }, { "epoch": 3.186359687228497, "grad_norm": 0.0008943171706050634, "learning_rate": 1.0186794092093832e-05, "loss": 0.0015, "step": 14670 }, { "epoch": 3.1885317115551692, "grad_norm": 0.0009029650827869773, "learning_rate": 1.0159643788010425e-05, "loss": 0.0002, "step": 14680 }, { "epoch": 3.1907037358818418, "grad_norm": 0.0009432806400582194, "learning_rate": 1.013249348392702e-05, "loss": 0.0002, "step": 14690 }, { "epoch": 3.1928757602085143, "grad_norm": 0.00372710800729692, "learning_rate": 1.0105343179843615e-05, "loss": 0.0005, "step": 14700 }, { "epoch": 3.195047784535187, "grad_norm": 0.0008843187242746353, "learning_rate": 1.007819287576021e-05, "loss": 0.0003, "step": 14710 }, { "epoch": 3.1972198088618593, "grad_norm": 0.0008964376756921411, "learning_rate": 1.0051042571676804e-05, "loss": 0.0002, "step": 14720 }, { "epoch": 3.199391833188532, "grad_norm": 0.0008904538117349148, "learning_rate": 1.0023892267593397e-05, "loss": 0.0002, "step": 14730 }, { "epoch": 3.2015638575152043, "grad_norm": 0.0008782143704593182, "learning_rate": 9.996741963509992e-06, "loss": 0.0073, "step": 14740 }, { "epoch": 3.203735881841877, "grad_norm": 0.0008750375709496439, "learning_rate": 9.969591659426587e-06, "loss": 0.0002, "step": 14750 }, { "epoch": 3.205907906168549, "grad_norm": 8.284111022949219, "learning_rate": 9.942441355343181e-06, "loss": 0.002, "step": 14760 }, { "epoch": 3.2080799304952214, "grad_norm": 0.001080102170817554, "learning_rate": 9.915291051259774e-06, "loss": 0.0001, "step": 14770 }, { "epoch": 3.210251954821894, "grad_norm": 0.0008838811190798879, "learning_rate": 9.888140747176369e-06, "loss": 0.0002, "step": 14780 }, { "epoch": 3.2124239791485665, "grad_norm": 0.0008833123138174415, "learning_rate": 9.860990443092962e-06, "loss": 0.0003, "step": 14790 }, { "epoch": 3.214596003475239, "grad_norm": 0.01015100721269846, "learning_rate": 9.833840139009559e-06, "loss": 0.0003, "step": 14800 }, { "epoch": 3.2167680278019115, "grad_norm": 0.0013419504975900054, "learning_rate": 9.806689834926152e-06, "loss": 0.0003, "step": 14810 }, { "epoch": 3.218940052128584, "grad_norm": 0.0012950883246958256, "learning_rate": 9.779539530842746e-06, "loss": 0.0003, "step": 14820 }, { "epoch": 3.221112076455256, "grad_norm": 0.0009361078846268356, "learning_rate": 9.75238922675934e-06, "loss": 0.0001, "step": 14830 }, { "epoch": 3.2232841007819286, "grad_norm": 0.0008646132191643119, "learning_rate": 9.725238922675934e-06, "loss": 0.0002, "step": 14840 }, { "epoch": 3.225456125108601, "grad_norm": 0.0008529993938282132, "learning_rate": 9.698088618592529e-06, "loss": 0.0018, "step": 14850 }, { "epoch": 3.2276281494352737, "grad_norm": 0.000858749495819211, "learning_rate": 9.670938314509123e-06, "loss": 0.0001, "step": 14860 }, { "epoch": 3.229800173761946, "grad_norm": 0.0008768062107264996, "learning_rate": 9.643788010425716e-06, "loss": 0.0002, "step": 14870 }, { "epoch": 3.2319721980886187, "grad_norm": 0.0008445355342701077, "learning_rate": 9.616637706342311e-06, "loss": 0.0002, "step": 14880 }, { "epoch": 3.234144222415291, "grad_norm": 0.0014718384481966496, "learning_rate": 9.589487402258906e-06, "loss": 0.0033, "step": 14890 }, { "epoch": 3.2363162467419633, "grad_norm": 0.0008432904141955078, "learning_rate": 9.5623370981755e-06, "loss": 0.0002, "step": 14900 }, { "epoch": 3.238488271068636, "grad_norm": 0.0008551370119675994, "learning_rate": 9.535186794092095e-06, "loss": 0.0137, "step": 14910 }, { "epoch": 3.2406602953953083, "grad_norm": 0.0009141720947809517, "learning_rate": 9.508036490008688e-06, "loss": 0.0338, "step": 14920 }, { "epoch": 3.242832319721981, "grad_norm": 0.0008974664960987866, "learning_rate": 9.480886185925283e-06, "loss": 0.0002, "step": 14930 }, { "epoch": 3.2450043440486533, "grad_norm": 1.5477900505065918, "learning_rate": 9.453735881841878e-06, "loss": 0.018, "step": 14940 }, { "epoch": 3.247176368375326, "grad_norm": 0.0009042550809681416, "learning_rate": 9.426585577758472e-06, "loss": 0.0003, "step": 14950 }, { "epoch": 3.2493483927019984, "grad_norm": 0.22629916667938232, "learning_rate": 9.399435273675065e-06, "loss": 0.0014, "step": 14960 }, { "epoch": 3.251520417028671, "grad_norm": 0.0008919961983337998, "learning_rate": 9.37228496959166e-06, "loss": 0.0005, "step": 14970 }, { "epoch": 3.2536924413553434, "grad_norm": 0.0008801415096968412, "learning_rate": 9.345134665508253e-06, "loss": 0.0022, "step": 14980 }, { "epoch": 3.2558644656820155, "grad_norm": 0.24160538613796234, "learning_rate": 9.31798436142485e-06, "loss": 0.0119, "step": 14990 }, { "epoch": 3.258036490008688, "grad_norm": 0.019302744418382645, "learning_rate": 9.290834057341443e-06, "loss": 0.0008, "step": 15000 }, { "epoch": 3.2602085143353605, "grad_norm": 0.0008701402693986893, "learning_rate": 9.263683753258037e-06, "loss": 0.0055, "step": 15010 }, { "epoch": 3.262380538662033, "grad_norm": 0.0008556676330044866, "learning_rate": 9.23653344917463e-06, "loss": 0.0007, "step": 15020 }, { "epoch": 3.2645525629887056, "grad_norm": 0.0008663336047902703, "learning_rate": 9.209383145091225e-06, "loss": 0.0006, "step": 15030 }, { "epoch": 3.266724587315378, "grad_norm": 0.1436619907617569, "learning_rate": 9.18223284100782e-06, "loss": 0.0038, "step": 15040 }, { "epoch": 3.2688966116420506, "grad_norm": 0.0009035149705596268, "learning_rate": 9.155082536924414e-06, "loss": 0.0002, "step": 15050 }, { "epoch": 3.2710686359687227, "grad_norm": 0.0008320367196574807, "learning_rate": 9.127932232841007e-06, "loss": 0.0001, "step": 15060 }, { "epoch": 3.273240660295395, "grad_norm": 0.0010443541686981916, "learning_rate": 9.100781928757602e-06, "loss": 0.0002, "step": 15070 }, { "epoch": 3.2754126846220677, "grad_norm": 0.0015021273866295815, "learning_rate": 9.073631624674197e-06, "loss": 0.0002, "step": 15080 }, { "epoch": 3.27758470894874, "grad_norm": 0.0009816536912694573, "learning_rate": 9.046481320590792e-06, "loss": 0.0001, "step": 15090 }, { "epoch": 3.2797567332754127, "grad_norm": 0.0013045056257396936, "learning_rate": 9.019331016507385e-06, "loss": 0.0107, "step": 15100 }, { "epoch": 3.2819287576020852, "grad_norm": 0.002013902645558119, "learning_rate": 8.99218071242398e-06, "loss": 0.0004, "step": 15110 }, { "epoch": 3.2841007819287578, "grad_norm": 1.3247151374816895, "learning_rate": 8.965030408340574e-06, "loss": 0.0074, "step": 15120 }, { "epoch": 3.28627280625543, "grad_norm": 0.0020623058080673218, "learning_rate": 8.937880104257169e-06, "loss": 0.0002, "step": 15130 }, { "epoch": 3.2884448305821024, "grad_norm": 0.003490692237392068, "learning_rate": 8.910729800173763e-06, "loss": 0.0003, "step": 15140 }, { "epoch": 3.290616854908775, "grad_norm": 0.0008333229343406856, "learning_rate": 8.883579496090357e-06, "loss": 0.0001, "step": 15150 }, { "epoch": 3.2927888792354474, "grad_norm": 0.0008364542154595256, "learning_rate": 8.856429192006951e-06, "loss": 0.0028, "step": 15160 }, { "epoch": 3.29496090356212, "grad_norm": 0.005969099700450897, "learning_rate": 8.829278887923544e-06, "loss": 0.0058, "step": 15170 }, { "epoch": 3.2971329278887924, "grad_norm": 0.0008359033381566405, "learning_rate": 8.80212858384014e-06, "loss": 0.0001, "step": 15180 }, { "epoch": 3.299304952215465, "grad_norm": 0.004270035773515701, "learning_rate": 8.774978279756734e-06, "loss": 0.0001, "step": 15190 }, { "epoch": 3.3014769765421375, "grad_norm": 0.0008729678229428828, "learning_rate": 8.747827975673328e-06, "loss": 0.0003, "step": 15200 }, { "epoch": 3.30364900086881, "grad_norm": 5.316149711608887, "learning_rate": 8.720677671589921e-06, "loss": 0.0102, "step": 15210 }, { "epoch": 3.305821025195482, "grad_norm": 0.0008195647387765348, "learning_rate": 8.693527367506516e-06, "loss": 0.0003, "step": 15220 }, { "epoch": 3.3079930495221546, "grad_norm": 0.0008153934613801539, "learning_rate": 8.66637706342311e-06, "loss": 0.0001, "step": 15230 }, { "epoch": 3.310165073848827, "grad_norm": 0.022830063477158546, "learning_rate": 8.639226759339706e-06, "loss": 0.0004, "step": 15240 }, { "epoch": 3.3123370981754996, "grad_norm": 0.0008046124130487442, "learning_rate": 8.612076455256299e-06, "loss": 0.0002, "step": 15250 }, { "epoch": 3.314509122502172, "grad_norm": 0.00419920589774847, "learning_rate": 8.584926151172893e-06, "loss": 0.0001, "step": 15260 }, { "epoch": 3.3166811468288446, "grad_norm": 0.008058223873376846, "learning_rate": 8.557775847089488e-06, "loss": 0.0179, "step": 15270 }, { "epoch": 3.318853171155517, "grad_norm": 0.0008202812750823796, "learning_rate": 8.530625543006083e-06, "loss": 0.0002, "step": 15280 }, { "epoch": 3.321025195482189, "grad_norm": 0.0008240279275923967, "learning_rate": 8.503475238922676e-06, "loss": 0.0041, "step": 15290 }, { "epoch": 3.3231972198088617, "grad_norm": 0.00086255744099617, "learning_rate": 8.47632493483927e-06, "loss": 0.0001, "step": 15300 }, { "epoch": 3.3253692441355343, "grad_norm": 0.0007995104533620179, "learning_rate": 8.449174630755863e-06, "loss": 0.001, "step": 15310 }, { "epoch": 3.3275412684622068, "grad_norm": 0.0010926051763817668, "learning_rate": 8.42202432667246e-06, "loss": 0.0001, "step": 15320 }, { "epoch": 3.3297132927888793, "grad_norm": 0.0010417832527309656, "learning_rate": 8.394874022589055e-06, "loss": 0.0002, "step": 15330 }, { "epoch": 3.331885317115552, "grad_norm": 0.0009360710973851383, "learning_rate": 8.367723718505648e-06, "loss": 0.0002, "step": 15340 }, { "epoch": 3.3340573414422243, "grad_norm": 0.0008018920780159533, "learning_rate": 8.340573414422242e-06, "loss": 0.0042, "step": 15350 }, { "epoch": 3.3362293657688964, "grad_norm": 0.028158850967884064, "learning_rate": 8.313423110338835e-06, "loss": 0.0049, "step": 15360 }, { "epoch": 3.338401390095569, "grad_norm": 0.0008068516617640853, "learning_rate": 8.286272806255432e-06, "loss": 0.0002, "step": 15370 }, { "epoch": 3.3405734144222414, "grad_norm": 0.000881023530382663, "learning_rate": 8.259122502172025e-06, "loss": 0.0002, "step": 15380 }, { "epoch": 3.342745438748914, "grad_norm": 0.0008173162932507694, "learning_rate": 8.23197219808862e-06, "loss": 0.0001, "step": 15390 }, { "epoch": 3.3449174630755865, "grad_norm": 0.000863785739056766, "learning_rate": 8.204821894005212e-06, "loss": 0.0003, "step": 15400 }, { "epoch": 3.347089487402259, "grad_norm": 0.03187748044729233, "learning_rate": 8.177671589921807e-06, "loss": 0.0003, "step": 15410 }, { "epoch": 3.3492615117289315, "grad_norm": 0.0008539878181181848, "learning_rate": 8.150521285838402e-06, "loss": 0.0047, "step": 15420 }, { "epoch": 3.351433536055604, "grad_norm": 0.0008244166965596378, "learning_rate": 8.123370981754997e-06, "loss": 0.0001, "step": 15430 }, { "epoch": 3.3536055603822765, "grad_norm": 0.0009109236998483539, "learning_rate": 8.09622067767159e-06, "loss": 0.0117, "step": 15440 }, { "epoch": 3.3557775847089486, "grad_norm": 0.0009838317055255175, "learning_rate": 8.069070373588184e-06, "loss": 0.0002, "step": 15450 }, { "epoch": 3.357949609035621, "grad_norm": 0.0014414238976314664, "learning_rate": 8.041920069504779e-06, "loss": 0.0001, "step": 15460 }, { "epoch": 3.3601216333622936, "grad_norm": 0.0042699044570326805, "learning_rate": 8.014769765421374e-06, "loss": 0.0001, "step": 15470 }, { "epoch": 3.362293657688966, "grad_norm": 0.048958804458379745, "learning_rate": 7.987619461337967e-06, "loss": 0.0057, "step": 15480 }, { "epoch": 3.3644656820156387, "grad_norm": 0.0007931030704639852, "learning_rate": 7.960469157254561e-06, "loss": 0.0003, "step": 15490 }, { "epoch": 3.366637706342311, "grad_norm": 0.0008370107971131802, "learning_rate": 7.933318853171154e-06, "loss": 0.0058, "step": 15500 }, { "epoch": 3.3688097306689837, "grad_norm": 0.0008241998148150742, "learning_rate": 7.906168549087751e-06, "loss": 0.0001, "step": 15510 }, { "epoch": 3.3709817549956558, "grad_norm": 0.0010215636575594544, "learning_rate": 7.879018245004346e-06, "loss": 0.0054, "step": 15520 }, { "epoch": 3.3731537793223283, "grad_norm": 0.0008759453776292503, "learning_rate": 7.851867940920939e-06, "loss": 0.0003, "step": 15530 }, { "epoch": 3.375325803649001, "grad_norm": 0.0008560109417885542, "learning_rate": 7.824717636837533e-06, "loss": 0.0002, "step": 15540 }, { "epoch": 3.3774978279756733, "grad_norm": 0.0008002111571840942, "learning_rate": 7.797567332754126e-06, "loss": 0.0004, "step": 15550 }, { "epoch": 3.379669852302346, "grad_norm": 0.0007969232392497361, "learning_rate": 7.770417028670723e-06, "loss": 0.0002, "step": 15560 }, { "epoch": 3.3818418766290184, "grad_norm": 0.000970225315541029, "learning_rate": 7.743266724587316e-06, "loss": 0.0002, "step": 15570 }, { "epoch": 3.384013900955691, "grad_norm": 0.0008663604385219514, "learning_rate": 7.71611642050391e-06, "loss": 0.0039, "step": 15580 }, { "epoch": 3.386185925282363, "grad_norm": 0.0031977728940546513, "learning_rate": 7.688966116420503e-06, "loss": 0.0002, "step": 15590 }, { "epoch": 3.3883579496090355, "grad_norm": 0.0008602018351666629, "learning_rate": 7.661815812337098e-06, "loss": 0.0169, "step": 15600 }, { "epoch": 3.390529973935708, "grad_norm": 0.0009347721934318542, "learning_rate": 7.634665508253693e-06, "loss": 0.0056, "step": 15610 }, { "epoch": 3.3927019982623805, "grad_norm": 0.0007914070738479495, "learning_rate": 7.607515204170288e-06, "loss": 0.0001, "step": 15620 }, { "epoch": 3.394874022589053, "grad_norm": 0.0007933730375953019, "learning_rate": 7.580364900086881e-06, "loss": 0.0099, "step": 15630 }, { "epoch": 3.3970460469157255, "grad_norm": 0.000815921404864639, "learning_rate": 7.553214596003476e-06, "loss": 0.0101, "step": 15640 }, { "epoch": 3.399218071242398, "grad_norm": 0.0013794410042464733, "learning_rate": 7.526064291920069e-06, "loss": 0.0002, "step": 15650 }, { "epoch": 3.40139009556907, "grad_norm": 0.0015276771737262607, "learning_rate": 7.498913987836664e-06, "loss": 0.0024, "step": 15660 }, { "epoch": 3.4035621198957426, "grad_norm": 0.0007931669242680073, "learning_rate": 7.471763683753258e-06, "loss": 0.0001, "step": 15670 }, { "epoch": 3.405734144222415, "grad_norm": 0.0007988855941221118, "learning_rate": 7.4446133796698525e-06, "loss": 0.0005, "step": 15680 }, { "epoch": 3.4079061685490877, "grad_norm": 0.0007956427871249616, "learning_rate": 7.417463075586446e-06, "loss": 0.0001, "step": 15690 }, { "epoch": 3.41007819287576, "grad_norm": 0.8127291202545166, "learning_rate": 7.390312771503041e-06, "loss": 0.005, "step": 15700 }, { "epoch": 3.4122502172024327, "grad_norm": 0.0007987445569597185, "learning_rate": 7.363162467419635e-06, "loss": 0.0001, "step": 15710 }, { "epoch": 3.4144222415291052, "grad_norm": 0.0007936052861623466, "learning_rate": 7.33601216333623e-06, "loss": 0.0038, "step": 15720 }, { "epoch": 3.4165942658557777, "grad_norm": 0.0008497874368913472, "learning_rate": 7.308861859252824e-06, "loss": 0.0001, "step": 15730 }, { "epoch": 3.4187662901824503, "grad_norm": 0.0007960118819028139, "learning_rate": 7.281711555169418e-06, "loss": 0.0001, "step": 15740 }, { "epoch": 3.4209383145091223, "grad_norm": 1.809430480003357, "learning_rate": 7.254561251086013e-06, "loss": 0.0114, "step": 15750 }, { "epoch": 3.423110338835795, "grad_norm": 0.003907696343958378, "learning_rate": 7.227410947002607e-06, "loss": 0.0001, "step": 15760 }, { "epoch": 3.4252823631624674, "grad_norm": 0.0008068581810221076, "learning_rate": 7.2002606429192015e-06, "loss": 0.0001, "step": 15770 }, { "epoch": 3.42745438748914, "grad_norm": 0.0009195163147523999, "learning_rate": 7.173110338835795e-06, "loss": 0.0001, "step": 15780 }, { "epoch": 3.4296264118158124, "grad_norm": 0.00783773697912693, "learning_rate": 7.14596003475239e-06, "loss": 0.0002, "step": 15790 }, { "epoch": 3.431798436142485, "grad_norm": 0.0008429814479313791, "learning_rate": 7.118809730668984e-06, "loss": 0.0001, "step": 15800 }, { "epoch": 3.4339704604691574, "grad_norm": 0.0007842437480576336, "learning_rate": 7.091659426585579e-06, "loss": 0.0002, "step": 15810 }, { "epoch": 3.4361424847958295, "grad_norm": 0.0007843258208595216, "learning_rate": 7.064509122502172e-06, "loss": 0.0001, "step": 15820 }, { "epoch": 3.438314509122502, "grad_norm": 0.0032298804726451635, "learning_rate": 7.037358818418767e-06, "loss": 0.0003, "step": 15830 }, { "epoch": 3.4404865334491745, "grad_norm": 0.0035422821529209614, "learning_rate": 7.01020851433536e-06, "loss": 0.0002, "step": 15840 }, { "epoch": 3.442658557775847, "grad_norm": 0.0007916768663562834, "learning_rate": 6.983058210251956e-06, "loss": 0.0001, "step": 15850 }, { "epoch": 3.4448305821025196, "grad_norm": 0.003323981538414955, "learning_rate": 6.955907906168549e-06, "loss": 0.0006, "step": 15860 }, { "epoch": 3.447002606429192, "grad_norm": 0.0007974683539941907, "learning_rate": 6.9287576020851435e-06, "loss": 0.0003, "step": 15870 }, { "epoch": 3.4491746307558646, "grad_norm": 0.002854996593669057, "learning_rate": 6.901607298001737e-06, "loss": 0.0001, "step": 15880 }, { "epoch": 3.4513466550825367, "grad_norm": 0.0008318639011122286, "learning_rate": 6.874456993918332e-06, "loss": 0.0001, "step": 15890 }, { "epoch": 3.453518679409209, "grad_norm": 0.0008181575685739517, "learning_rate": 6.847306689834926e-06, "loss": 0.0001, "step": 15900 }, { "epoch": 3.4556907037358817, "grad_norm": 0.000765151169616729, "learning_rate": 6.820156385751521e-06, "loss": 0.0001, "step": 15910 }, { "epoch": 3.4578627280625542, "grad_norm": 0.0007698533590883017, "learning_rate": 6.793006081668115e-06, "loss": 0.0026, "step": 15920 }, { "epoch": 3.4600347523892268, "grad_norm": 0.0137395691126585, "learning_rate": 6.765855777584709e-06, "loss": 0.0002, "step": 15930 }, { "epoch": 3.4622067767158993, "grad_norm": 0.010664197616279125, "learning_rate": 6.738705473501304e-06, "loss": 0.0001, "step": 15940 }, { "epoch": 3.464378801042572, "grad_norm": 0.001031374093145132, "learning_rate": 6.711555169417898e-06, "loss": 0.0001, "step": 15950 }, { "epoch": 3.4665508253692443, "grad_norm": 0.0018270538421347737, "learning_rate": 6.6844048653344925e-06, "loss": 0.0418, "step": 15960 }, { "epoch": 3.468722849695917, "grad_norm": 0.0007723259041085839, "learning_rate": 6.657254561251086e-06, "loss": 0.0001, "step": 15970 }, { "epoch": 3.470894874022589, "grad_norm": 0.0008250788087025285, "learning_rate": 6.630104257167681e-06, "loss": 0.0002, "step": 15980 }, { "epoch": 3.4730668983492614, "grad_norm": 0.0007811134564690292, "learning_rate": 6.602953953084275e-06, "loss": 0.0006, "step": 15990 }, { "epoch": 3.475238922675934, "grad_norm": 0.000832605641335249, "learning_rate": 6.57580364900087e-06, "loss": 0.0002, "step": 16000 }, { "epoch": 3.4774109470026064, "grad_norm": 0.0007671408238820732, "learning_rate": 6.548653344917463e-06, "loss": 0.0002, "step": 16010 }, { "epoch": 3.479582971329279, "grad_norm": 0.0007610549218952656, "learning_rate": 6.521503040834058e-06, "loss": 0.0003, "step": 16020 }, { "epoch": 3.4817549956559515, "grad_norm": 0.0007617721566930413, "learning_rate": 6.494352736750651e-06, "loss": 0.0011, "step": 16030 }, { "epoch": 3.483927019982624, "grad_norm": 0.001039681606926024, "learning_rate": 6.467202432667247e-06, "loss": 0.0001, "step": 16040 }, { "epoch": 3.486099044309296, "grad_norm": 0.0007607974112033844, "learning_rate": 6.44005212858384e-06, "loss": 0.0001, "step": 16050 }, { "epoch": 3.4882710686359686, "grad_norm": 0.0007605382124893367, "learning_rate": 6.4129018245004346e-06, "loss": 0.0001, "step": 16060 }, { "epoch": 3.490443092962641, "grad_norm": 0.0007544245454482734, "learning_rate": 6.3857515204170284e-06, "loss": 0.0002, "step": 16070 }, { "epoch": 3.4926151172893136, "grad_norm": 0.0007656171219423413, "learning_rate": 6.358601216333623e-06, "loss": 0.0001, "step": 16080 }, { "epoch": 3.494787141615986, "grad_norm": 0.0007810737006366253, "learning_rate": 6.331450912250217e-06, "loss": 0.0001, "step": 16090 }, { "epoch": 3.4969591659426587, "grad_norm": 0.1456904411315918, "learning_rate": 6.304300608166812e-06, "loss": 0.0003, "step": 16100 }, { "epoch": 3.499131190269331, "grad_norm": 0.0007609643507748842, "learning_rate": 6.277150304083406e-06, "loss": 0.0001, "step": 16110 }, { "epoch": 3.5013032145960032, "grad_norm": 0.0034587134141474962, "learning_rate": 6.25e-06, "loss": 0.0002, "step": 16120 }, { "epoch": 3.503475238922676, "grad_norm": 0.0008425491396337748, "learning_rate": 6.222849695916594e-06, "loss": 0.0001, "step": 16130 }, { "epoch": 3.5056472632493483, "grad_norm": 0.0007616893853992224, "learning_rate": 6.195699391833189e-06, "loss": 0.001, "step": 16140 }, { "epoch": 3.507819287576021, "grad_norm": 0.000824123912025243, "learning_rate": 6.168549087749783e-06, "loss": 0.0001, "step": 16150 }, { "epoch": 3.5099913119026933, "grad_norm": 0.0007427196251228452, "learning_rate": 6.1413987836663775e-06, "loss": 0.0001, "step": 16160 }, { "epoch": 3.512163336229366, "grad_norm": 0.0027246661484241486, "learning_rate": 6.114248479582971e-06, "loss": 0.0001, "step": 16170 }, { "epoch": 3.5143353605560383, "grad_norm": 0.0007525637629441917, "learning_rate": 6.087098175499566e-06, "loss": 0.0001, "step": 16180 }, { "epoch": 3.5165073848827104, "grad_norm": 0.17882095277309418, "learning_rate": 6.05994787141616e-06, "loss": 0.0003, "step": 16190 }, { "epoch": 3.5186794092093834, "grad_norm": 0.0022536071483045816, "learning_rate": 6.032797567332754e-06, "loss": 0.0002, "step": 16200 }, { "epoch": 3.5208514335360555, "grad_norm": 0.0007445691735483706, "learning_rate": 6.0056472632493485e-06, "loss": 0.0001, "step": 16210 }, { "epoch": 3.523023457862728, "grad_norm": 0.0007559970254078507, "learning_rate": 5.978496959165942e-06, "loss": 0.0194, "step": 16220 }, { "epoch": 3.5251954821894005, "grad_norm": 0.001018458278849721, "learning_rate": 5.951346655082538e-06, "loss": 0.0001, "step": 16230 }, { "epoch": 3.527367506516073, "grad_norm": 0.0007447813986800611, "learning_rate": 5.924196350999132e-06, "loss": 0.0001, "step": 16240 }, { "epoch": 3.5295395308427455, "grad_norm": 0.0007549290312454104, "learning_rate": 5.897046046915726e-06, "loss": 0.0001, "step": 16250 }, { "epoch": 3.531711555169418, "grad_norm": 0.0007536158664152026, "learning_rate": 5.86989574283232e-06, "loss": 0.0054, "step": 16260 }, { "epoch": 3.5338835794960906, "grad_norm": 0.000750150007661432, "learning_rate": 5.842745438748914e-06, "loss": 0.0001, "step": 16270 }, { "epoch": 3.5360556038227626, "grad_norm": 0.0007642016862519085, "learning_rate": 5.815595134665509e-06, "loss": 0.0046, "step": 16280 }, { "epoch": 3.538227628149435, "grad_norm": 0.0008390732109546661, "learning_rate": 5.788444830582103e-06, "loss": 0.0001, "step": 16290 }, { "epoch": 3.5403996524761077, "grad_norm": 0.0008663103799335659, "learning_rate": 5.7612945264986975e-06, "loss": 0.0001, "step": 16300 }, { "epoch": 3.54257167680278, "grad_norm": 0.004686756059527397, "learning_rate": 5.734144222415291e-06, "loss": 0.0001, "step": 16310 }, { "epoch": 3.5447437011294527, "grad_norm": 0.000735687674023211, "learning_rate": 5.706993918331885e-06, "loss": 0.0001, "step": 16320 }, { "epoch": 3.546915725456125, "grad_norm": 0.2715161442756653, "learning_rate": 5.67984361424848e-06, "loss": 0.0003, "step": 16330 }, { "epoch": 3.5490877497827977, "grad_norm": 0.0007402179180644453, "learning_rate": 5.652693310165074e-06, "loss": 0.0001, "step": 16340 }, { "epoch": 3.55125977410947, "grad_norm": 0.0007502386579290032, "learning_rate": 5.6255430060816685e-06, "loss": 0.0001, "step": 16350 }, { "epoch": 3.5534317984361423, "grad_norm": 0.0010584397241473198, "learning_rate": 5.598392701998262e-06, "loss": 0.0001, "step": 16360 }, { "epoch": 3.555603822762815, "grad_norm": 0.0007463762303814292, "learning_rate": 5.571242397914857e-06, "loss": 0.0035, "step": 16370 }, { "epoch": 3.5577758470894874, "grad_norm": 0.0025062961503863335, "learning_rate": 5.544092093831451e-06, "loss": 0.0001, "step": 16380 }, { "epoch": 3.55994787141616, "grad_norm": 0.0007336509297601879, "learning_rate": 5.516941789748045e-06, "loss": 0.0009, "step": 16390 }, { "epoch": 3.5621198957428324, "grad_norm": 0.004578218795359135, "learning_rate": 5.4897914856646395e-06, "loss": 0.0002, "step": 16400 }, { "epoch": 3.564291920069505, "grad_norm": 0.0007287193438969553, "learning_rate": 5.462641181581233e-06, "loss": 0.0106, "step": 16410 }, { "epoch": 3.566463944396177, "grad_norm": 0.000773219857364893, "learning_rate": 5.435490877497828e-06, "loss": 0.0002, "step": 16420 }, { "epoch": 3.56863596872285, "grad_norm": 0.0025072686839848757, "learning_rate": 5.408340573414423e-06, "loss": 0.0002, "step": 16430 }, { "epoch": 3.570807993049522, "grad_norm": 0.0008311655255965889, "learning_rate": 5.381190269331017e-06, "loss": 0.0022, "step": 16440 }, { "epoch": 3.5729800173761945, "grad_norm": 0.19069628417491913, "learning_rate": 5.354039965247611e-06, "loss": 0.0034, "step": 16450 }, { "epoch": 3.575152041702867, "grad_norm": 0.09645688533782959, "learning_rate": 5.326889661164205e-06, "loss": 0.031, "step": 16460 }, { "epoch": 3.5773240660295396, "grad_norm": 0.005101743154227734, "learning_rate": 5.2997393570808e-06, "loss": 0.0354, "step": 16470 }, { "epoch": 3.579496090356212, "grad_norm": 0.000746962963603437, "learning_rate": 5.272589052997394e-06, "loss": 0.0001, "step": 16480 }, { "epoch": 3.5816681146828846, "grad_norm": 0.003772861324250698, "learning_rate": 5.2454387489139885e-06, "loss": 0.0002, "step": 16490 }, { "epoch": 3.583840139009557, "grad_norm": 0.0007960237562656403, "learning_rate": 5.218288444830582e-06, "loss": 0.0001, "step": 16500 }, { "epoch": 3.586012163336229, "grad_norm": 0.1657443791627884, "learning_rate": 5.191138140747176e-06, "loss": 0.0026, "step": 16510 }, { "epoch": 3.5881841876629017, "grad_norm": 0.0007543342071585357, "learning_rate": 5.163987836663771e-06, "loss": 0.0001, "step": 16520 }, { "epoch": 3.590356211989574, "grad_norm": 0.0007493611774407327, "learning_rate": 5.136837532580365e-06, "loss": 0.0001, "step": 16530 }, { "epoch": 3.5925282363162467, "grad_norm": 0.0041643306612968445, "learning_rate": 5.1096872284969595e-06, "loss": 0.0002, "step": 16540 }, { "epoch": 3.5947002606429193, "grad_norm": 0.0007641764241270721, "learning_rate": 5.082536924413553e-06, "loss": 0.0008, "step": 16550 }, { "epoch": 3.5968722849695918, "grad_norm": 0.003648051293566823, "learning_rate": 5.055386620330148e-06, "loss": 0.0001, "step": 16560 }, { "epoch": 3.5990443092962643, "grad_norm": 0.0033129567746073008, "learning_rate": 5.028236316246742e-06, "loss": 0.0002, "step": 16570 }, { "epoch": 3.6012163336229364, "grad_norm": 0.000875050260219723, "learning_rate": 5.001086012163337e-06, "loss": 0.0003, "step": 16580 }, { "epoch": 3.603388357949609, "grad_norm": 0.0007452957797795534, "learning_rate": 4.9739357080799306e-06, "loss": 0.0001, "step": 16590 }, { "epoch": 3.6055603822762814, "grad_norm": 0.0007511350559070706, "learning_rate": 4.946785403996524e-06, "loss": 0.0001, "step": 16600 }, { "epoch": 3.607732406602954, "grad_norm": 0.003435454098507762, "learning_rate": 4.919635099913119e-06, "loss": 0.0001, "step": 16610 }, { "epoch": 3.6099044309296264, "grad_norm": 0.0033019285183399916, "learning_rate": 4.892484795829713e-06, "loss": 0.0002, "step": 16620 }, { "epoch": 3.612076455256299, "grad_norm": 0.0009856983087956905, "learning_rate": 4.865334491746308e-06, "loss": 0.0001, "step": 16630 }, { "epoch": 3.6142484795829715, "grad_norm": 0.0007532949093729258, "learning_rate": 4.838184187662902e-06, "loss": 0.017, "step": 16640 }, { "epoch": 3.6164205039096435, "grad_norm": 0.0007319801952689886, "learning_rate": 4.811033883579496e-06, "loss": 0.0001, "step": 16650 }, { "epoch": 3.6185925282363165, "grad_norm": 0.001139196683652699, "learning_rate": 4.783883579496091e-06, "loss": 0.0002, "step": 16660 }, { "epoch": 3.6207645525629886, "grad_norm": 0.0007473634323105216, "learning_rate": 4.756733275412685e-06, "loss": 0.0001, "step": 16670 }, { "epoch": 3.622936576889661, "grad_norm": 0.006157098803669214, "learning_rate": 4.7295829713292796e-06, "loss": 0.0002, "step": 16680 }, { "epoch": 3.6251086012163336, "grad_norm": 0.0007382711628451943, "learning_rate": 4.7024326672458734e-06, "loss": 0.0036, "step": 16690 }, { "epoch": 3.627280625543006, "grad_norm": 0.0007937946356832981, "learning_rate": 4.675282363162468e-06, "loss": 0.0001, "step": 16700 }, { "epoch": 3.6294526498696786, "grad_norm": 0.0007356005371548235, "learning_rate": 4.648132059079062e-06, "loss": 0.0034, "step": 16710 }, { "epoch": 3.631624674196351, "grad_norm": 0.0007657075184397399, "learning_rate": 4.620981754995656e-06, "loss": 0.0001, "step": 16720 }, { "epoch": 3.6337966985230237, "grad_norm": 0.0008099807891994715, "learning_rate": 4.593831450912251e-06, "loss": 0.0001, "step": 16730 }, { "epoch": 3.6359687228496957, "grad_norm": 0.0007494213059544563, "learning_rate": 4.5666811468288444e-06, "loss": 0.0001, "step": 16740 }, { "epoch": 3.6381407471763683, "grad_norm": 0.0016505821840837598, "learning_rate": 4.539530842745439e-06, "loss": 0.0001, "step": 16750 }, { "epoch": 3.640312771503041, "grad_norm": 0.0035423533990979195, "learning_rate": 4.512380538662033e-06, "loss": 0.0002, "step": 16760 }, { "epoch": 3.6424847958297133, "grad_norm": 2.324589490890503, "learning_rate": 4.485230234578628e-06, "loss": 0.0019, "step": 16770 }, { "epoch": 3.644656820156386, "grad_norm": 0.0007355398265644908, "learning_rate": 4.458079930495222e-06, "loss": 0.0288, "step": 16780 }, { "epoch": 3.6468288444830583, "grad_norm": 0.0007384142372757196, "learning_rate": 4.4309296264118155e-06, "loss": 0.007, "step": 16790 }, { "epoch": 3.649000868809731, "grad_norm": 0.01005201693624258, "learning_rate": 4.40377932232841e-06, "loss": 0.0001, "step": 16800 }, { "epoch": 3.651172893136403, "grad_norm": 0.0007379804737865925, "learning_rate": 4.376629018245004e-06, "loss": 0.0087, "step": 16810 }, { "epoch": 3.6533449174630754, "grad_norm": 0.0007344160694628954, "learning_rate": 4.349478714161599e-06, "loss": 0.0001, "step": 16820 }, { "epoch": 3.655516941789748, "grad_norm": 0.9092646837234497, "learning_rate": 4.3223284100781935e-06, "loss": 0.0101, "step": 16830 }, { "epoch": 3.6576889661164205, "grad_norm": 0.006105160806328058, "learning_rate": 4.295178105994787e-06, "loss": 0.0002, "step": 16840 }, { "epoch": 3.659860990443093, "grad_norm": 0.0007417987799271941, "learning_rate": 4.268027801911382e-06, "loss": 0.0002, "step": 16850 }, { "epoch": 3.6620330147697655, "grad_norm": 0.004440960939973593, "learning_rate": 4.240877497827976e-06, "loss": 0.0002, "step": 16860 }, { "epoch": 3.664205039096438, "grad_norm": 0.0007366131176240742, "learning_rate": 4.213727193744571e-06, "loss": 0.0001, "step": 16870 }, { "epoch": 3.66637706342311, "grad_norm": 0.0007839969475753605, "learning_rate": 4.1865768896611645e-06, "loss": 0.0066, "step": 16880 }, { "epoch": 3.668549087749783, "grad_norm": 0.0007367105572484434, "learning_rate": 4.159426585577759e-06, "loss": 0.0002, "step": 16890 }, { "epoch": 3.670721112076455, "grad_norm": 0.0007469954434782267, "learning_rate": 4.132276281494353e-06, "loss": 0.0001, "step": 16900 }, { "epoch": 3.6728931364031276, "grad_norm": 0.000735281384550035, "learning_rate": 4.105125977410947e-06, "loss": 0.0002, "step": 16910 }, { "epoch": 3.6750651607298, "grad_norm": 0.0007407576194964349, "learning_rate": 4.077975673327542e-06, "loss": 0.0001, "step": 16920 }, { "epoch": 3.6772371850564727, "grad_norm": 0.0008425369742326438, "learning_rate": 4.0508253692441355e-06, "loss": 0.0001, "step": 16930 }, { "epoch": 3.679409209383145, "grad_norm": 0.0007419844623655081, "learning_rate": 4.02367506516073e-06, "loss": 0.0001, "step": 16940 }, { "epoch": 3.6815812337098177, "grad_norm": 0.0007465860689990222, "learning_rate": 3.996524761077324e-06, "loss": 0.0003, "step": 16950 }, { "epoch": 3.6837532580364902, "grad_norm": 0.0007462946814484894, "learning_rate": 3.969374456993919e-06, "loss": 0.0002, "step": 16960 }, { "epoch": 3.6859252823631623, "grad_norm": 0.0007361106108874083, "learning_rate": 3.942224152910513e-06, "loss": 0.0001, "step": 16970 }, { "epoch": 3.688097306689835, "grad_norm": 0.0007370809908024967, "learning_rate": 3.9150738488271065e-06, "loss": 0.0001, "step": 16980 }, { "epoch": 3.6902693310165073, "grad_norm": 0.0007324148900806904, "learning_rate": 3.887923544743701e-06, "loss": 0.0001, "step": 16990 }, { "epoch": 3.69244135534318, "grad_norm": 0.015564072877168655, "learning_rate": 3.860773240660295e-06, "loss": 0.0127, "step": 17000 }, { "epoch": 3.6946133796698524, "grad_norm": 0.0007292072405107319, "learning_rate": 3.83362293657689e-06, "loss": 0.0001, "step": 17010 }, { "epoch": 3.696785403996525, "grad_norm": 0.000941729755140841, "learning_rate": 3.8064726324934837e-06, "loss": 0.0002, "step": 17020 }, { "epoch": 3.6989574283231974, "grad_norm": 0.0020503199193626642, "learning_rate": 3.779322328410079e-06, "loss": 0.0002, "step": 17030 }, { "epoch": 3.7011294526498695, "grad_norm": 0.0007638138486072421, "learning_rate": 3.752172024326673e-06, "loss": 0.0001, "step": 17040 }, { "epoch": 3.703301476976542, "grad_norm": 0.0007398283923976123, "learning_rate": 3.7250217202432674e-06, "loss": 0.0001, "step": 17050 }, { "epoch": 3.7054735013032145, "grad_norm": 0.44359394907951355, "learning_rate": 3.6978714161598612e-06, "loss": 0.0032, "step": 17060 }, { "epoch": 3.707645525629887, "grad_norm": 0.0008260589092969894, "learning_rate": 3.6707211120764555e-06, "loss": 0.0002, "step": 17070 }, { "epoch": 3.7098175499565595, "grad_norm": 0.0007681497954763472, "learning_rate": 3.64357080799305e-06, "loss": 0.0016, "step": 17080 }, { "epoch": 3.711989574283232, "grad_norm": 0.00076832011109218, "learning_rate": 3.616420503909644e-06, "loss": 0.0004, "step": 17090 }, { "epoch": 3.7141615986099046, "grad_norm": 0.0007327235070988536, "learning_rate": 3.5892701998262384e-06, "loss": 0.0003, "step": 17100 }, { "epoch": 3.7163336229365767, "grad_norm": 0.0007373854168690741, "learning_rate": 3.5621198957428327e-06, "loss": 0.0002, "step": 17110 }, { "epoch": 3.7185056472632496, "grad_norm": 0.0007295843679457903, "learning_rate": 3.534969591659427e-06, "loss": 0.003, "step": 17120 }, { "epoch": 3.7206776715899217, "grad_norm": 0.000977756455540657, "learning_rate": 3.507819287576021e-06, "loss": 0.0001, "step": 17130 }, { "epoch": 3.722849695916594, "grad_norm": 0.0007806509966030717, "learning_rate": 3.480668983492615e-06, "loss": 0.0001, "step": 17140 }, { "epoch": 3.7250217202432667, "grad_norm": 0.0007476311875507236, "learning_rate": 3.4535186794092094e-06, "loss": 0.0269, "step": 17150 }, { "epoch": 3.7271937445699392, "grad_norm": 0.014178342185914516, "learning_rate": 3.4263683753258037e-06, "loss": 0.0042, "step": 17160 }, { "epoch": 3.7293657688966118, "grad_norm": 0.000731293112039566, "learning_rate": 3.399218071242398e-06, "loss": 0.0003, "step": 17170 }, { "epoch": 3.731537793223284, "grad_norm": 0.0008929009782150388, "learning_rate": 3.3720677671589923e-06, "loss": 0.0001, "step": 17180 }, { "epoch": 3.733709817549957, "grad_norm": 0.0007993864710442722, "learning_rate": 3.3449174630755865e-06, "loss": 0.0001, "step": 17190 }, { "epoch": 3.735881841876629, "grad_norm": 0.0007400406175293028, "learning_rate": 3.317767158992181e-06, "loss": 0.0003, "step": 17200 }, { "epoch": 3.7380538662033014, "grad_norm": 0.0007384680793620646, "learning_rate": 3.2906168549087747e-06, "loss": 0.0002, "step": 17210 }, { "epoch": 3.740225890529974, "grad_norm": 0.0007810071110725403, "learning_rate": 3.263466550825369e-06, "loss": 0.0001, "step": 17220 }, { "epoch": 3.7423979148566464, "grad_norm": 0.0007533092866651714, "learning_rate": 3.2363162467419633e-06, "loss": 0.007, "step": 17230 }, { "epoch": 3.744569939183319, "grad_norm": 0.0007717570406384766, "learning_rate": 3.2091659426585584e-06, "loss": 0.0003, "step": 17240 }, { "epoch": 3.7467419635099914, "grad_norm": 0.01403042022138834, "learning_rate": 3.1820156385751523e-06, "loss": 0.0003, "step": 17250 }, { "epoch": 3.748913987836664, "grad_norm": 0.0007486468530260026, "learning_rate": 3.1548653344917466e-06, "loss": 0.0004, "step": 17260 }, { "epoch": 3.751086012163336, "grad_norm": 0.000728482089471072, "learning_rate": 3.127715030408341e-06, "loss": 0.0001, "step": 17270 }, { "epoch": 3.7532580364900086, "grad_norm": 0.0007304223254323006, "learning_rate": 3.1005647263249347e-06, "loss": 0.0002, "step": 17280 }, { "epoch": 3.755430060816681, "grad_norm": 0.0015727184945717454, "learning_rate": 3.0734144222415294e-06, "loss": 0.0001, "step": 17290 }, { "epoch": 3.7576020851433536, "grad_norm": 0.0007460480555891991, "learning_rate": 3.0462641181581237e-06, "loss": 0.0001, "step": 17300 }, { "epoch": 3.759774109470026, "grad_norm": 0.0007649777107872069, "learning_rate": 3.019113814074718e-06, "loss": 0.0001, "step": 17310 }, { "epoch": 3.7619461337966986, "grad_norm": 0.005170903634279966, "learning_rate": 2.9919635099913123e-06, "loss": 0.0002, "step": 17320 }, { "epoch": 3.764118158123371, "grad_norm": 0.0007338092545978725, "learning_rate": 2.964813205907906e-06, "loss": 0.0003, "step": 17330 }, { "epoch": 3.766290182450043, "grad_norm": 0.0008042194531299174, "learning_rate": 2.9376629018245004e-06, "loss": 0.0001, "step": 17340 }, { "epoch": 3.768462206776716, "grad_norm": 0.0007178256055340171, "learning_rate": 2.9105125977410947e-06, "loss": 0.0007, "step": 17350 }, { "epoch": 3.7706342311033882, "grad_norm": 0.0007388383965007961, "learning_rate": 2.883362293657689e-06, "loss": 0.0004, "step": 17360 }, { "epoch": 3.7728062554300608, "grad_norm": 0.0007393441046588123, "learning_rate": 2.8562119895742833e-06, "loss": 0.0002, "step": 17370 }, { "epoch": 3.7749782797567333, "grad_norm": 0.0007252399227581918, "learning_rate": 2.8290616854908776e-06, "loss": 0.0002, "step": 17380 }, { "epoch": 3.777150304083406, "grad_norm": 0.0008031509933061898, "learning_rate": 2.801911381407472e-06, "loss": 0.0002, "step": 17390 }, { "epoch": 3.7793223284100783, "grad_norm": 0.0007574482006020844, "learning_rate": 2.774761077324066e-06, "loss": 0.012, "step": 17400 }, { "epoch": 3.7814943527367504, "grad_norm": 0.0009023218881338835, "learning_rate": 2.7476107732406605e-06, "loss": 0.0003, "step": 17410 }, { "epoch": 3.7836663770634233, "grad_norm": 0.0007265750900842249, "learning_rate": 2.7204604691572547e-06, "loss": 0.0298, "step": 17420 }, { "epoch": 3.7858384013900954, "grad_norm": 0.0007209655013866723, "learning_rate": 2.693310165073849e-06, "loss": 0.0002, "step": 17430 }, { "epoch": 3.788010425716768, "grad_norm": 0.0007346943602897227, "learning_rate": 2.6661598609904433e-06, "loss": 0.0006, "step": 17440 }, { "epoch": 3.7901824500434405, "grad_norm": 0.000738593575078994, "learning_rate": 2.6390095569070376e-06, "loss": 0.0004, "step": 17450 }, { "epoch": 3.792354474370113, "grad_norm": 0.0007343738689087331, "learning_rate": 2.6118592528236315e-06, "loss": 0.0003, "step": 17460 }, { "epoch": 3.7945264986967855, "grad_norm": 0.007034082897007465, "learning_rate": 2.5847089487402258e-06, "loss": 0.0003, "step": 17470 }, { "epoch": 3.796698523023458, "grad_norm": 0.0068852780386805534, "learning_rate": 2.55755864465682e-06, "loss": 0.0084, "step": 17480 }, { "epoch": 3.7988705473501305, "grad_norm": 0.022389423102140427, "learning_rate": 2.5304083405734148e-06, "loss": 0.0004, "step": 17490 }, { "epoch": 3.8010425716768026, "grad_norm": 0.0008509616018272936, "learning_rate": 2.503258036490009e-06, "loss": 0.0001, "step": 17500 }, { "epoch": 3.803214596003475, "grad_norm": 0.006721413694322109, "learning_rate": 2.4761077324066033e-06, "loss": 0.0004, "step": 17510 }, { "epoch": 3.8053866203301476, "grad_norm": 0.000736792222596705, "learning_rate": 2.448957428323197e-06, "loss": 0.0001, "step": 17520 }, { "epoch": 3.80755864465682, "grad_norm": 0.001253137830644846, "learning_rate": 2.4218071242397915e-06, "loss": 0.0002, "step": 17530 }, { "epoch": 3.8097306689834927, "grad_norm": 0.0007209026953205466, "learning_rate": 2.3946568201563858e-06, "loss": 0.0001, "step": 17540 }, { "epoch": 3.811902693310165, "grad_norm": 0.0007342109456658363, "learning_rate": 2.36750651607298e-06, "loss": 0.0001, "step": 17550 }, { "epoch": 3.8140747176368377, "grad_norm": 0.0008827606798149645, "learning_rate": 2.3403562119895743e-06, "loss": 0.0067, "step": 17560 }, { "epoch": 3.8162467419635098, "grad_norm": 0.0007216184167191386, "learning_rate": 2.3132059079061686e-06, "loss": 0.0001, "step": 17570 }, { "epoch": 3.8184187662901823, "grad_norm": 0.0007301006116904318, "learning_rate": 2.286055603822763e-06, "loss": 0.0002, "step": 17580 }, { "epoch": 3.820590790616855, "grad_norm": 0.0007518759812228382, "learning_rate": 2.258905299739357e-06, "loss": 0.0001, "step": 17590 }, { "epoch": 3.8227628149435273, "grad_norm": 0.000788421428296715, "learning_rate": 2.2317549956559515e-06, "loss": 0.0001, "step": 17600 }, { "epoch": 3.8249348392702, "grad_norm": 0.0007337273564189672, "learning_rate": 2.2046046915725458e-06, "loss": 0.0001, "step": 17610 }, { "epoch": 3.8271068635968724, "grad_norm": 0.0007254479569382966, "learning_rate": 2.17745438748914e-06, "loss": 0.0002, "step": 17620 }, { "epoch": 3.829278887923545, "grad_norm": 0.0007236094097606838, "learning_rate": 2.1503040834057344e-06, "loss": 0.0062, "step": 17630 }, { "epoch": 3.831450912250217, "grad_norm": 0.005401818081736565, "learning_rate": 2.1231537793223286e-06, "loss": 0.0003, "step": 17640 }, { "epoch": 3.83362293657689, "grad_norm": 0.0007210998446680605, "learning_rate": 2.0960034752389225e-06, "loss": 0.0005, "step": 17650 }, { "epoch": 3.835794960903562, "grad_norm": 0.005460330750793219, "learning_rate": 2.068853171155517e-06, "loss": 0.0002, "step": 17660 }, { "epoch": 3.8379669852302345, "grad_norm": 0.00539832329377532, "learning_rate": 2.041702867072111e-06, "loss": 0.0002, "step": 17670 }, { "epoch": 3.840139009556907, "grad_norm": 0.0007603506674058735, "learning_rate": 2.0145525629887054e-06, "loss": 0.0001, "step": 17680 }, { "epoch": 3.8423110338835795, "grad_norm": 0.0007322979508899152, "learning_rate": 1.9874022589053e-06, "loss": 0.0003, "step": 17690 }, { "epoch": 3.844483058210252, "grad_norm": 0.0007165221031755209, "learning_rate": 1.9602519548218944e-06, "loss": 0.0206, "step": 17700 }, { "epoch": 3.8466550825369246, "grad_norm": 0.0007267245091497898, "learning_rate": 1.9331016507384887e-06, "loss": 0.0003, "step": 17710 }, { "epoch": 3.848827106863597, "grad_norm": 0.0007173445192165673, "learning_rate": 1.9059513466550827e-06, "loss": 0.0001, "step": 17720 }, { "epoch": 3.850999131190269, "grad_norm": 0.0007399597088806331, "learning_rate": 1.8788010425716768e-06, "loss": 0.001, "step": 17730 }, { "epoch": 3.8531711555169417, "grad_norm": 0.0007208718452602625, "learning_rate": 1.851650738488271e-06, "loss": 0.0001, "step": 17740 }, { "epoch": 3.855343179843614, "grad_norm": 0.8270201086997986, "learning_rate": 1.8245004344048654e-06, "loss": 0.0172, "step": 17750 }, { "epoch": 3.8575152041702867, "grad_norm": 0.0016780218575149775, "learning_rate": 1.7973501303214597e-06, "loss": 0.0001, "step": 17760 }, { "epoch": 3.859687228496959, "grad_norm": 0.8392778038978577, "learning_rate": 1.7701998262380538e-06, "loss": 0.01, "step": 17770 }, { "epoch": 3.8618592528236317, "grad_norm": 0.0007236091187223792, "learning_rate": 1.743049522154648e-06, "loss": 0.0002, "step": 17780 }, { "epoch": 3.8640312771503043, "grad_norm": 0.05827876180410385, "learning_rate": 1.7158992180712425e-06, "loss": 0.0005, "step": 17790 }, { "epoch": 3.8662033014769763, "grad_norm": 0.0007236993405967951, "learning_rate": 1.6887489139878368e-06, "loss": 0.0001, "step": 17800 }, { "epoch": 3.868375325803649, "grad_norm": 0.0007266022148542106, "learning_rate": 1.6615986099044311e-06, "loss": 0.0009, "step": 17810 }, { "epoch": 3.8705473501303214, "grad_norm": 0.007087093777954578, "learning_rate": 1.6344483058210254e-06, "loss": 0.0054, "step": 17820 }, { "epoch": 3.872719374456994, "grad_norm": 0.0007367506041191518, "learning_rate": 1.6072980017376195e-06, "loss": 0.0003, "step": 17830 }, { "epoch": 3.8748913987836664, "grad_norm": 0.0007232032367028296, "learning_rate": 1.5801476976542138e-06, "loss": 0.0004, "step": 17840 }, { "epoch": 3.877063423110339, "grad_norm": 0.0007434978033415973, "learning_rate": 1.552997393570808e-06, "loss": 0.0002, "step": 17850 }, { "epoch": 3.8792354474370114, "grad_norm": 0.0007194494246505201, "learning_rate": 1.5258470894874023e-06, "loss": 0.0001, "step": 17860 }, { "epoch": 3.8814074717636835, "grad_norm": 0.0007267376640811563, "learning_rate": 1.4986967854039966e-06, "loss": 0.0001, "step": 17870 }, { "epoch": 3.8835794960903565, "grad_norm": 0.0007254155352711678, "learning_rate": 1.471546481320591e-06, "loss": 0.0002, "step": 17880 }, { "epoch": 3.8857515204170285, "grad_norm": 0.0007285097963176668, "learning_rate": 1.4443961772371852e-06, "loss": 0.0099, "step": 17890 }, { "epoch": 3.887923544743701, "grad_norm": 0.0007246483583003283, "learning_rate": 1.4172458731537793e-06, "loss": 0.0001, "step": 17900 }, { "epoch": 3.8900955690703736, "grad_norm": 0.0007502794032916427, "learning_rate": 1.3900955690703736e-06, "loss": 0.0003, "step": 17910 }, { "epoch": 3.892267593397046, "grad_norm": 0.00836797896772623, "learning_rate": 1.362945264986968e-06, "loss": 0.0001, "step": 17920 }, { "epoch": 3.8944396177237186, "grad_norm": 0.0007197211962193251, "learning_rate": 1.3357949609035621e-06, "loss": 0.0001, "step": 17930 }, { "epoch": 3.896611642050391, "grad_norm": 0.008673655800521374, "learning_rate": 1.3086446568201564e-06, "loss": 0.0005, "step": 17940 }, { "epoch": 3.8987836663770636, "grad_norm": 0.0007204540306702256, "learning_rate": 1.2814943527367507e-06, "loss": 0.0003, "step": 17950 }, { "epoch": 3.9009556907037357, "grad_norm": 0.0007173253106884658, "learning_rate": 1.2543440486533448e-06, "loss": 0.0001, "step": 17960 }, { "epoch": 3.9031277150304082, "grad_norm": 0.0007573234033770859, "learning_rate": 1.2271937445699393e-06, "loss": 0.0009, "step": 17970 }, { "epoch": 3.9052997393570807, "grad_norm": 0.0007323205936700106, "learning_rate": 1.2000434404865336e-06, "loss": 0.0055, "step": 17980 }, { "epoch": 3.9074717636837533, "grad_norm": 0.0007362981559708714, "learning_rate": 1.1728931364031277e-06, "loss": 0.0092, "step": 17990 }, { "epoch": 3.909643788010426, "grad_norm": 0.00073151447577402, "learning_rate": 1.145742832319722e-06, "loss": 0.0001, "step": 18000 }, { "epoch": 3.9118158123370983, "grad_norm": 0.0007298584096133709, "learning_rate": 1.1185925282363162e-06, "loss": 0.0002, "step": 18010 }, { "epoch": 3.913987836663771, "grad_norm": 0.0007194079225882888, "learning_rate": 1.0914422241529105e-06, "loss": 0.0037, "step": 18020 }, { "epoch": 3.916159860990443, "grad_norm": 0.0007370563107542694, "learning_rate": 1.0642919200695048e-06, "loss": 0.0049, "step": 18030 }, { "epoch": 3.9183318853171154, "grad_norm": 0.0009322396363131702, "learning_rate": 1.037141615986099e-06, "loss": 0.0038, "step": 18040 }, { "epoch": 3.920503909643788, "grad_norm": 0.000749268860090524, "learning_rate": 1.0099913119026934e-06, "loss": 0.0001, "step": 18050 }, { "epoch": 3.9226759339704604, "grad_norm": 0.0007187744486145675, "learning_rate": 9.828410078192875e-07, "loss": 0.0002, "step": 18060 }, { "epoch": 3.924847958297133, "grad_norm": 0.0009798071114346385, "learning_rate": 9.55690703735882e-07, "loss": 0.0001, "step": 18070 }, { "epoch": 3.9270199826238055, "grad_norm": 0.000750518636777997, "learning_rate": 9.285403996524761e-07, "loss": 0.0001, "step": 18080 }, { "epoch": 3.929192006950478, "grad_norm": 0.0007136166095733643, "learning_rate": 9.013900955690704e-07, "loss": 0.0001, "step": 18090 }, { "epoch": 3.93136403127715, "grad_norm": 0.0007220849511213601, "learning_rate": 8.742397914856646e-07, "loss": 0.0001, "step": 18100 }, { "epoch": 3.933536055603823, "grad_norm": 0.007109349127858877, "learning_rate": 8.470894874022589e-07, "loss": 0.0004, "step": 18110 }, { "epoch": 3.935708079930495, "grad_norm": 0.0007139624794945121, "learning_rate": 8.199391833188533e-07, "loss": 0.0002, "step": 18120 }, { "epoch": 3.9378801042571676, "grad_norm": 0.0007277438417077065, "learning_rate": 7.927888792354475e-07, "loss": 0.0063, "step": 18130 }, { "epoch": 3.94005212858384, "grad_norm": 0.0007169364835135639, "learning_rate": 7.656385751520418e-07, "loss": 0.0001, "step": 18140 }, { "epoch": 3.9422241529105126, "grad_norm": 0.000733832479454577, "learning_rate": 7.384882710686359e-07, "loss": 0.0001, "step": 18150 }, { "epoch": 3.944396177237185, "grad_norm": 0.0007185288704931736, "learning_rate": 7.113379669852302e-07, "loss": 0.0002, "step": 18160 }, { "epoch": 3.9465682015638577, "grad_norm": 0.000867704045958817, "learning_rate": 6.841876629018245e-07, "loss": 0.0003, "step": 18170 }, { "epoch": 3.94874022589053, "grad_norm": 0.0007935749599710107, "learning_rate": 6.570373588184188e-07, "loss": 0.0002, "step": 18180 }, { "epoch": 3.9509122502172023, "grad_norm": 0.0007953582680784166, "learning_rate": 6.298870547350131e-07, "loss": 0.0001, "step": 18190 }, { "epoch": 3.953084274543875, "grad_norm": 0.000717403250746429, "learning_rate": 6.027367506516073e-07, "loss": 0.0001, "step": 18200 }, { "epoch": 3.9552562988705473, "grad_norm": 0.0007746173650957644, "learning_rate": 5.755864465682016e-07, "loss": 0.0003, "step": 18210 }, { "epoch": 3.95742832319722, "grad_norm": 0.0007847067317925394, "learning_rate": 5.484361424847959e-07, "loss": 0.0001, "step": 18220 }, { "epoch": 3.9596003475238923, "grad_norm": 0.005621789488941431, "learning_rate": 5.212858384013901e-07, "loss": 0.0001, "step": 18230 }, { "epoch": 3.961772371850565, "grad_norm": 0.0007133004837669432, "learning_rate": 4.941355343179844e-07, "loss": 0.0004, "step": 18240 }, { "epoch": 3.9639443961772374, "grad_norm": 0.00072172109503299, "learning_rate": 4.669852302345786e-07, "loss": 0.0002, "step": 18250 }, { "epoch": 3.9661164205039094, "grad_norm": 0.0070555261336266994, "learning_rate": 4.3983492615117295e-07, "loss": 0.0002, "step": 18260 }, { "epoch": 3.968288444830582, "grad_norm": 0.0007262382423505187, "learning_rate": 4.126846220677672e-07, "loss": 0.0002, "step": 18270 }, { "epoch": 3.9704604691572545, "grad_norm": 0.0007373811677098274, "learning_rate": 3.855343179843614e-07, "loss": 0.0008, "step": 18280 }, { "epoch": 3.972632493483927, "grad_norm": 0.0007229727343656123, "learning_rate": 3.583840139009557e-07, "loss": 0.0001, "step": 18290 }, { "epoch": 3.9748045178105995, "grad_norm": 0.0007267961045727134, "learning_rate": 3.3123370981755e-07, "loss": 0.0001, "step": 18300 }, { "epoch": 3.976976542137272, "grad_norm": 0.17784090340137482, "learning_rate": 3.0408340573414423e-07, "loss": 0.0035, "step": 18310 }, { "epoch": 3.9791485664639445, "grad_norm": 0.0007294813403859735, "learning_rate": 2.7693310165073847e-07, "loss": 0.0002, "step": 18320 }, { "epoch": 3.9813205907906166, "grad_norm": 0.0007423889474011958, "learning_rate": 2.4978279756733275e-07, "loss": 0.0001, "step": 18330 }, { "epoch": 3.9834926151172896, "grad_norm": 0.0007235651719383895, "learning_rate": 2.2263249348392704e-07, "loss": 0.0001, "step": 18340 }, { "epoch": 3.9856646394439617, "grad_norm": 0.0007269734633155167, "learning_rate": 1.954821894005213e-07, "loss": 0.0001, "step": 18350 }, { "epoch": 3.987836663770634, "grad_norm": 0.3467176854610443, "learning_rate": 1.6833188531711556e-07, "loss": 0.0022, "step": 18360 }, { "epoch": 3.9900086880973067, "grad_norm": 0.0007139446679502726, "learning_rate": 1.4118158123370983e-07, "loss": 0.0001, "step": 18370 }, { "epoch": 3.992180712423979, "grad_norm": 0.000727241684217006, "learning_rate": 1.1403127715030409e-07, "loss": 0.0002, "step": 18380 }, { "epoch": 3.9943527367506517, "grad_norm": 0.000711127242539078, "learning_rate": 8.688097306689835e-08, "loss": 0.0001, "step": 18390 }, { "epoch": 3.996524761077324, "grad_norm": 0.0008922716369852424, "learning_rate": 5.973066898349262e-08, "loss": 0.0001, "step": 18400 }, { "epoch": 3.9986967854039968, "grad_norm": 0.0007189746247604489, "learning_rate": 3.2580364900086884e-08, "loss": 0.0002, "step": 18410 }, { "epoch": 4.0, "eval_f1": 0.6523297491039427, "eval_loss": 0.06827918440103531, "eval_runtime": 83.7948, "eval_samples_per_second": 119.041, "eval_steps_per_second": 7.447, "step": 18416 }, { "epoch": 4.000868809730669, "grad_norm": 0.0007167569710873067, "learning_rate": 2.5002715030408342e-05, "loss": 0.0004, "step": 18420 }, { "epoch": 4.003040834057342, "grad_norm": 0.0007184027344919741, "learning_rate": 2.498913987836664e-05, "loss": 0.0044, "step": 18430 }, { "epoch": 4.005212858384014, "grad_norm": 0.0007228711619973183, "learning_rate": 2.4975564726324935e-05, "loss": 0.0001, "step": 18440 }, { "epoch": 4.007384882710686, "grad_norm": 0.0007069294806569815, "learning_rate": 2.4961989574283235e-05, "loss": 0.004, "step": 18450 }, { "epoch": 4.009556907037359, "grad_norm": 0.006892836652696133, "learning_rate": 2.494841442224153e-05, "loss": 0.0002, "step": 18460 }, { "epoch": 4.011728931364031, "grad_norm": 0.0007597632356919348, "learning_rate": 2.4934839270199828e-05, "loss": 0.0415, "step": 18470 }, { "epoch": 4.013900955690704, "grad_norm": 0.0008095133816823363, "learning_rate": 2.4921264118158124e-05, "loss": 0.0001, "step": 18480 }, { "epoch": 4.016072980017376, "grad_norm": 0.0007956316112540662, "learning_rate": 2.490768896611642e-05, "loss": 0.0002, "step": 18490 }, { "epoch": 4.018245004344049, "grad_norm": 0.000739986018743366, "learning_rate": 2.4894113814074717e-05, "loss": 0.0001, "step": 18500 }, { "epoch": 4.020417028670721, "grad_norm": 0.000718973926268518, "learning_rate": 2.4880538662033014e-05, "loss": 0.0003, "step": 18510 }, { "epoch": 4.022589052997394, "grad_norm": 0.00071329454658553, "learning_rate": 2.486696350999131e-05, "loss": 0.0001, "step": 18520 }, { "epoch": 4.024761077324066, "grad_norm": 0.0007385624339804053, "learning_rate": 2.485338835794961e-05, "loss": 0.0001, "step": 18530 }, { "epoch": 4.026933101650738, "grad_norm": 0.004547603894025087, "learning_rate": 2.4839813205907907e-05, "loss": 0.0001, "step": 18540 }, { "epoch": 4.029105125977411, "grad_norm": 0.0011677941074594855, "learning_rate": 2.4826238053866203e-05, "loss": 0.0001, "step": 18550 }, { "epoch": 4.031277150304083, "grad_norm": 0.004432608839124441, "learning_rate": 2.4812662901824503e-05, "loss": 0.0025, "step": 18560 }, { "epoch": 4.033449174630756, "grad_norm": 0.0007672629435546696, "learning_rate": 2.47990877497828e-05, "loss": 0.0001, "step": 18570 }, { "epoch": 4.035621198957428, "grad_norm": 0.09396348148584366, "learning_rate": 2.4785512597741096e-05, "loss": 0.0254, "step": 18580 }, { "epoch": 4.037793223284101, "grad_norm": 0.0008097750251181424, "learning_rate": 2.4771937445699393e-05, "loss": 0.0006, "step": 18590 }, { "epoch": 4.039965247610773, "grad_norm": 0.0007146692369133234, "learning_rate": 2.4758362293657693e-05, "loss": 0.0001, "step": 18600 }, { "epoch": 4.042137271937445, "grad_norm": 0.0007175743812695146, "learning_rate": 2.474478714161599e-05, "loss": 0.001, "step": 18610 }, { "epoch": 4.044309296264118, "grad_norm": 0.0007156123756431043, "learning_rate": 2.4731211989574286e-05, "loss": 0.0009, "step": 18620 }, { "epoch": 4.04648132059079, "grad_norm": 0.0006997164455242455, "learning_rate": 2.4717636837532582e-05, "loss": 0.0005, "step": 18630 }, { "epoch": 4.048653344917463, "grad_norm": 0.0007682847790420055, "learning_rate": 2.470406168549088e-05, "loss": 0.0006, "step": 18640 }, { "epoch": 4.050825369244135, "grad_norm": 0.0006895299884490669, "learning_rate": 2.4690486533449175e-05, "loss": 0.0001, "step": 18650 }, { "epoch": 4.052997393570808, "grad_norm": 0.0007155478233471513, "learning_rate": 2.467691138140747e-05, "loss": 0.0006, "step": 18660 }, { "epoch": 4.05516941789748, "grad_norm": 0.012197580188512802, "learning_rate": 2.4663336229365768e-05, "loss": 0.0004, "step": 18670 }, { "epoch": 4.0573414422241525, "grad_norm": 0.011634773574769497, "learning_rate": 2.4649761077324068e-05, "loss": 0.0088, "step": 18680 }, { "epoch": 4.0595134665508255, "grad_norm": 0.0007289189961738884, "learning_rate": 2.4636185925282365e-05, "loss": 0.0005, "step": 18690 }, { "epoch": 4.0616854908774975, "grad_norm": 0.0007233425858430564, "learning_rate": 2.462261077324066e-05, "loss": 0.0001, "step": 18700 }, { "epoch": 4.0638575152041705, "grad_norm": 0.001152144162915647, "learning_rate": 2.4609035621198958e-05, "loss": 0.0115, "step": 18710 }, { "epoch": 4.066029539530843, "grad_norm": 0.0007558057550340891, "learning_rate": 2.4595460469157254e-05, "loss": 0.0006, "step": 18720 }, { "epoch": 4.0682015638575155, "grad_norm": 0.0007093999884091318, "learning_rate": 2.458188531711555e-05, "loss": 0.0002, "step": 18730 }, { "epoch": 4.070373588184188, "grad_norm": 0.0007253269432112575, "learning_rate": 2.456831016507385e-05, "loss": 0.0005, "step": 18740 }, { "epoch": 4.072545612510861, "grad_norm": 0.0006909583462402225, "learning_rate": 2.4554735013032147e-05, "loss": 0.0001, "step": 18750 }, { "epoch": 4.074717636837533, "grad_norm": 0.0007387499208562076, "learning_rate": 2.4541159860990447e-05, "loss": 0.0002, "step": 18760 }, { "epoch": 4.076889661164205, "grad_norm": 0.0006695879274047911, "learning_rate": 2.4527584708948743e-05, "loss": 0.0001, "step": 18770 }, { "epoch": 4.079061685490878, "grad_norm": 0.0006833495572209358, "learning_rate": 2.451400955690704e-05, "loss": 0.0002, "step": 18780 }, { "epoch": 4.08123370981755, "grad_norm": 0.0006686097476631403, "learning_rate": 2.4500434404865336e-05, "loss": 0.0052, "step": 18790 }, { "epoch": 4.083405734144223, "grad_norm": 0.03649875894188881, "learning_rate": 2.4486859252823633e-05, "loss": 0.0003, "step": 18800 }, { "epoch": 4.085577758470895, "grad_norm": 0.0007832238334231079, "learning_rate": 2.447328410078193e-05, "loss": 0.0041, "step": 18810 }, { "epoch": 4.087749782797568, "grad_norm": 15.030851364135742, "learning_rate": 2.4459708948740226e-05, "loss": 0.0476, "step": 18820 }, { "epoch": 4.08992180712424, "grad_norm": 0.007664125878363848, "learning_rate": 2.4446133796698526e-05, "loss": 0.0004, "step": 18830 }, { "epoch": 4.092093831450912, "grad_norm": 0.0014251531101763248, "learning_rate": 2.4432558644656822e-05, "loss": 0.0103, "step": 18840 }, { "epoch": 4.094265855777585, "grad_norm": 0.0033034952357411385, "learning_rate": 2.441898349261512e-05, "loss": 0.005, "step": 18850 }, { "epoch": 4.096437880104257, "grad_norm": 0.0006803958094678819, "learning_rate": 2.4405408340573415e-05, "loss": 0.0001, "step": 18860 }, { "epoch": 4.09860990443093, "grad_norm": 0.0006750720203854144, "learning_rate": 2.4391833188531712e-05, "loss": 0.0002, "step": 18870 }, { "epoch": 4.100781928757602, "grad_norm": 0.0007022693753242493, "learning_rate": 2.437825803649001e-05, "loss": 0.0677, "step": 18880 }, { "epoch": 4.102953953084275, "grad_norm": 0.000759601651225239, "learning_rate": 2.4364682884448305e-05, "loss": 0.0002, "step": 18890 }, { "epoch": 4.105125977410947, "grad_norm": 0.0007145005511119962, "learning_rate": 2.43511077324066e-05, "loss": 0.0003, "step": 18900 }, { "epoch": 4.107298001737619, "grad_norm": 0.008006428368389606, "learning_rate": 2.43375325803649e-05, "loss": 0.0315, "step": 18910 }, { "epoch": 4.109470026064292, "grad_norm": 0.0008494790527038276, "learning_rate": 2.4323957428323198e-05, "loss": 0.0026, "step": 18920 }, { "epoch": 4.111642050390964, "grad_norm": 0.0010820915922522545, "learning_rate": 2.4310382276281494e-05, "loss": 0.0001, "step": 18930 }, { "epoch": 4.113814074717637, "grad_norm": 0.0007636768277734518, "learning_rate": 2.4296807124239794e-05, "loss": 0.0004, "step": 18940 }, { "epoch": 4.115986099044309, "grad_norm": 0.0008369954884983599, "learning_rate": 2.428323197219809e-05, "loss": 0.0001, "step": 18950 }, { "epoch": 4.118158123370982, "grad_norm": 0.0007214748184196651, "learning_rate": 2.4269656820156387e-05, "loss": 0.0002, "step": 18960 }, { "epoch": 4.120330147697654, "grad_norm": 0.0007030196720734239, "learning_rate": 2.4256081668114684e-05, "loss": 0.0002, "step": 18970 }, { "epoch": 4.122502172024326, "grad_norm": 0.0007046711980365217, "learning_rate": 2.4242506516072984e-05, "loss": 0.0001, "step": 18980 }, { "epoch": 4.124674196350999, "grad_norm": 0.00932464748620987, "learning_rate": 2.422893136403128e-05, "loss": 0.0527, "step": 18990 }, { "epoch": 4.126846220677671, "grad_norm": 0.04322976619005203, "learning_rate": 2.4215356211989577e-05, "loss": 0.0109, "step": 19000 }, { "epoch": 4.129018245004344, "grad_norm": 0.0007810614770278335, "learning_rate": 2.4201781059947873e-05, "loss": 0.0013, "step": 19010 }, { "epoch": 4.131190269331016, "grad_norm": 0.0007010008557699621, "learning_rate": 2.418820590790617e-05, "loss": 0.0122, "step": 19020 }, { "epoch": 4.133362293657689, "grad_norm": 0.0007034821319393814, "learning_rate": 2.4174630755864466e-05, "loss": 0.0001, "step": 19030 }, { "epoch": 4.135534317984361, "grad_norm": 0.0008014214690774679, "learning_rate": 2.4161055603822763e-05, "loss": 0.1098, "step": 19040 }, { "epoch": 4.137706342311034, "grad_norm": 12.395751953125, "learning_rate": 2.414748045178106e-05, "loss": 0.0169, "step": 19050 }, { "epoch": 4.139878366637706, "grad_norm": 0.0018553922418504953, "learning_rate": 2.413390529973936e-05, "loss": 0.01, "step": 19060 }, { "epoch": 4.142050390964378, "grad_norm": 0.0010227859020233154, "learning_rate": 2.4120330147697656e-05, "loss": 0.0001, "step": 19070 }, { "epoch": 4.144222415291051, "grad_norm": 0.0009623629739508033, "learning_rate": 2.4106754995655952e-05, "loss": 0.0016, "step": 19080 }, { "epoch": 4.1463944396177235, "grad_norm": 0.0010376714635640383, "learning_rate": 2.409317984361425e-05, "loss": 0.0214, "step": 19090 }, { "epoch": 4.148566463944396, "grad_norm": 0.0009919269941747189, "learning_rate": 2.4079604691572545e-05, "loss": 0.0001, "step": 19100 }, { "epoch": 4.1507384882710685, "grad_norm": 0.0010005880612879992, "learning_rate": 2.406602953953084e-05, "loss": 0.0001, "step": 19110 }, { "epoch": 4.1529105125977415, "grad_norm": 2.53873872756958, "learning_rate": 2.405245438748914e-05, "loss": 0.1289, "step": 19120 }, { "epoch": 4.1550825369244135, "grad_norm": 0.04172717407345772, "learning_rate": 2.4038879235447438e-05, "loss": 0.0016, "step": 19130 }, { "epoch": 4.157254561251086, "grad_norm": 0.019300812855362892, "learning_rate": 2.4025304083405738e-05, "loss": 0.0013, "step": 19140 }, { "epoch": 4.159426585577759, "grad_norm": 0.036429353058338165, "learning_rate": 2.4011728931364034e-05, "loss": 0.0046, "step": 19150 }, { "epoch": 4.161598609904431, "grad_norm": 20.21198081970215, "learning_rate": 2.399815377932233e-05, "loss": 0.0074, "step": 19160 }, { "epoch": 4.163770634231104, "grad_norm": 0.009878740645945072, "learning_rate": 2.3984578627280627e-05, "loss": 0.0065, "step": 19170 }, { "epoch": 4.165942658557776, "grad_norm": 0.005423377268016338, "learning_rate": 2.3971003475238924e-05, "loss": 0.0002, "step": 19180 }, { "epoch": 4.168114682884449, "grad_norm": 0.00920186284929514, "learning_rate": 2.395742832319722e-05, "loss": 0.0012, "step": 19190 }, { "epoch": 4.170286707211121, "grad_norm": 0.05071718618273735, "learning_rate": 2.3943853171155517e-05, "loss": 0.0005, "step": 19200 }, { "epoch": 4.172458731537793, "grad_norm": 0.0018713108729571104, "learning_rate": 2.3930278019113817e-05, "loss": 0.0003, "step": 19210 }, { "epoch": 4.174630755864466, "grad_norm": 0.0014962096465751529, "learning_rate": 2.3916702867072113e-05, "loss": 0.0003, "step": 19220 }, { "epoch": 4.176802780191138, "grad_norm": 0.0009879091521725059, "learning_rate": 2.390312771503041e-05, "loss": 0.0002, "step": 19230 }, { "epoch": 4.178974804517811, "grad_norm": 0.0009537252481095493, "learning_rate": 2.3889552562988706e-05, "loss": 0.0001, "step": 19240 }, { "epoch": 4.181146828844483, "grad_norm": 0.000836131046526134, "learning_rate": 2.3875977410947003e-05, "loss": 0.0002, "step": 19250 }, { "epoch": 4.183318853171156, "grad_norm": 0.0021473176311701536, "learning_rate": 2.38624022589053e-05, "loss": 0.0085, "step": 19260 }, { "epoch": 4.185490877497828, "grad_norm": 0.02601959928870201, "learning_rate": 2.3848827106863596e-05, "loss": 0.0011, "step": 19270 }, { "epoch": 4.187662901824501, "grad_norm": 0.0008759573684073985, "learning_rate": 2.3835251954821892e-05, "loss": 0.0001, "step": 19280 }, { "epoch": 4.189834926151173, "grad_norm": 0.0007665369194000959, "learning_rate": 2.3821676802780192e-05, "loss": 0.0001, "step": 19290 }, { "epoch": 4.192006950477845, "grad_norm": 0.000761428673285991, "learning_rate": 2.380810165073849e-05, "loss": 0.0001, "step": 19300 }, { "epoch": 4.194178974804518, "grad_norm": 1.9355708360671997, "learning_rate": 2.3794526498696785e-05, "loss": 0.0131, "step": 19310 }, { "epoch": 4.19635099913119, "grad_norm": 0.0007248061010614038, "learning_rate": 2.3780951346655085e-05, "loss": 0.0001, "step": 19320 }, { "epoch": 4.198523023457863, "grad_norm": 0.0007215312216430902, "learning_rate": 2.3767376194613382e-05, "loss": 0.0508, "step": 19330 }, { "epoch": 4.200695047784535, "grad_norm": 0.002852953039109707, "learning_rate": 2.3753801042571678e-05, "loss": 0.0355, "step": 19340 }, { "epoch": 4.202867072111208, "grad_norm": 0.004599974490702152, "learning_rate": 2.3740225890529975e-05, "loss": 0.0486, "step": 19350 }, { "epoch": 4.20503909643788, "grad_norm": 0.002695480128750205, "learning_rate": 2.3726650738488275e-05, "loss": 0.0221, "step": 19360 }, { "epoch": 4.207211120764552, "grad_norm": 0.0010665751760825515, "learning_rate": 2.371307558644657e-05, "loss": 0.0002, "step": 19370 }, { "epoch": 4.209383145091225, "grad_norm": 0.0010960167273879051, "learning_rate": 2.3699500434404868e-05, "loss": 0.0185, "step": 19380 }, { "epoch": 4.211555169417897, "grad_norm": 0.4971291422843933, "learning_rate": 2.3685925282363164e-05, "loss": 0.0009, "step": 19390 }, { "epoch": 4.21372719374457, "grad_norm": 0.010001223534345627, "learning_rate": 2.367235013032146e-05, "loss": 0.0021, "step": 19400 }, { "epoch": 4.215899218071242, "grad_norm": 0.006596973165869713, "learning_rate": 2.3658774978279757e-05, "loss": 0.0093, "step": 19410 }, { "epoch": 4.218071242397915, "grad_norm": 0.44958433508872986, "learning_rate": 2.3645199826238054e-05, "loss": 0.0005, "step": 19420 }, { "epoch": 4.220243266724587, "grad_norm": 0.0007452222635038197, "learning_rate": 2.363162467419635e-05, "loss": 0.0008, "step": 19430 }, { "epoch": 4.222415291051259, "grad_norm": 0.0007038828334771097, "learning_rate": 2.361804952215465e-05, "loss": 0.0002, "step": 19440 }, { "epoch": 4.224587315377932, "grad_norm": 0.0006963639170862734, "learning_rate": 2.3604474370112947e-05, "loss": 0.007, "step": 19450 }, { "epoch": 4.226759339704604, "grad_norm": 0.0007029336411505938, "learning_rate": 2.3590899218071243e-05, "loss": 0.0001, "step": 19460 }, { "epoch": 4.228931364031277, "grad_norm": 0.0007282626465894282, "learning_rate": 2.357732406602954e-05, "loss": 0.0309, "step": 19470 }, { "epoch": 4.231103388357949, "grad_norm": 0.0007143176626414061, "learning_rate": 2.3563748913987836e-05, "loss": 0.0005, "step": 19480 }, { "epoch": 4.233275412684622, "grad_norm": 0.5573042631149292, "learning_rate": 2.3550173761946133e-05, "loss": 0.0034, "step": 19490 }, { "epoch": 4.2354474370112944, "grad_norm": 0.0006998754688538611, "learning_rate": 2.3536598609904433e-05, "loss": 0.0008, "step": 19500 }, { "epoch": 4.237619461337967, "grad_norm": 0.01692361570894718, "learning_rate": 2.352302345786273e-05, "loss": 0.0296, "step": 19510 }, { "epoch": 4.2397914856646395, "grad_norm": 0.000765336852055043, "learning_rate": 2.350944830582103e-05, "loss": 0.0005, "step": 19520 }, { "epoch": 4.2419635099913116, "grad_norm": 0.25539854168891907, "learning_rate": 2.3495873153779326e-05, "loss": 0.0013, "step": 19530 }, { "epoch": 4.2441355343179845, "grad_norm": 0.0006899808067828417, "learning_rate": 2.3482298001737622e-05, "loss": 0.001, "step": 19540 }, { "epoch": 4.246307558644657, "grad_norm": 0.0007053895969875157, "learning_rate": 2.346872284969592e-05, "loss": 0.0004, "step": 19550 }, { "epoch": 4.2484795829713295, "grad_norm": 0.0007296680123545229, "learning_rate": 2.3455147697654215e-05, "loss": 0.0011, "step": 19560 }, { "epoch": 4.250651607298002, "grad_norm": 0.02057144045829773, "learning_rate": 2.344157254561251e-05, "loss": 0.0339, "step": 19570 }, { "epoch": 4.252823631624675, "grad_norm": 0.0007652404601685703, "learning_rate": 2.3427997393570808e-05, "loss": 0.0001, "step": 19580 }, { "epoch": 4.254995655951347, "grad_norm": 0.0007344402838498354, "learning_rate": 2.3414422241529108e-05, "loss": 0.0005, "step": 19590 }, { "epoch": 4.257167680278019, "grad_norm": 0.0010844263015314937, "learning_rate": 2.3400847089487404e-05, "loss": 0.0006, "step": 19600 }, { "epoch": 4.259339704604692, "grad_norm": 0.0356300063431263, "learning_rate": 2.33872719374457e-05, "loss": 0.0004, "step": 19610 }, { "epoch": 4.261511728931364, "grad_norm": 0.0006806873134337366, "learning_rate": 2.3373696785403997e-05, "loss": 0.0065, "step": 19620 }, { "epoch": 4.263683753258037, "grad_norm": 0.0006880282890051603, "learning_rate": 2.3360121633362294e-05, "loss": 0.0175, "step": 19630 }, { "epoch": 4.265855777584709, "grad_norm": 0.0006852375227026641, "learning_rate": 2.334654648132059e-05, "loss": 0.0234, "step": 19640 }, { "epoch": 4.268027801911382, "grad_norm": 0.04227971285581589, "learning_rate": 2.3332971329278887e-05, "loss": 0.0322, "step": 19650 }, { "epoch": 4.270199826238054, "grad_norm": 0.03739573061466217, "learning_rate": 2.3319396177237184e-05, "loss": 0.012, "step": 19660 }, { "epoch": 4.272371850564726, "grad_norm": 0.032801300287246704, "learning_rate": 2.3305821025195483e-05, "loss": 0.0095, "step": 19670 }, { "epoch": 4.274543874891399, "grad_norm": 0.0007418083841912448, "learning_rate": 2.329224587315378e-05, "loss": 0.0013, "step": 19680 }, { "epoch": 4.276715899218071, "grad_norm": 0.0007542024250142276, "learning_rate": 2.3278670721112076e-05, "loss": 0.0001, "step": 19690 }, { "epoch": 4.278887923544744, "grad_norm": 0.0008709717076271772, "learning_rate": 2.3265095569070376e-05, "loss": 0.0001, "step": 19700 }, { "epoch": 4.281059947871416, "grad_norm": 0.0007557451608590782, "learning_rate": 2.3251520417028673e-05, "loss": 0.0001, "step": 19710 }, { "epoch": 4.283231972198089, "grad_norm": 0.24801254272460938, "learning_rate": 2.323794526498697e-05, "loss": 0.0041, "step": 19720 }, { "epoch": 4.285403996524761, "grad_norm": 0.0008331090793944895, "learning_rate": 2.3224370112945266e-05, "loss": 0.0037, "step": 19730 }, { "epoch": 4.287576020851434, "grad_norm": 0.0016378792934119701, "learning_rate": 2.3210794960903562e-05, "loss": 0.0001, "step": 19740 }, { "epoch": 4.289748045178106, "grad_norm": 0.0007555813062936068, "learning_rate": 2.3197219808861862e-05, "loss": 0.0022, "step": 19750 }, { "epoch": 4.291920069504778, "grad_norm": 0.2303730845451355, "learning_rate": 2.318364465682016e-05, "loss": 0.007, "step": 19760 }, { "epoch": 4.294092093831451, "grad_norm": 0.0008710839902050793, "learning_rate": 2.3170069504778455e-05, "loss": 0.0262, "step": 19770 }, { "epoch": 4.296264118158123, "grad_norm": 0.0009850760689005256, "learning_rate": 2.3156494352736752e-05, "loss": 0.0016, "step": 19780 }, { "epoch": 4.298436142484796, "grad_norm": 0.0006828588084317744, "learning_rate": 2.3142919200695048e-05, "loss": 0.0018, "step": 19790 }, { "epoch": 4.300608166811468, "grad_norm": 0.0008635299745947123, "learning_rate": 2.3129344048653345e-05, "loss": 0.0595, "step": 19800 }, { "epoch": 4.302780191138141, "grad_norm": 0.0011844933032989502, "learning_rate": 2.311576889661164e-05, "loss": 0.0533, "step": 19810 }, { "epoch": 4.304952215464813, "grad_norm": 0.0021339692175388336, "learning_rate": 2.310219374456994e-05, "loss": 0.0003, "step": 19820 }, { "epoch": 4.307124239791485, "grad_norm": 0.0015525285853073, "learning_rate": 2.3088618592528238e-05, "loss": 0.0016, "step": 19830 }, { "epoch": 4.309296264118158, "grad_norm": 0.0011952450731769204, "learning_rate": 2.3075043440486534e-05, "loss": 0.0095, "step": 19840 }, { "epoch": 4.31146828844483, "grad_norm": 0.0019283192232251167, "learning_rate": 2.306146828844483e-05, "loss": 0.0003, "step": 19850 }, { "epoch": 4.313640312771503, "grad_norm": 0.0012698841746896505, "learning_rate": 2.3047893136403127e-05, "loss": 0.0056, "step": 19860 }, { "epoch": 4.315812337098175, "grad_norm": 0.0013187688309699297, "learning_rate": 2.3034317984361424e-05, "loss": 0.0003, "step": 19870 }, { "epoch": 4.317984361424848, "grad_norm": 0.0029401553329080343, "learning_rate": 2.3020742832319724e-05, "loss": 0.0002, "step": 19880 }, { "epoch": 4.32015638575152, "grad_norm": 0.010737915523350239, "learning_rate": 2.300716768027802e-05, "loss": 0.008, "step": 19890 }, { "epoch": 4.3223284100781925, "grad_norm": 0.00099332130048424, "learning_rate": 2.299359252823632e-05, "loss": 0.0003, "step": 19900 }, { "epoch": 4.324500434404865, "grad_norm": 0.0011381141375750303, "learning_rate": 2.2980017376194617e-05, "loss": 0.0103, "step": 19910 }, { "epoch": 4.3266724587315375, "grad_norm": 0.001223794766701758, "learning_rate": 2.2966442224152913e-05, "loss": 0.0007, "step": 19920 }, { "epoch": 4.3288444830582105, "grad_norm": 0.006975673139095306, "learning_rate": 2.295286707211121e-05, "loss": 0.0014, "step": 19930 }, { "epoch": 4.3310165073848825, "grad_norm": 0.02788945473730564, "learning_rate": 2.2939291920069506e-05, "loss": 0.0107, "step": 19940 }, { "epoch": 4.3331885317115555, "grad_norm": 0.0013920166529715061, "learning_rate": 2.2925716768027803e-05, "loss": 0.0002, "step": 19950 }, { "epoch": 4.335360556038228, "grad_norm": 0.0010082477238029242, "learning_rate": 2.29121416159861e-05, "loss": 0.0156, "step": 19960 }, { "epoch": 4.3375325803649005, "grad_norm": 0.6021537184715271, "learning_rate": 2.28985664639444e-05, "loss": 0.0117, "step": 19970 }, { "epoch": 4.339704604691573, "grad_norm": 0.013891434296965599, "learning_rate": 2.2884991311902695e-05, "loss": 0.0014, "step": 19980 }, { "epoch": 4.341876629018245, "grad_norm": 0.0009469124488532543, "learning_rate": 2.2871416159860992e-05, "loss": 0.0038, "step": 19990 }, { "epoch": 4.344048653344918, "grad_norm": 0.0008154679671861231, "learning_rate": 2.285784100781929e-05, "loss": 0.0003, "step": 20000 }, { "epoch": 4.34622067767159, "grad_norm": 0.0008383361855521798, "learning_rate": 2.2844265855777585e-05, "loss": 0.0012, "step": 20010 }, { "epoch": 4.348392701998263, "grad_norm": 0.000763860356528312, "learning_rate": 2.283069070373588e-05, "loss": 0.0001, "step": 20020 }, { "epoch": 4.350564726324935, "grad_norm": 0.0007653414504602551, "learning_rate": 2.2817115551694178e-05, "loss": 0.0015, "step": 20030 }, { "epoch": 4.352736750651608, "grad_norm": 0.000781756651122123, "learning_rate": 2.2803540399652475e-05, "loss": 0.0001, "step": 20040 }, { "epoch": 4.35490877497828, "grad_norm": 0.0007571419118903577, "learning_rate": 2.2789965247610774e-05, "loss": 0.0009, "step": 20050 }, { "epoch": 4.357080799304952, "grad_norm": 0.04005056619644165, "learning_rate": 2.277639009556907e-05, "loss": 0.0002, "step": 20060 }, { "epoch": 4.359252823631625, "grad_norm": 0.0007382580661214888, "learning_rate": 2.2762814943527367e-05, "loss": 0.0001, "step": 20070 }, { "epoch": 4.361424847958297, "grad_norm": 0.0007434505387209356, "learning_rate": 2.2749239791485667e-05, "loss": 0.0002, "step": 20080 }, { "epoch": 4.36359687228497, "grad_norm": 0.0007536330376751721, "learning_rate": 2.2735664639443964e-05, "loss": 0.0001, "step": 20090 }, { "epoch": 4.365768896611642, "grad_norm": 0.0007379274466075003, "learning_rate": 2.272208948740226e-05, "loss": 0.0001, "step": 20100 }, { "epoch": 4.367940920938315, "grad_norm": 0.0007571052410639822, "learning_rate": 2.2708514335360557e-05, "loss": 0.056, "step": 20110 }, { "epoch": 4.370112945264987, "grad_norm": 0.0013724949676543474, "learning_rate": 2.2694939183318853e-05, "loss": 0.0003, "step": 20120 }, { "epoch": 4.372284969591659, "grad_norm": 0.0017404978862032294, "learning_rate": 2.2681364031277153e-05, "loss": 0.0002, "step": 20130 }, { "epoch": 4.374456993918332, "grad_norm": 0.002438147785142064, "learning_rate": 2.266778887923545e-05, "loss": 0.0004, "step": 20140 }, { "epoch": 4.376629018245004, "grad_norm": 0.0024324068799614906, "learning_rate": 2.2654213727193746e-05, "loss": 0.0003, "step": 20150 }, { "epoch": 4.378801042571677, "grad_norm": 0.001972701633349061, "learning_rate": 2.2640638575152043e-05, "loss": 0.0004, "step": 20160 }, { "epoch": 4.380973066898349, "grad_norm": 0.002721251919865608, "learning_rate": 2.262706342311034e-05, "loss": 0.0003, "step": 20170 }, { "epoch": 4.383145091225022, "grad_norm": 0.0017292031552642584, "learning_rate": 2.2613488271068636e-05, "loss": 0.0002, "step": 20180 }, { "epoch": 4.385317115551694, "grad_norm": 0.001443073502741754, "learning_rate": 2.2599913119026932e-05, "loss": 0.0004, "step": 20190 }, { "epoch": 4.387489139878367, "grad_norm": 0.0014947176678106189, "learning_rate": 2.2586337966985232e-05, "loss": 0.0343, "step": 20200 }, { "epoch": 4.389661164205039, "grad_norm": 0.001758243073709309, "learning_rate": 2.257276281494353e-05, "loss": 0.0002, "step": 20210 }, { "epoch": 4.391833188531711, "grad_norm": 0.0025202189572155476, "learning_rate": 2.2559187662901825e-05, "loss": 0.0225, "step": 20220 }, { "epoch": 4.394005212858384, "grad_norm": 0.005367154721170664, "learning_rate": 2.2545612510860122e-05, "loss": 0.0185, "step": 20230 }, { "epoch": 4.396177237185056, "grad_norm": 0.011141028255224228, "learning_rate": 2.2532037358818418e-05, "loss": 0.0007, "step": 20240 }, { "epoch": 4.398349261511729, "grad_norm": 0.003209081245586276, "learning_rate": 2.2518462206776718e-05, "loss": 0.0005, "step": 20250 }, { "epoch": 4.400521285838401, "grad_norm": 0.0013920213095843792, "learning_rate": 2.2504887054735015e-05, "loss": 0.0003, "step": 20260 }, { "epoch": 4.402693310165074, "grad_norm": 0.004766841884702444, "learning_rate": 2.249131190269331e-05, "loss": 0.0141, "step": 20270 }, { "epoch": 4.404865334491746, "grad_norm": 0.002749124076217413, "learning_rate": 2.247773675065161e-05, "loss": 0.0192, "step": 20280 }, { "epoch": 4.407037358818418, "grad_norm": 0.07083216309547424, "learning_rate": 2.2464161598609908e-05, "loss": 0.0002, "step": 20290 }, { "epoch": 4.409209383145091, "grad_norm": 0.0012828879989683628, "learning_rate": 2.2450586446568204e-05, "loss": 0.0004, "step": 20300 }, { "epoch": 4.411381407471763, "grad_norm": 4.6498332023620605, "learning_rate": 2.24370112945265e-05, "loss": 0.0449, "step": 20310 }, { "epoch": 4.413553431798436, "grad_norm": 0.0018220586935058236, "learning_rate": 2.2423436142484797e-05, "loss": 0.0303, "step": 20320 }, { "epoch": 4.4157254561251085, "grad_norm": 0.018667029216885567, "learning_rate": 2.2409860990443094e-05, "loss": 0.0041, "step": 20330 }, { "epoch": 4.417897480451781, "grad_norm": 0.04699333757162094, "learning_rate": 2.239628583840139e-05, "loss": 0.0172, "step": 20340 }, { "epoch": 4.4200695047784535, "grad_norm": 0.006092607043683529, "learning_rate": 2.2382710686359687e-05, "loss": 0.0004, "step": 20350 }, { "epoch": 4.422241529105126, "grad_norm": 0.36696863174438477, "learning_rate": 2.2369135534317987e-05, "loss": 0.0006, "step": 20360 }, { "epoch": 4.4244135534317985, "grad_norm": 0.0011871858732774854, "learning_rate": 2.2355560382276283e-05, "loss": 0.0004, "step": 20370 }, { "epoch": 4.426585577758471, "grad_norm": 0.0013889227993786335, "learning_rate": 2.234198523023458e-05, "loss": 0.0002, "step": 20380 }, { "epoch": 4.428757602085144, "grad_norm": 0.002782667288556695, "learning_rate": 2.2328410078192876e-05, "loss": 0.0001, "step": 20390 }, { "epoch": 4.430929626411816, "grad_norm": 0.008937436155974865, "learning_rate": 2.2314834926151173e-05, "loss": 0.0003, "step": 20400 }, { "epoch": 4.433101650738489, "grad_norm": 0.00113490573130548, "learning_rate": 2.230125977410947e-05, "loss": 0.0001, "step": 20410 }, { "epoch": 4.435273675065161, "grad_norm": 0.0009732726030051708, "learning_rate": 2.2287684622067766e-05, "loss": 0.0003, "step": 20420 }, { "epoch": 4.437445699391834, "grad_norm": 0.0016077302861958742, "learning_rate": 2.2274109470026065e-05, "loss": 0.0003, "step": 20430 }, { "epoch": 4.439617723718506, "grad_norm": 0.0008823130046948791, "learning_rate": 2.2260534317984362e-05, "loss": 0.0142, "step": 20440 }, { "epoch": 4.441789748045178, "grad_norm": 0.0009418035624548793, "learning_rate": 2.224695916594266e-05, "loss": 0.0264, "step": 20450 }, { "epoch": 4.443961772371851, "grad_norm": 0.0008949940092861652, "learning_rate": 2.223338401390096e-05, "loss": 0.0005, "step": 20460 }, { "epoch": 4.446133796698523, "grad_norm": 0.0009129407699219882, "learning_rate": 2.2219808861859255e-05, "loss": 0.0001, "step": 20470 }, { "epoch": 4.448305821025196, "grad_norm": 0.0014547642786055803, "learning_rate": 2.220623370981755e-05, "loss": 0.0003, "step": 20480 }, { "epoch": 4.450477845351868, "grad_norm": 0.00568614574149251, "learning_rate": 2.2192658557775848e-05, "loss": 0.0002, "step": 20490 }, { "epoch": 4.45264986967854, "grad_norm": 0.0009105128119699657, "learning_rate": 2.2179083405734144e-05, "loss": 0.022, "step": 20500 }, { "epoch": 4.454821894005213, "grad_norm": 0.0072452593594789505, "learning_rate": 2.2165508253692444e-05, "loss": 0.0022, "step": 20510 }, { "epoch": 4.456993918331885, "grad_norm": 0.0010036842431873083, "learning_rate": 2.215193310165074e-05, "loss": 0.0001, "step": 20520 }, { "epoch": 4.459165942658558, "grad_norm": 0.0008698371821083128, "learning_rate": 2.2138357949609037e-05, "loss": 0.0007, "step": 20530 }, { "epoch": 4.46133796698523, "grad_norm": 0.0008447846048511565, "learning_rate": 2.2124782797567334e-05, "loss": 0.0004, "step": 20540 }, { "epoch": 4.463509991311903, "grad_norm": 0.0008029814343899488, "learning_rate": 2.211120764552563e-05, "loss": 0.0001, "step": 20550 }, { "epoch": 4.465682015638575, "grad_norm": 0.008035254664719105, "learning_rate": 2.2097632493483927e-05, "loss": 0.0003, "step": 20560 }, { "epoch": 4.467854039965248, "grad_norm": 0.19235126674175262, "learning_rate": 2.2084057341442223e-05, "loss": 0.0413, "step": 20570 }, { "epoch": 4.47002606429192, "grad_norm": 0.007838092744350433, "learning_rate": 2.2070482189400523e-05, "loss": 0.0219, "step": 20580 }, { "epoch": 4.472198088618592, "grad_norm": 0.0008193932590074837, "learning_rate": 2.205690703735882e-05, "loss": 0.0002, "step": 20590 }, { "epoch": 4.474370112945265, "grad_norm": 0.000779150053858757, "learning_rate": 2.2043331885317116e-05, "loss": 0.0009, "step": 20600 }, { "epoch": 4.476542137271937, "grad_norm": 0.0013178132940083742, "learning_rate": 2.2029756733275413e-05, "loss": 0.0001, "step": 20610 }, { "epoch": 4.47871416159861, "grad_norm": 0.0007607596344314516, "learning_rate": 2.201618158123371e-05, "loss": 0.0004, "step": 20620 }, { "epoch": 4.480886185925282, "grad_norm": 0.0007874126313254237, "learning_rate": 2.200260642919201e-05, "loss": 0.0001, "step": 20630 }, { "epoch": 4.483058210251955, "grad_norm": 0.0007739612483419478, "learning_rate": 2.1989031277150306e-05, "loss": 0.0004, "step": 20640 }, { "epoch": 4.485230234578627, "grad_norm": 0.0008215973502956331, "learning_rate": 2.1975456125108602e-05, "loss": 0.0002, "step": 20650 }, { "epoch": 4.4874022589053, "grad_norm": 0.0007648586761206388, "learning_rate": 2.1961880973066902e-05, "loss": 0.0002, "step": 20660 }, { "epoch": 4.489574283231972, "grad_norm": 0.0007663946016691625, "learning_rate": 2.19483058210252e-05, "loss": 0.0444, "step": 20670 }, { "epoch": 4.491746307558644, "grad_norm": 0.0007827861700206995, "learning_rate": 2.1934730668983495e-05, "loss": 0.0006, "step": 20680 }, { "epoch": 4.493918331885317, "grad_norm": 0.693448007106781, "learning_rate": 2.192115551694179e-05, "loss": 0.0024, "step": 20690 }, { "epoch": 4.496090356211989, "grad_norm": 0.0008732756250537932, "learning_rate": 2.1907580364900088e-05, "loss": 0.0465, "step": 20700 }, { "epoch": 4.498262380538662, "grad_norm": 0.0008466057479381561, "learning_rate": 2.1894005212858385e-05, "loss": 0.0004, "step": 20710 }, { "epoch": 4.500434404865334, "grad_norm": 0.012937244027853012, "learning_rate": 2.188043006081668e-05, "loss": 0.0005, "step": 20720 }, { "epoch": 4.5026064291920065, "grad_norm": 1.4481043815612793, "learning_rate": 2.1866854908774978e-05, "loss": 0.0143, "step": 20730 }, { "epoch": 4.5047784535186794, "grad_norm": 0.008231800980865955, "learning_rate": 2.1853279756733278e-05, "loss": 0.0021, "step": 20740 }, { "epoch": 4.5069504778453515, "grad_norm": 0.0010032965801656246, "learning_rate": 2.1839704604691574e-05, "loss": 0.0014, "step": 20750 }, { "epoch": 4.5091225021720245, "grad_norm": 0.0007704606978222728, "learning_rate": 2.182612945264987e-05, "loss": 0.0062, "step": 20760 }, { "epoch": 4.5112945264986966, "grad_norm": 0.000827732787001878, "learning_rate": 2.1812554300608167e-05, "loss": 0.0001, "step": 20770 }, { "epoch": 4.5134665508253695, "grad_norm": 0.0018680243520066142, "learning_rate": 2.1798979148566464e-05, "loss": 0.0294, "step": 20780 }, { "epoch": 4.515638575152042, "grad_norm": 0.0008118122932501137, "learning_rate": 2.178540399652476e-05, "loss": 0.0002, "step": 20790 }, { "epoch": 4.5178105994787146, "grad_norm": 0.0030436657834798098, "learning_rate": 2.1771828844483057e-05, "loss": 0.0002, "step": 20800 }, { "epoch": 4.519982623805387, "grad_norm": 0.0008835258195176721, "learning_rate": 2.1758253692441357e-05, "loss": 0.0123, "step": 20810 }, { "epoch": 4.522154648132059, "grad_norm": 0.005247740540653467, "learning_rate": 2.1744678540399653e-05, "loss": 0.0004, "step": 20820 }, { "epoch": 4.524326672458732, "grad_norm": 0.0017484568525105715, "learning_rate": 2.173110338835795e-05, "loss": 0.022, "step": 20830 }, { "epoch": 4.526498696785404, "grad_norm": 0.0007496718899346888, "learning_rate": 2.171752823631625e-05, "loss": 0.0002, "step": 20840 }, { "epoch": 4.528670721112077, "grad_norm": 5.449917316436768, "learning_rate": 2.1703953084274546e-05, "loss": 0.0168, "step": 20850 }, { "epoch": 4.530842745438749, "grad_norm": 0.0012228295672684908, "learning_rate": 2.1690377932232842e-05, "loss": 0.0002, "step": 20860 }, { "epoch": 4.533014769765422, "grad_norm": 5.398144721984863, "learning_rate": 2.167680278019114e-05, "loss": 0.0191, "step": 20870 }, { "epoch": 4.535186794092094, "grad_norm": 0.0008976564276963472, "learning_rate": 2.1663227628149435e-05, "loss": 0.0001, "step": 20880 }, { "epoch": 4.537358818418767, "grad_norm": 0.01168507058173418, "learning_rate": 2.1649652476107735e-05, "loss": 0.0005, "step": 20890 }, { "epoch": 4.539530842745439, "grad_norm": 0.0032003382220864296, "learning_rate": 2.1636077324066032e-05, "loss": 0.0036, "step": 20900 }, { "epoch": 4.541702867072111, "grad_norm": 4.298925399780273, "learning_rate": 2.162250217202433e-05, "loss": 0.0271, "step": 20910 }, { "epoch": 4.543874891398784, "grad_norm": 0.0012041199952363968, "learning_rate": 2.1608927019982625e-05, "loss": 0.0383, "step": 20920 }, { "epoch": 4.546046915725456, "grad_norm": 0.056777678430080414, "learning_rate": 2.159535186794092e-05, "loss": 0.0063, "step": 20930 }, { "epoch": 4.548218940052129, "grad_norm": 0.0007683674339205027, "learning_rate": 2.1581776715899218e-05, "loss": 0.0076, "step": 20940 }, { "epoch": 4.550390964378801, "grad_norm": 0.0007942827069200575, "learning_rate": 2.1568201563857514e-05, "loss": 0.0009, "step": 20950 }, { "epoch": 4.552562988705473, "grad_norm": 0.0007410432444885373, "learning_rate": 2.155462641181581e-05, "loss": 0.0001, "step": 20960 }, { "epoch": 4.554735013032146, "grad_norm": 0.0018079435685649514, "learning_rate": 2.154105125977411e-05, "loss": 0.0002, "step": 20970 }, { "epoch": 4.556907037358818, "grad_norm": 0.0007396311848424375, "learning_rate": 2.1527476107732407e-05, "loss": 0.0004, "step": 20980 }, { "epoch": 4.559079061685491, "grad_norm": 0.0008238620939664543, "learning_rate": 2.1513900955690704e-05, "loss": 0.0103, "step": 20990 }, { "epoch": 4.561251086012163, "grad_norm": 0.0007384721538983285, "learning_rate": 2.1500325803649e-05, "loss": 0.0002, "step": 21000 }, { "epoch": 4.563423110338836, "grad_norm": 0.0007408479577861726, "learning_rate": 2.14867506516073e-05, "loss": 0.0003, "step": 21010 }, { "epoch": 4.565595134665508, "grad_norm": 0.0007491989526897669, "learning_rate": 2.1474533014769767e-05, "loss": 0.0219, "step": 21020 }, { "epoch": 4.567767158992181, "grad_norm": 0.0007304720929823816, "learning_rate": 2.1460957862728064e-05, "loss": 0.0004, "step": 21030 }, { "epoch": 4.569939183318853, "grad_norm": 0.0007323980098590255, "learning_rate": 2.144738271068636e-05, "loss": 0.0002, "step": 21040 }, { "epoch": 4.572111207645525, "grad_norm": 0.0008021637913770974, "learning_rate": 2.143380755864466e-05, "loss": 0.0001, "step": 21050 }, { "epoch": 4.574283231972198, "grad_norm": 0.0009776867227628827, "learning_rate": 2.1420232406602957e-05, "loss": 0.0001, "step": 21060 }, { "epoch": 4.57645525629887, "grad_norm": 0.0009424020536243916, "learning_rate": 2.1406657254561253e-05, "loss": 0.0033, "step": 21070 }, { "epoch": 4.578627280625543, "grad_norm": 0.08862542361021042, "learning_rate": 2.139308210251955e-05, "loss": 0.0004, "step": 21080 }, { "epoch": 4.580799304952215, "grad_norm": 0.000843246525619179, "learning_rate": 2.1379506950477846e-05, "loss": 0.0029, "step": 21090 }, { "epoch": 4.582971329278888, "grad_norm": 0.0007149993907660246, "learning_rate": 2.1365931798436143e-05, "loss": 0.001, "step": 21100 }, { "epoch": 4.58514335360556, "grad_norm": 0.0006953048286959529, "learning_rate": 2.1352356646394442e-05, "loss": 0.0002, "step": 21110 }, { "epoch": 4.587315377932233, "grad_norm": 0.018970614299178123, "learning_rate": 2.133878149435274e-05, "loss": 0.0012, "step": 21120 }, { "epoch": 4.589487402258905, "grad_norm": 0.0007085061515681446, "learning_rate": 2.1325206342311035e-05, "loss": 0.0002, "step": 21130 }, { "epoch": 4.5916594265855775, "grad_norm": 0.0007259439444169402, "learning_rate": 2.1311631190269332e-05, "loss": 0.0001, "step": 21140 }, { "epoch": 4.59383145091225, "grad_norm": 0.0007674265652894974, "learning_rate": 2.129805603822763e-05, "loss": 0.0001, "step": 21150 }, { "epoch": 4.5960034752389225, "grad_norm": 0.006665141321718693, "learning_rate": 2.1284480886185925e-05, "loss": 0.0001, "step": 21160 }, { "epoch": 4.5981754995655955, "grad_norm": 0.0006893404060974717, "learning_rate": 2.127090573414422e-05, "loss": 0.0001, "step": 21170 }, { "epoch": 4.6003475238922675, "grad_norm": 0.0035702604800462723, "learning_rate": 2.1257330582102518e-05, "loss": 0.0002, "step": 21180 }, { "epoch": 4.60251954821894, "grad_norm": 0.000676013296470046, "learning_rate": 2.1243755430060818e-05, "loss": 0.0008, "step": 21190 }, { "epoch": 4.604691572545613, "grad_norm": 0.000665114785078913, "learning_rate": 2.1230180278019114e-05, "loss": 0.0009, "step": 21200 }, { "epoch": 4.606863596872285, "grad_norm": 0.0031976511236280203, "learning_rate": 2.121660512597741e-05, "loss": 0.0779, "step": 21210 }, { "epoch": 4.609035621198958, "grad_norm": 0.6927196979522705, "learning_rate": 2.1203029973935707e-05, "loss": 0.0006, "step": 21220 }, { "epoch": 4.61120764552563, "grad_norm": 0.0027231345884501934, "learning_rate": 2.1189454821894007e-05, "loss": 0.0003, "step": 21230 }, { "epoch": 4.613379669852303, "grad_norm": 0.01008631195873022, "learning_rate": 2.1175879669852304e-05, "loss": 0.0515, "step": 21240 }, { "epoch": 4.615551694178975, "grad_norm": 0.0007393760024569929, "learning_rate": 2.11623045178106e-05, "loss": 0.0001, "step": 21250 }, { "epoch": 4.617723718505648, "grad_norm": 0.000740601506549865, "learning_rate": 2.11487293657689e-05, "loss": 0.0006, "step": 21260 }, { "epoch": 4.61989574283232, "grad_norm": 0.0007298871059902012, "learning_rate": 2.1135154213727197e-05, "loss": 0.0001, "step": 21270 }, { "epoch": 4.622067767158992, "grad_norm": 0.0009730908204801381, "learning_rate": 2.1121579061685493e-05, "loss": 0.0194, "step": 21280 }, { "epoch": 4.624239791485665, "grad_norm": 0.006183923222124577, "learning_rate": 2.110800390964379e-05, "loss": 0.0006, "step": 21290 }, { "epoch": 4.626411815812337, "grad_norm": 0.0007730096112936735, "learning_rate": 2.1094428757602086e-05, "loss": 0.0003, "step": 21300 }, { "epoch": 4.62858384013901, "grad_norm": 0.0007310515502467752, "learning_rate": 2.1080853605560383e-05, "loss": 0.0001, "step": 21310 }, { "epoch": 4.630755864465682, "grad_norm": 0.0019475659355521202, "learning_rate": 2.106727845351868e-05, "loss": 0.0001, "step": 21320 }, { "epoch": 4.632927888792355, "grad_norm": 0.0008489437168464065, "learning_rate": 2.1053703301476976e-05, "loss": 0.0001, "step": 21330 }, { "epoch": 4.635099913119027, "grad_norm": 0.0007811547257006168, "learning_rate": 2.1040128149435276e-05, "loss": 0.0001, "step": 21340 }, { "epoch": 4.6372719374457, "grad_norm": 0.0007179775275290012, "learning_rate": 2.1026552997393572e-05, "loss": 0.0003, "step": 21350 }, { "epoch": 4.639443961772372, "grad_norm": 0.0007291779038496315, "learning_rate": 2.101297784535187e-05, "loss": 0.0001, "step": 21360 }, { "epoch": 4.641615986099044, "grad_norm": 6.902067184448242, "learning_rate": 2.0999402693310165e-05, "loss": 0.0221, "step": 21370 }, { "epoch": 4.643788010425717, "grad_norm": 0.000777409237343818, "learning_rate": 2.0985827541268462e-05, "loss": 0.0002, "step": 21380 }, { "epoch": 4.645960034752389, "grad_norm": 0.039273735135793686, "learning_rate": 2.0972252389226758e-05, "loss": 0.0003, "step": 21390 }, { "epoch": 4.648132059079062, "grad_norm": 0.0009249201975762844, "learning_rate": 2.0958677237185058e-05, "loss": 0.0002, "step": 21400 }, { "epoch": 4.650304083405734, "grad_norm": 0.004159982316195965, "learning_rate": 2.0945102085143355e-05, "loss": 0.0002, "step": 21410 }, { "epoch": 4.652476107732406, "grad_norm": 0.0012220778735354543, "learning_rate": 2.093152693310165e-05, "loss": 0.0006, "step": 21420 }, { "epoch": 4.654648132059079, "grad_norm": 0.000692892586812377, "learning_rate": 2.091795178105995e-05, "loss": 0.0198, "step": 21430 }, { "epoch": 4.656820156385751, "grad_norm": 0.0007744540343992412, "learning_rate": 2.0904376629018248e-05, "loss": 0.0529, "step": 21440 }, { "epoch": 4.658992180712424, "grad_norm": 0.03046293742954731, "learning_rate": 2.0890801476976544e-05, "loss": 0.0004, "step": 21450 }, { "epoch": 4.661164205039096, "grad_norm": 0.0007374830893240869, "learning_rate": 2.087722632493484e-05, "loss": 0.0003, "step": 21460 }, { "epoch": 4.663336229365769, "grad_norm": 0.023736968636512756, "learning_rate": 2.0863651172893137e-05, "loss": 0.0001, "step": 21470 }, { "epoch": 4.665508253692441, "grad_norm": 0.0007188957533799112, "learning_rate": 2.0850076020851434e-05, "loss": 0.0002, "step": 21480 }, { "epoch": 4.667680278019114, "grad_norm": 0.0006954037235118449, "learning_rate": 2.0836500868809734e-05, "loss": 0.0003, "step": 21490 }, { "epoch": 4.669852302345786, "grad_norm": 0.0007659996044822037, "learning_rate": 2.082292571676803e-05, "loss": 0.0014, "step": 21500 }, { "epoch": 4.672024326672458, "grad_norm": 0.02373771369457245, "learning_rate": 2.0809350564726327e-05, "loss": 0.0002, "step": 21510 }, { "epoch": 4.674196350999131, "grad_norm": 0.0015294611221179366, "learning_rate": 2.0795775412684623e-05, "loss": 0.0002, "step": 21520 }, { "epoch": 4.676368375325803, "grad_norm": 0.000917143770493567, "learning_rate": 2.078220026064292e-05, "loss": 0.0006, "step": 21530 }, { "epoch": 4.678540399652476, "grad_norm": 0.021886199712753296, "learning_rate": 2.0768625108601216e-05, "loss": 0.0002, "step": 21540 }, { "epoch": 4.680712423979148, "grad_norm": 0.0006681543891318142, "learning_rate": 2.0755049956559513e-05, "loss": 0.0001, "step": 21550 }, { "epoch": 4.682884448305821, "grad_norm": 0.0006924382178112864, "learning_rate": 2.074147480451781e-05, "loss": 0.0001, "step": 21560 }, { "epoch": 4.6850564726324935, "grad_norm": 0.0007467414252460003, "learning_rate": 2.072789965247611e-05, "loss": 0.0001, "step": 21570 }, { "epoch": 4.687228496959166, "grad_norm": 0.0007178894011303782, "learning_rate": 2.0714324500434405e-05, "loss": 0.0006, "step": 21580 }, { "epoch": 4.6894005212858385, "grad_norm": 0.0006496147834695876, "learning_rate": 2.0700749348392702e-05, "loss": 0.0241, "step": 21590 }, { "epoch": 4.691572545612511, "grad_norm": 0.0006787081365473568, "learning_rate": 2.0687174196351e-05, "loss": 0.0143, "step": 21600 }, { "epoch": 4.6937445699391835, "grad_norm": 0.0007202685810625553, "learning_rate": 2.06735990443093e-05, "loss": 0.0001, "step": 21610 }, { "epoch": 4.695916594265856, "grad_norm": 0.0007853159331716597, "learning_rate": 2.0660023892267595e-05, "loss": 0.0008, "step": 21620 }, { "epoch": 4.698088618592529, "grad_norm": 0.03189823031425476, "learning_rate": 2.064644874022589e-05, "loss": 0.0064, "step": 21630 }, { "epoch": 4.700260642919201, "grad_norm": 0.12528634071350098, "learning_rate": 2.063287358818419e-05, "loss": 0.0232, "step": 21640 }, { "epoch": 4.702432667245873, "grad_norm": 0.0006917872815392911, "learning_rate": 2.0619298436142488e-05, "loss": 0.0073, "step": 21650 }, { "epoch": 4.704604691572546, "grad_norm": 0.0006776847876608372, "learning_rate": 2.0605723284100784e-05, "loss": 0.0001, "step": 21660 }, { "epoch": 4.706776715899218, "grad_norm": 0.0006595394806936383, "learning_rate": 2.059214813205908e-05, "loss": 0.0002, "step": 21670 }, { "epoch": 4.708948740225891, "grad_norm": 0.0007088473648764193, "learning_rate": 2.0578572980017377e-05, "loss": 0.0003, "step": 21680 }, { "epoch": 4.711120764552563, "grad_norm": 0.0006470125517807901, "learning_rate": 2.0564997827975674e-05, "loss": 0.0001, "step": 21690 }, { "epoch": 4.713292788879236, "grad_norm": 0.0244167298078537, "learning_rate": 2.055142267593397e-05, "loss": 0.012, "step": 21700 }, { "epoch": 4.715464813205908, "grad_norm": 0.0006437553092837334, "learning_rate": 2.0537847523892267e-05, "loss": 0.0017, "step": 21710 }, { "epoch": 4.717636837532581, "grad_norm": 0.6032142043113708, "learning_rate": 2.0524272371850567e-05, "loss": 0.0091, "step": 21720 }, { "epoch": 4.719808861859253, "grad_norm": 0.0015431154752150178, "learning_rate": 2.0510697219808863e-05, "loss": 0.0011, "step": 21730 }, { "epoch": 4.721980886185925, "grad_norm": 0.031862836331129074, "learning_rate": 2.049712206776716e-05, "loss": 0.0346, "step": 21740 }, { "epoch": 4.724152910512598, "grad_norm": 0.0006862595910206437, "learning_rate": 2.0483546915725456e-05, "loss": 0.0006, "step": 21750 }, { "epoch": 4.72632493483927, "grad_norm": 0.0006549872341565788, "learning_rate": 2.0469971763683753e-05, "loss": 0.0017, "step": 21760 }, { "epoch": 4.728496959165943, "grad_norm": 0.0008586323237977922, "learning_rate": 2.045639661164205e-05, "loss": 0.0533, "step": 21770 }, { "epoch": 4.730668983492615, "grad_norm": 0.0010252386564388871, "learning_rate": 2.044282145960035e-05, "loss": 0.0001, "step": 21780 }, { "epoch": 4.732841007819288, "grad_norm": 0.05718378722667694, "learning_rate": 2.0429246307558646e-05, "loss": 0.0003, "step": 21790 }, { "epoch": 4.73501303214596, "grad_norm": 0.0011250174138695002, "learning_rate": 2.0415671155516942e-05, "loss": 0.0107, "step": 21800 }, { "epoch": 4.737185056472632, "grad_norm": 0.0008177366689778864, "learning_rate": 2.0402096003475242e-05, "loss": 0.0001, "step": 21810 }, { "epoch": 4.739357080799305, "grad_norm": 0.0008273087441921234, "learning_rate": 2.038852085143354e-05, "loss": 0.0085, "step": 21820 }, { "epoch": 4.741529105125977, "grad_norm": 0.0027850535698235035, "learning_rate": 2.0374945699391835e-05, "loss": 0.0112, "step": 21830 }, { "epoch": 4.74370112945265, "grad_norm": 0.0014280122704803944, "learning_rate": 2.036137054735013e-05, "loss": 0.001, "step": 21840 }, { "epoch": 4.745873153779322, "grad_norm": 0.0009550989489071071, "learning_rate": 2.0347795395308428e-05, "loss": 0.0021, "step": 21850 }, { "epoch": 4.748045178105995, "grad_norm": 0.0006730654276907444, "learning_rate": 2.0334220243266725e-05, "loss": 0.0062, "step": 21860 }, { "epoch": 4.750217202432667, "grad_norm": 0.0006814012303948402, "learning_rate": 2.0320645091225025e-05, "loss": 0.0007, "step": 21870 }, { "epoch": 4.752389226759339, "grad_norm": 0.00271434779278934, "learning_rate": 2.030706993918332e-05, "loss": 0.0001, "step": 21880 }, { "epoch": 4.754561251086012, "grad_norm": 0.0006929839146323502, "learning_rate": 2.0293494787141618e-05, "loss": 0.0063, "step": 21890 }, { "epoch": 4.756733275412684, "grad_norm": 0.0015105424681678414, "learning_rate": 2.0279919635099914e-05, "loss": 0.0001, "step": 21900 }, { "epoch": 4.758905299739357, "grad_norm": 0.0006794344517402351, "learning_rate": 2.026634448305821e-05, "loss": 0.0453, "step": 21910 }, { "epoch": 4.761077324066029, "grad_norm": 0.0010067267576232553, "learning_rate": 2.0252769331016507e-05, "loss": 0.0016, "step": 21920 }, { "epoch": 4.763249348392702, "grad_norm": 0.014931906014680862, "learning_rate": 2.0239194178974804e-05, "loss": 0.0003, "step": 21930 }, { "epoch": 4.765421372719374, "grad_norm": 0.07368484139442444, "learning_rate": 2.02256190269331e-05, "loss": 0.0005, "step": 21940 }, { "epoch": 4.767593397046047, "grad_norm": 0.0007706546457484365, "learning_rate": 2.02120438748914e-05, "loss": 0.0081, "step": 21950 }, { "epoch": 4.769765421372719, "grad_norm": 0.0007759315776638687, "learning_rate": 2.0198468722849697e-05, "loss": 0.0001, "step": 21960 }, { "epoch": 4.7719374456993915, "grad_norm": 0.02588563784956932, "learning_rate": 2.0184893570807993e-05, "loss": 0.0004, "step": 21970 }, { "epoch": 4.7741094700260645, "grad_norm": 0.0009247218258678913, "learning_rate": 2.0171318418766293e-05, "loss": 0.0218, "step": 21980 }, { "epoch": 4.7762814943527365, "grad_norm": 0.0019565566908568144, "learning_rate": 2.015774326672459e-05, "loss": 0.0012, "step": 21990 }, { "epoch": 4.7784535186794095, "grad_norm": 0.3613233268260956, "learning_rate": 2.0144168114682886e-05, "loss": 0.0005, "step": 22000 }, { "epoch": 4.780625543006082, "grad_norm": 0.0011166412150487304, "learning_rate": 2.0130592962641182e-05, "loss": 0.0004, "step": 22010 }, { "epoch": 4.782797567332754, "grad_norm": 0.00089460943127051, "learning_rate": 2.011701781059948e-05, "loss": 0.0043, "step": 22020 }, { "epoch": 4.784969591659427, "grad_norm": 0.00839716475456953, "learning_rate": 2.010344265855778e-05, "loss": 0.0164, "step": 22030 }, { "epoch": 4.787141615986099, "grad_norm": 0.0009814859367907047, "learning_rate": 2.0089867506516075e-05, "loss": 0.0001, "step": 22040 }, { "epoch": 4.789313640312772, "grad_norm": 0.000751690415199846, "learning_rate": 2.0076292354474372e-05, "loss": 0.0002, "step": 22050 }, { "epoch": 4.791485664639444, "grad_norm": 0.08599035441875458, "learning_rate": 2.006271720243267e-05, "loss": 0.0002, "step": 22060 }, { "epoch": 4.793657688966117, "grad_norm": 0.0007369867525994778, "learning_rate": 2.0049142050390965e-05, "loss": 0.0006, "step": 22070 }, { "epoch": 4.795829713292789, "grad_norm": 0.0006486243801191449, "learning_rate": 2.003556689834926e-05, "loss": 0.0001, "step": 22080 }, { "epoch": 4.798001737619462, "grad_norm": 0.0007731476216576993, "learning_rate": 2.0021991746307558e-05, "loss": 0.0137, "step": 22090 }, { "epoch": 4.800173761946134, "grad_norm": 0.0006871931254863739, "learning_rate": 2.0008416594265858e-05, "loss": 0.0001, "step": 22100 }, { "epoch": 4.802345786272806, "grad_norm": 0.0170272383838892, "learning_rate": 1.9994841442224154e-05, "loss": 0.0003, "step": 22110 }, { "epoch": 4.804517810599479, "grad_norm": 0.000689572305418551, "learning_rate": 1.998126629018245e-05, "loss": 0.0007, "step": 22120 }, { "epoch": 4.806689834926151, "grad_norm": 0.0006891828961670399, "learning_rate": 1.9967691138140747e-05, "loss": 0.0001, "step": 22130 }, { "epoch": 4.808861859252824, "grad_norm": 0.005554310977458954, "learning_rate": 1.9954115986099044e-05, "loss": 0.0225, "step": 22140 }, { "epoch": 4.811033883579496, "grad_norm": 0.0006960682803764939, "learning_rate": 1.994054083405734e-05, "loss": 0.0099, "step": 22150 }, { "epoch": 4.813205907906169, "grad_norm": 0.001702047768048942, "learning_rate": 1.992696568201564e-05, "loss": 0.0003, "step": 22160 }, { "epoch": 4.815377932232841, "grad_norm": 0.0007203352870419621, "learning_rate": 1.9913390529973937e-05, "loss": 0.0071, "step": 22170 }, { "epoch": 4.817549956559514, "grad_norm": 0.0006288993754424155, "learning_rate": 1.9899815377932233e-05, "loss": 0.0001, "step": 22180 }, { "epoch": 4.819721980886186, "grad_norm": 0.0006460346630774438, "learning_rate": 1.9886240225890533e-05, "loss": 0.0001, "step": 22190 }, { "epoch": 4.821894005212858, "grad_norm": 0.0006239477661438286, "learning_rate": 1.987266507384883e-05, "loss": 0.0001, "step": 22200 }, { "epoch": 4.824066029539531, "grad_norm": 0.0006247904966585338, "learning_rate": 1.9859089921807126e-05, "loss": 0.0147, "step": 22210 }, { "epoch": 4.826238053866203, "grad_norm": 0.0006836046231910586, "learning_rate": 1.9845514769765423e-05, "loss": 0.0001, "step": 22220 }, { "epoch": 4.828410078192876, "grad_norm": 0.00064464146271348, "learning_rate": 1.983193961772372e-05, "loss": 0.0011, "step": 22230 }, { "epoch": 4.830582102519548, "grad_norm": 0.0006400091806426644, "learning_rate": 1.9818364465682016e-05, "loss": 0.0001, "step": 22240 }, { "epoch": 4.83275412684622, "grad_norm": 0.0007611711043864489, "learning_rate": 1.9804789313640316e-05, "loss": 0.0001, "step": 22250 }, { "epoch": 4.834926151172893, "grad_norm": 0.0007814390119165182, "learning_rate": 1.9791214161598612e-05, "loss": 0.0001, "step": 22260 }, { "epoch": 4.837098175499565, "grad_norm": 0.0021302583627402782, "learning_rate": 1.977763900955691e-05, "loss": 0.0078, "step": 22270 }, { "epoch": 4.839270199826238, "grad_norm": 0.0006355360383167863, "learning_rate": 1.9764063857515205e-05, "loss": 0.0013, "step": 22280 }, { "epoch": 4.84144222415291, "grad_norm": 0.006262447685003281, "learning_rate": 1.97504887054735e-05, "loss": 0.0022, "step": 22290 }, { "epoch": 4.843614248479583, "grad_norm": 0.12360040843486786, "learning_rate": 1.9736913553431798e-05, "loss": 0.0007, "step": 22300 }, { "epoch": 4.845786272806255, "grad_norm": 0.013160685077309608, "learning_rate": 1.9723338401390095e-05, "loss": 0.0001, "step": 22310 }, { "epoch": 4.847958297132928, "grad_norm": 0.006475683301687241, "learning_rate": 1.970976324934839e-05, "loss": 0.0001, "step": 22320 }, { "epoch": 4.8501303214596, "grad_norm": 2.062697410583496, "learning_rate": 1.969618809730669e-05, "loss": 0.0092, "step": 22330 }, { "epoch": 4.852302345786272, "grad_norm": 0.0044073620811104774, "learning_rate": 1.9682612945264988e-05, "loss": 0.0001, "step": 22340 }, { "epoch": 4.854474370112945, "grad_norm": 0.0005856105126440525, "learning_rate": 1.9669037793223284e-05, "loss": 0.0339, "step": 22350 }, { "epoch": 4.856646394439617, "grad_norm": 0.0006312388577498496, "learning_rate": 1.9655462641181584e-05, "loss": 0.0058, "step": 22360 }, { "epoch": 4.85881841876629, "grad_norm": 0.0006607999093830585, "learning_rate": 1.964188748913988e-05, "loss": 0.0005, "step": 22370 }, { "epoch": 4.8609904430929625, "grad_norm": 0.0006427134503610432, "learning_rate": 1.9628312337098177e-05, "loss": 0.0094, "step": 22380 }, { "epoch": 4.863162467419635, "grad_norm": 0.000609448819886893, "learning_rate": 1.9614737185056473e-05, "loss": 0.0001, "step": 22390 }, { "epoch": 4.8653344917463075, "grad_norm": 0.4275817573070526, "learning_rate": 1.960116203301477e-05, "loss": 0.0097, "step": 22400 }, { "epoch": 4.8675065160729805, "grad_norm": 0.0006039931322447956, "learning_rate": 1.958758688097307e-05, "loss": 0.0001, "step": 22410 }, { "epoch": 4.8696785403996525, "grad_norm": 0.0006329385214485228, "learning_rate": 1.9574011728931366e-05, "loss": 0.004, "step": 22420 }, { "epoch": 4.871850564726325, "grad_norm": 0.0005937905516475439, "learning_rate": 1.9560436576889663e-05, "loss": 0.0001, "step": 22430 }, { "epoch": 4.874022589052998, "grad_norm": 0.0005967863835394382, "learning_rate": 1.954686142484796e-05, "loss": 0.0261, "step": 22440 }, { "epoch": 4.87619461337967, "grad_norm": 0.008394586853682995, "learning_rate": 1.9533286272806256e-05, "loss": 0.0001, "step": 22450 }, { "epoch": 4.878366637706343, "grad_norm": 0.0006573013961315155, "learning_rate": 1.9519711120764552e-05, "loss": 0.0079, "step": 22460 }, { "epoch": 4.880538662033015, "grad_norm": 0.0006314768688753247, "learning_rate": 1.950613596872285e-05, "loss": 0.0003, "step": 22470 }, { "epoch": 4.882710686359687, "grad_norm": 0.0006235949695110321, "learning_rate": 1.949256081668115e-05, "loss": 0.0001, "step": 22480 }, { "epoch": 4.88488271068636, "grad_norm": 0.000721083371900022, "learning_rate": 1.9478985664639445e-05, "loss": 0.0156, "step": 22490 }, { "epoch": 4.887054735013032, "grad_norm": 0.0006140803452581167, "learning_rate": 1.9465410512597742e-05, "loss": 0.0002, "step": 22500 }, { "epoch": 4.889226759339705, "grad_norm": 0.455748975276947, "learning_rate": 1.945183536055604e-05, "loss": 0.0042, "step": 22510 }, { "epoch": 4.891398783666377, "grad_norm": 0.0006114744464866817, "learning_rate": 1.9438260208514335e-05, "loss": 0.0001, "step": 22520 }, { "epoch": 4.89357080799305, "grad_norm": 0.000603028922341764, "learning_rate": 1.942468505647263e-05, "loss": 0.0001, "step": 22530 }, { "epoch": 4.895742832319722, "grad_norm": 0.0005933665088377893, "learning_rate": 1.941110990443093e-05, "loss": 0.0001, "step": 22540 }, { "epoch": 4.897914856646395, "grad_norm": 0.0005882186815142632, "learning_rate": 1.9397534752389228e-05, "loss": 0.0001, "step": 22550 }, { "epoch": 4.900086880973067, "grad_norm": 0.0006071260431781411, "learning_rate": 1.9383959600347528e-05, "loss": 0.0023, "step": 22560 }, { "epoch": 4.902258905299739, "grad_norm": 0.0005930233746767044, "learning_rate": 1.9370384448305824e-05, "loss": 0.0001, "step": 22570 }, { "epoch": 4.904430929626412, "grad_norm": 0.0007253455114550889, "learning_rate": 1.935680929626412e-05, "loss": 0.0001, "step": 22580 }, { "epoch": 4.906602953953084, "grad_norm": 0.0006349599570967257, "learning_rate": 1.9343234144222417e-05, "loss": 0.0001, "step": 22590 }, { "epoch": 4.908774978279757, "grad_norm": 0.000644247978925705, "learning_rate": 1.9329658992180714e-05, "loss": 0.0143, "step": 22600 }, { "epoch": 4.910947002606429, "grad_norm": 0.020878519862890244, "learning_rate": 1.931608384013901e-05, "loss": 0.0002, "step": 22610 }, { "epoch": 4.913119026933102, "grad_norm": 0.0006180730415508151, "learning_rate": 1.9302508688097307e-05, "loss": 0.0019, "step": 22620 }, { "epoch": 4.915291051259774, "grad_norm": 0.005366886965930462, "learning_rate": 1.9288933536055603e-05, "loss": 0.0002, "step": 22630 }, { "epoch": 4.917463075586447, "grad_norm": 0.0005870962049812078, "learning_rate": 1.9275358384013903e-05, "loss": 0.0001, "step": 22640 }, { "epoch": 4.919635099913119, "grad_norm": 0.000580133986659348, "learning_rate": 1.92617832319722e-05, "loss": 0.0001, "step": 22650 }, { "epoch": 4.921807124239791, "grad_norm": 0.0006379460101015866, "learning_rate": 1.9248208079930496e-05, "loss": 0.0322, "step": 22660 }, { "epoch": 4.923979148566464, "grad_norm": 0.0008098538964986801, "learning_rate": 1.9234632927888793e-05, "loss": 0.0001, "step": 22670 }, { "epoch": 4.926151172893136, "grad_norm": 0.0011943280696868896, "learning_rate": 1.922105777584709e-05, "loss": 0.0009, "step": 22680 }, { "epoch": 4.928323197219809, "grad_norm": 0.01928878016769886, "learning_rate": 1.9207482623805386e-05, "loss": 0.0001, "step": 22690 }, { "epoch": 4.930495221546481, "grad_norm": 0.0005802169325761497, "learning_rate": 1.9193907471763682e-05, "loss": 0.0001, "step": 22700 }, { "epoch": 4.932667245873153, "grad_norm": 0.0059051173739135265, "learning_rate": 1.9180332319721982e-05, "loss": 0.0001, "step": 22710 }, { "epoch": 4.934839270199826, "grad_norm": 0.0032551810145378113, "learning_rate": 1.916675716768028e-05, "loss": 0.0012, "step": 22720 }, { "epoch": 4.937011294526498, "grad_norm": 0.0008213729597628117, "learning_rate": 1.9153182015638575e-05, "loss": 0.0001, "step": 22730 }, { "epoch": 4.939183318853171, "grad_norm": 0.0005722604691982269, "learning_rate": 1.9139606863596875e-05, "loss": 0.0001, "step": 22740 }, { "epoch": 4.941355343179843, "grad_norm": 0.0005691969417966902, "learning_rate": 1.912603171155517e-05, "loss": 0.0001, "step": 22750 }, { "epoch": 4.943527367506516, "grad_norm": 0.0005698164459317923, "learning_rate": 1.9112456559513468e-05, "loss": 0.0001, "step": 22760 }, { "epoch": 4.945699391833188, "grad_norm": 0.0005637307767756283, "learning_rate": 1.9098881407471765e-05, "loss": 0.0022, "step": 22770 }, { "epoch": 4.947871416159861, "grad_norm": 0.0005699021276086569, "learning_rate": 1.908530625543006e-05, "loss": 0.0001, "step": 22780 }, { "epoch": 4.950043440486533, "grad_norm": 0.0005615533445961773, "learning_rate": 1.907173110338836e-05, "loss": 0.004, "step": 22790 }, { "epoch": 4.9522154648132055, "grad_norm": 0.0005626556230708957, "learning_rate": 1.9058155951346657e-05, "loss": 0.0274, "step": 22800 }, { "epoch": 4.9543874891398785, "grad_norm": 0.0006404595333151519, "learning_rate": 1.9044580799304954e-05, "loss": 0.0001, "step": 22810 }, { "epoch": 4.9565595134665505, "grad_norm": 0.0005639271112158895, "learning_rate": 1.903100564726325e-05, "loss": 0.0111, "step": 22820 }, { "epoch": 4.9587315377932235, "grad_norm": 0.011098935268819332, "learning_rate": 1.9017430495221547e-05, "loss": 0.0001, "step": 22830 }, { "epoch": 4.960903562119896, "grad_norm": 0.0006036728736944497, "learning_rate": 1.9003855343179843e-05, "loss": 0.0014, "step": 22840 }, { "epoch": 4.9630755864465685, "grad_norm": 0.006722571793943644, "learning_rate": 1.899028019113814e-05, "loss": 0.0001, "step": 22850 }, { "epoch": 4.965247610773241, "grad_norm": 0.0006086063804104924, "learning_rate": 1.897670503909644e-05, "loss": 0.0001, "step": 22860 }, { "epoch": 4.967419635099914, "grad_norm": 0.001963576767593622, "learning_rate": 1.8963129887054736e-05, "loss": 0.0001, "step": 22870 }, { "epoch": 4.969591659426586, "grad_norm": 0.0009560598991811275, "learning_rate": 1.8949554735013033e-05, "loss": 0.0296, "step": 22880 }, { "epoch": 4.971763683753258, "grad_norm": 0.0029919431544840336, "learning_rate": 1.893597958297133e-05, "loss": 0.0001, "step": 22890 }, { "epoch": 4.973935708079931, "grad_norm": 0.0005524032167159021, "learning_rate": 1.8922404430929626e-05, "loss": 0.0001, "step": 22900 }, { "epoch": 4.976107732406603, "grad_norm": 0.0005599394789896905, "learning_rate": 1.8908829278887922e-05, "loss": 0.0038, "step": 22910 }, { "epoch": 4.978279756733276, "grad_norm": 0.003142011584714055, "learning_rate": 1.8895254126846222e-05, "loss": 0.0001, "step": 22920 }, { "epoch": 4.980451781059948, "grad_norm": 0.0005478629609569907, "learning_rate": 1.888167897480452e-05, "loss": 0.003, "step": 22930 }, { "epoch": 4.98262380538662, "grad_norm": 0.0005452021723613143, "learning_rate": 1.886810382276282e-05, "loss": 0.0003, "step": 22940 }, { "epoch": 4.984795829713293, "grad_norm": 0.000553121033590287, "learning_rate": 1.8854528670721115e-05, "loss": 0.046, "step": 22950 }, { "epoch": 4.986967854039965, "grad_norm": 0.002784899901598692, "learning_rate": 1.8840953518679412e-05, "loss": 0.0001, "step": 22960 }, { "epoch": 4.989139878366638, "grad_norm": 0.0037593538872897625, "learning_rate": 1.8827378366637708e-05, "loss": 0.0154, "step": 22970 }, { "epoch": 4.99131190269331, "grad_norm": 0.0006829476333223283, "learning_rate": 1.8813803214596005e-05, "loss": 0.0001, "step": 22980 }, { "epoch": 4.993483927019983, "grad_norm": 0.0006080670282244682, "learning_rate": 1.88002280625543e-05, "loss": 0.0132, "step": 22990 }, { "epoch": 4.995655951346655, "grad_norm": 0.0028045615181326866, "learning_rate": 1.8786652910512598e-05, "loss": 0.0364, "step": 23000 }, { "epoch": 4.997827975673328, "grad_norm": 8.378561019897461, "learning_rate": 1.8773077758470894e-05, "loss": 0.0076, "step": 23010 }, { "epoch": 5.0, "grad_norm": 0.0020884855184704065, "learning_rate": 1.8759502606429194e-05, "loss": 0.0001, "step": 23020 }, { "epoch": 5.0, "eval_f1": 0.575875486381323, "eval_loss": 0.08333506435155869, "eval_runtime": 82.6846, "eval_samples_per_second": 120.639, "eval_steps_per_second": 7.547, "step": 23020 }, { "epoch": 5.002172024326672, "grad_norm": 0.0006133164861239493, "learning_rate": 1.874592745438749e-05, "loss": 0.0355, "step": 23030 }, { "epoch": 5.004344048653345, "grad_norm": 0.0006215014145709574, "learning_rate": 1.8732352302345787e-05, "loss": 0.0001, "step": 23040 }, { "epoch": 5.006516072980017, "grad_norm": 0.0006174238515086472, "learning_rate": 1.8718777150304084e-05, "loss": 0.0001, "step": 23050 }, { "epoch": 5.00868809730669, "grad_norm": 0.0008445650455541909, "learning_rate": 1.870520199826238e-05, "loss": 0.0001, "step": 23060 }, { "epoch": 5.010860121633362, "grad_norm": 0.0005881586112082005, "learning_rate": 1.8691626846220677e-05, "loss": 0.0001, "step": 23070 }, { "epoch": 5.013032145960035, "grad_norm": 0.0006194966263137758, "learning_rate": 1.8678051694178973e-05, "loss": 0.0001, "step": 23080 }, { "epoch": 5.015204170286707, "grad_norm": 0.0007004704675637186, "learning_rate": 1.8664476542137273e-05, "loss": 0.0001, "step": 23090 }, { "epoch": 5.017376194613379, "grad_norm": 0.030953222885727882, "learning_rate": 1.865090139009557e-05, "loss": 0.0545, "step": 23100 }, { "epoch": 5.019548218940052, "grad_norm": 0.0037942533381283283, "learning_rate": 1.8637326238053866e-05, "loss": 0.0003, "step": 23110 }, { "epoch": 5.021720243266724, "grad_norm": 0.004638850688934326, "learning_rate": 1.8623751086012166e-05, "loss": 0.0052, "step": 23120 }, { "epoch": 5.023892267593397, "grad_norm": 0.006132997572422028, "learning_rate": 1.8610175933970463e-05, "loss": 0.0008, "step": 23130 }, { "epoch": 5.026064291920069, "grad_norm": 0.031111733987927437, "learning_rate": 1.859660078192876e-05, "loss": 0.0004, "step": 23140 }, { "epoch": 5.028236316246742, "grad_norm": 0.019011540338397026, "learning_rate": 1.8583025629887056e-05, "loss": 0.0003, "step": 23150 }, { "epoch": 5.030408340573414, "grad_norm": 0.001691153272986412, "learning_rate": 1.8569450477845352e-05, "loss": 0.0004, "step": 23160 }, { "epoch": 5.032580364900087, "grad_norm": 0.0015675558242946863, "learning_rate": 1.8555875325803652e-05, "loss": 0.0002, "step": 23170 }, { "epoch": 5.034752389226759, "grad_norm": 0.0054156165570020676, "learning_rate": 1.854230017376195e-05, "loss": 0.0002, "step": 23180 }, { "epoch": 5.0369244135534315, "grad_norm": 0.0011364802485331893, "learning_rate": 1.8528725021720245e-05, "loss": 0.0002, "step": 23190 }, { "epoch": 5.039096437880104, "grad_norm": 0.001140189589932561, "learning_rate": 1.851514986967854e-05, "loss": 0.0002, "step": 23200 }, { "epoch": 5.0412684622067765, "grad_norm": 0.030046336352825165, "learning_rate": 1.8501574717636838e-05, "loss": 0.0002, "step": 23210 }, { "epoch": 5.0434404865334495, "grad_norm": 0.0009006512118503451, "learning_rate": 1.8487999565595135e-05, "loss": 0.0002, "step": 23220 }, { "epoch": 5.0456125108601215, "grad_norm": 0.0010009000543504953, "learning_rate": 1.847442441355343e-05, "loss": 0.0003, "step": 23230 }, { "epoch": 5.0477845351867945, "grad_norm": 0.0016936525935307145, "learning_rate": 1.8460849261511728e-05, "loss": 0.04, "step": 23240 }, { "epoch": 5.049956559513467, "grad_norm": 0.001645937329158187, "learning_rate": 1.8447274109470027e-05, "loss": 0.0065, "step": 23250 }, { "epoch": 5.052128583840139, "grad_norm": 0.0010947795817628503, "learning_rate": 1.8433698957428324e-05, "loss": 0.0003, "step": 23260 }, { "epoch": 5.054300608166812, "grad_norm": 0.0009428582852706313, "learning_rate": 1.842012380538662e-05, "loss": 0.0003, "step": 23270 }, { "epoch": 5.056472632493484, "grad_norm": 0.02614458091557026, "learning_rate": 1.8406548653344917e-05, "loss": 0.0002, "step": 23280 }, { "epoch": 5.058644656820157, "grad_norm": 0.0008888828451745212, "learning_rate": 1.8392973501303213e-05, "loss": 0.0001, "step": 23290 }, { "epoch": 5.060816681146829, "grad_norm": 0.01737385056912899, "learning_rate": 1.8379398349261513e-05, "loss": 0.0003, "step": 23300 }, { "epoch": 5.062988705473502, "grad_norm": 0.0010263947769999504, "learning_rate": 1.836582319721981e-05, "loss": 0.0002, "step": 23310 }, { "epoch": 5.065160729800174, "grad_norm": 0.000807849457487464, "learning_rate": 1.835224804517811e-05, "loss": 0.0002, "step": 23320 }, { "epoch": 5.067332754126846, "grad_norm": 0.0008964362787082791, "learning_rate": 1.8338672893136406e-05, "loss": 0.0003, "step": 23330 }, { "epoch": 5.069504778453519, "grad_norm": 0.008567686192691326, "learning_rate": 1.8325097741094703e-05, "loss": 0.0004, "step": 23340 }, { "epoch": 5.071676802780191, "grad_norm": 0.0010218977695330977, "learning_rate": 1.8311522589053e-05, "loss": 0.0031, "step": 23350 }, { "epoch": 5.073848827106864, "grad_norm": 0.0007686801254749298, "learning_rate": 1.8297947437011296e-05, "loss": 0.0015, "step": 23360 }, { "epoch": 5.076020851433536, "grad_norm": 0.004290709272027016, "learning_rate": 1.8284372284969592e-05, "loss": 0.0002, "step": 23370 }, { "epoch": 5.078192875760209, "grad_norm": 0.0007577894139103591, "learning_rate": 1.827079713292789e-05, "loss": 0.0001, "step": 23380 }, { "epoch": 5.080364900086881, "grad_norm": 0.0007783273467794061, "learning_rate": 1.8257221980886185e-05, "loss": 0.0041, "step": 23390 }, { "epoch": 5.082536924413553, "grad_norm": 0.0024375859647989273, "learning_rate": 1.8243646828844485e-05, "loss": 0.0113, "step": 23400 }, { "epoch": 5.084708948740226, "grad_norm": 0.0007347238133661449, "learning_rate": 1.8230071676802782e-05, "loss": 0.0002, "step": 23410 }, { "epoch": 5.086880973066898, "grad_norm": 0.01347825676202774, "learning_rate": 1.8216496524761078e-05, "loss": 0.0004, "step": 23420 }, { "epoch": 5.089052997393571, "grad_norm": 0.0006978354067541659, "learning_rate": 1.8202921372719375e-05, "loss": 0.0145, "step": 23430 }, { "epoch": 5.091225021720243, "grad_norm": 0.0006992680137045681, "learning_rate": 1.818934622067767e-05, "loss": 0.0002, "step": 23440 }, { "epoch": 5.093397046046916, "grad_norm": 0.0029848958365619183, "learning_rate": 1.8175771068635968e-05, "loss": 0.0003, "step": 23450 }, { "epoch": 5.095569070373588, "grad_norm": 0.0031678786035627127, "learning_rate": 1.8162195916594264e-05, "loss": 0.0001, "step": 23460 }, { "epoch": 5.097741094700261, "grad_norm": 0.0006989357643760741, "learning_rate": 1.8148620764552564e-05, "loss": 0.0002, "step": 23470 }, { "epoch": 5.099913119026933, "grad_norm": 0.010042333044111729, "learning_rate": 1.813504561251086e-05, "loss": 0.0071, "step": 23480 }, { "epoch": 5.102085143353605, "grad_norm": 0.0007547451532445848, "learning_rate": 1.8121470460469157e-05, "loss": 0.0001, "step": 23490 }, { "epoch": 5.104257167680278, "grad_norm": 0.001228576060384512, "learning_rate": 1.8107895308427457e-05, "loss": 0.0002, "step": 23500 }, { "epoch": 5.10642919200695, "grad_norm": 0.011400981806218624, "learning_rate": 1.8094320156385754e-05, "loss": 0.0003, "step": 23510 }, { "epoch": 5.108601216333623, "grad_norm": 0.0007331727538257837, "learning_rate": 1.808074500434405e-05, "loss": 0.0001, "step": 23520 }, { "epoch": 5.110773240660295, "grad_norm": 0.20844632387161255, "learning_rate": 1.8067169852302347e-05, "loss": 0.0002, "step": 23530 }, { "epoch": 5.112945264986968, "grad_norm": 0.0008100902196019888, "learning_rate": 1.8053594700260643e-05, "loss": 0.0098, "step": 23540 }, { "epoch": 5.11511728931364, "grad_norm": 0.0008311902638524771, "learning_rate": 1.8040019548218943e-05, "loss": 0.0002, "step": 23550 }, { "epoch": 5.117289313640312, "grad_norm": 0.0008574782987125218, "learning_rate": 1.802644439617724e-05, "loss": 0.0002, "step": 23560 }, { "epoch": 5.119461337966985, "grad_norm": 0.0006869113421998918, "learning_rate": 1.8012869244135536e-05, "loss": 0.0018, "step": 23570 }, { "epoch": 5.121633362293657, "grad_norm": 0.0008113477961160243, "learning_rate": 1.7999294092093833e-05, "loss": 0.0001, "step": 23580 }, { "epoch": 5.12380538662033, "grad_norm": 0.0038217424880713224, "learning_rate": 1.798571894005213e-05, "loss": 0.0002, "step": 23590 }, { "epoch": 5.125977410947002, "grad_norm": 0.0006686209235340357, "learning_rate": 1.7972143788010426e-05, "loss": 0.0001, "step": 23600 }, { "epoch": 5.128149435273675, "grad_norm": 0.032629430294036865, "learning_rate": 1.7958568635968722e-05, "loss": 0.0002, "step": 23610 }, { "epoch": 5.1303214596003475, "grad_norm": 0.0008298791362904012, "learning_rate": 1.794499348392702e-05, "loss": 0.0004, "step": 23620 }, { "epoch": 5.1324934839270195, "grad_norm": 0.008889926597476006, "learning_rate": 1.793141833188532e-05, "loss": 0.0001, "step": 23630 }, { "epoch": 5.1346655082536925, "grad_norm": 0.0006458312273025513, "learning_rate": 1.7917843179843615e-05, "loss": 0.0053, "step": 23640 }, { "epoch": 5.136837532580365, "grad_norm": 0.002981486963108182, "learning_rate": 1.790426802780191e-05, "loss": 0.0001, "step": 23650 }, { "epoch": 5.1390095569070375, "grad_norm": 19.913515090942383, "learning_rate": 1.7890692875760208e-05, "loss": 0.0131, "step": 23660 }, { "epoch": 5.14118158123371, "grad_norm": 0.0006599088083021343, "learning_rate": 1.7877117723718505e-05, "loss": 0.0013, "step": 23670 }, { "epoch": 5.143353605560383, "grad_norm": 0.0008784077363088727, "learning_rate": 1.7863542571676804e-05, "loss": 0.0001, "step": 23680 }, { "epoch": 5.145525629887055, "grad_norm": 0.0006363080465234816, "learning_rate": 1.78499674196351e-05, "loss": 0.0001, "step": 23690 }, { "epoch": 5.147697654213728, "grad_norm": 0.0006334060453809798, "learning_rate": 1.78363922675934e-05, "loss": 0.0025, "step": 23700 }, { "epoch": 5.1498696785404, "grad_norm": 0.0006302617839537561, "learning_rate": 1.7822817115551697e-05, "loss": 0.0025, "step": 23710 }, { "epoch": 5.152041702867072, "grad_norm": 0.0006213096203282475, "learning_rate": 1.7809241963509994e-05, "loss": 0.0034, "step": 23720 }, { "epoch": 5.154213727193745, "grad_norm": 0.0006246123812161386, "learning_rate": 1.779566681146829e-05, "loss": 0.0001, "step": 23730 }, { "epoch": 5.156385751520417, "grad_norm": 0.0006313191843219101, "learning_rate": 1.7782091659426587e-05, "loss": 0.0001, "step": 23740 }, { "epoch": 5.15855777584709, "grad_norm": 0.0014395987382158637, "learning_rate": 1.7768516507384883e-05, "loss": 0.0008, "step": 23750 }, { "epoch": 5.160729800173762, "grad_norm": 0.002919491846114397, "learning_rate": 1.775494135534318e-05, "loss": 0.0331, "step": 23760 }, { "epoch": 5.162901824500435, "grad_norm": 0.004078062251210213, "learning_rate": 1.7741366203301476e-05, "loss": 0.0003, "step": 23770 }, { "epoch": 5.165073848827107, "grad_norm": 0.0006804884178563952, "learning_rate": 1.7727791051259776e-05, "loss": 0.0066, "step": 23780 }, { "epoch": 5.167245873153779, "grad_norm": 0.006823851726949215, "learning_rate": 1.7714215899218073e-05, "loss": 0.0004, "step": 23790 }, { "epoch": 5.169417897480452, "grad_norm": 0.006236494984477758, "learning_rate": 1.770064074717637e-05, "loss": 0.0001, "step": 23800 }, { "epoch": 5.171589921807124, "grad_norm": 0.0006333237397484481, "learning_rate": 1.7687065595134666e-05, "loss": 0.0002, "step": 23810 }, { "epoch": 5.173761946133797, "grad_norm": 0.000644736282993108, "learning_rate": 1.7673490443092962e-05, "loss": 0.0003, "step": 23820 }, { "epoch": 5.175933970460469, "grad_norm": 0.0006276658968999982, "learning_rate": 1.765991529105126e-05, "loss": 0.0138, "step": 23830 }, { "epoch": 5.178105994787142, "grad_norm": 0.003340308554470539, "learning_rate": 1.7646340139009555e-05, "loss": 0.0065, "step": 23840 }, { "epoch": 5.180278019113814, "grad_norm": 0.7302592396736145, "learning_rate": 1.7632764986967852e-05, "loss": 0.0131, "step": 23850 }, { "epoch": 5.182450043440486, "grad_norm": 0.0007103821262717247, "learning_rate": 1.7619189834926152e-05, "loss": 0.0003, "step": 23860 }, { "epoch": 5.184622067767159, "grad_norm": 0.0006702254759147763, "learning_rate": 1.7605614682884448e-05, "loss": 0.0006, "step": 23870 }, { "epoch": 5.186794092093831, "grad_norm": 0.0006133012939244509, "learning_rate": 1.7592039530842748e-05, "loss": 0.0002, "step": 23880 }, { "epoch": 5.188966116420504, "grad_norm": 0.0006230532308109105, "learning_rate": 1.7578464378801045e-05, "loss": 0.0003, "step": 23890 }, { "epoch": 5.191138140747176, "grad_norm": 0.0006302871042862535, "learning_rate": 1.756488922675934e-05, "loss": 0.0009, "step": 23900 }, { "epoch": 5.193310165073849, "grad_norm": 0.0007144107366912067, "learning_rate": 1.7551314074717638e-05, "loss": 0.0001, "step": 23910 }, { "epoch": 5.195482189400521, "grad_norm": 3.326815128326416, "learning_rate": 1.7537738922675934e-05, "loss": 0.014, "step": 23920 }, { "epoch": 5.197654213727194, "grad_norm": 0.0007625357247889042, "learning_rate": 1.7524163770634234e-05, "loss": 0.0002, "step": 23930 }, { "epoch": 5.199826238053866, "grad_norm": 0.0006717897485941648, "learning_rate": 1.751058861859253e-05, "loss": 0.0005, "step": 23940 }, { "epoch": 5.201998262380538, "grad_norm": 0.0006527569494210184, "learning_rate": 1.7497013466550827e-05, "loss": 0.0004, "step": 23950 }, { "epoch": 5.204170286707211, "grad_norm": 0.0006034320103935897, "learning_rate": 1.7483438314509124e-05, "loss": 0.0002, "step": 23960 }, { "epoch": 5.206342311033883, "grad_norm": 0.0006436226540245116, "learning_rate": 1.746986316246742e-05, "loss": 0.0001, "step": 23970 }, { "epoch": 5.208514335360556, "grad_norm": 0.0006348975584842265, "learning_rate": 1.7456288010425717e-05, "loss": 0.0005, "step": 23980 }, { "epoch": 5.210686359687228, "grad_norm": 1.1442747116088867, "learning_rate": 1.7442712858384013e-05, "loss": 0.0174, "step": 23990 }, { "epoch": 5.212858384013901, "grad_norm": 0.0006141769699752331, "learning_rate": 1.742913770634231e-05, "loss": 0.0026, "step": 24000 }, { "epoch": 5.215030408340573, "grad_norm": 0.0015413524815812707, "learning_rate": 1.741556255430061e-05, "loss": 0.0007, "step": 24010 }, { "epoch": 5.2172024326672455, "grad_norm": 0.0006206813850440085, "learning_rate": 1.7401987402258906e-05, "loss": 0.0002, "step": 24020 }, { "epoch": 5.219374456993918, "grad_norm": 0.0006547339726239443, "learning_rate": 1.7388412250217203e-05, "loss": 0.0013, "step": 24030 }, { "epoch": 5.2215464813205905, "grad_norm": 0.0006602110806852579, "learning_rate": 1.73748370981755e-05, "loss": 0.0013, "step": 24040 }, { "epoch": 5.2237185056472635, "grad_norm": 0.0006027701310813427, "learning_rate": 1.7361261946133796e-05, "loss": 0.0005, "step": 24050 }, { "epoch": 5.2258905299739355, "grad_norm": 0.000601082225330174, "learning_rate": 1.7347686794092095e-05, "loss": 0.0003, "step": 24060 }, { "epoch": 5.2280625543006085, "grad_norm": 0.000594939396250993, "learning_rate": 1.7334111642050392e-05, "loss": 0.0193, "step": 24070 }, { "epoch": 5.230234578627281, "grad_norm": 0.0006422780570574105, "learning_rate": 1.7320536490008692e-05, "loss": 0.0006, "step": 24080 }, { "epoch": 5.232406602953953, "grad_norm": 0.0006172214052639902, "learning_rate": 1.730696133796699e-05, "loss": 0.0023, "step": 24090 }, { "epoch": 5.234578627280626, "grad_norm": 0.0006009265780448914, "learning_rate": 1.7293386185925285e-05, "loss": 0.016, "step": 24100 }, { "epoch": 5.236750651607298, "grad_norm": 0.0006303676636889577, "learning_rate": 1.727981103388358e-05, "loss": 0.0041, "step": 24110 }, { "epoch": 5.238922675933971, "grad_norm": 0.0005849118460901082, "learning_rate": 1.7266235881841878e-05, "loss": 0.0002, "step": 24120 }, { "epoch": 5.241094700260643, "grad_norm": 0.001120755448937416, "learning_rate": 1.7252660729800174e-05, "loss": 0.0002, "step": 24130 }, { "epoch": 5.243266724587316, "grad_norm": 0.0005915990332141519, "learning_rate": 1.723908557775847e-05, "loss": 0.0001, "step": 24140 }, { "epoch": 5.245438748913988, "grad_norm": 0.0006512019317597151, "learning_rate": 1.7225510425716767e-05, "loss": 0.0011, "step": 24150 }, { "epoch": 5.247610773240661, "grad_norm": 0.0005761512438766658, "learning_rate": 1.7211935273675067e-05, "loss": 0.0044, "step": 24160 }, { "epoch": 5.249782797567333, "grad_norm": 0.0006002063164487481, "learning_rate": 1.7198360121633364e-05, "loss": 0.0003, "step": 24170 }, { "epoch": 5.251954821894005, "grad_norm": 0.0006288880831561983, "learning_rate": 1.718478496959166e-05, "loss": 0.0005, "step": 24180 }, { "epoch": 5.254126846220678, "grad_norm": 0.0006209379062056541, "learning_rate": 1.7171209817549957e-05, "loss": 0.0063, "step": 24190 }, { "epoch": 5.25629887054735, "grad_norm": 0.000875494908541441, "learning_rate": 1.7157634665508253e-05, "loss": 0.0001, "step": 24200 }, { "epoch": 5.258470894874023, "grad_norm": 0.0006017699488438666, "learning_rate": 1.714405951346655e-05, "loss": 0.0006, "step": 24210 }, { "epoch": 5.260642919200695, "grad_norm": 0.0005895401118323207, "learning_rate": 1.7130484361424846e-05, "loss": 0.0097, "step": 24220 }, { "epoch": 5.262814943527368, "grad_norm": 0.0006465850165113807, "learning_rate": 1.7116909209383143e-05, "loss": 0.0076, "step": 24230 }, { "epoch": 5.26498696785404, "grad_norm": 0.0005974260275252163, "learning_rate": 1.7103334057341443e-05, "loss": 0.0001, "step": 24240 }, { "epoch": 5.267158992180712, "grad_norm": 0.0005888324230909348, "learning_rate": 1.708975890529974e-05, "loss": 0.0002, "step": 24250 }, { "epoch": 5.269331016507385, "grad_norm": 0.0007290198700502515, "learning_rate": 1.707618375325804e-05, "loss": 0.0002, "step": 24260 }, { "epoch": 5.271503040834057, "grad_norm": 0.0016792593523859978, "learning_rate": 1.7062608601216336e-05, "loss": 0.0009, "step": 24270 }, { "epoch": 5.27367506516073, "grad_norm": 0.0006172910216264427, "learning_rate": 1.7049033449174632e-05, "loss": 0.0002, "step": 24280 }, { "epoch": 5.275847089487402, "grad_norm": 0.0010846874210983515, "learning_rate": 1.703545829713293e-05, "loss": 0.0002, "step": 24290 }, { "epoch": 5.278019113814075, "grad_norm": 0.0006275677005760372, "learning_rate": 1.7021883145091225e-05, "loss": 0.0002, "step": 24300 }, { "epoch": 5.280191138140747, "grad_norm": 0.0005748061230406165, "learning_rate": 1.7008307993049525e-05, "loss": 0.0002, "step": 24310 }, { "epoch": 5.282363162467419, "grad_norm": 0.0006720026140101254, "learning_rate": 1.699473284100782e-05, "loss": 0.0001, "step": 24320 }, { "epoch": 5.284535186794092, "grad_norm": 0.0007187994779087603, "learning_rate": 1.6981157688966118e-05, "loss": 0.0001, "step": 24330 }, { "epoch": 5.286707211120764, "grad_norm": 0.0005684763309545815, "learning_rate": 1.6967582536924415e-05, "loss": 0.0001, "step": 24340 }, { "epoch": 5.288879235447437, "grad_norm": 0.0005650657112710178, "learning_rate": 1.695400738488271e-05, "loss": 0.0028, "step": 24350 }, { "epoch": 5.291051259774109, "grad_norm": 0.0005644088960252702, "learning_rate": 1.6940432232841008e-05, "loss": 0.0001, "step": 24360 }, { "epoch": 5.293223284100782, "grad_norm": 0.0005540383281186223, "learning_rate": 1.6926857080799304e-05, "loss": 0.0001, "step": 24370 }, { "epoch": 5.295395308427454, "grad_norm": 0.000572359946090728, "learning_rate": 1.69132819287576e-05, "loss": 0.0001, "step": 24380 }, { "epoch": 5.297567332754127, "grad_norm": 0.0006077389698475599, "learning_rate": 1.68997067767159e-05, "loss": 0.0001, "step": 24390 }, { "epoch": 5.299739357080799, "grad_norm": 0.0005711704143323004, "learning_rate": 1.6886131624674197e-05, "loss": 0.0003, "step": 24400 }, { "epoch": 5.301911381407471, "grad_norm": 0.0005502477288246155, "learning_rate": 1.6872556472632494e-05, "loss": 0.0001, "step": 24410 }, { "epoch": 5.304083405734144, "grad_norm": 0.0005449872696772218, "learning_rate": 1.685898132059079e-05, "loss": 0.0001, "step": 24420 }, { "epoch": 5.3062554300608165, "grad_norm": 0.0005534543306566775, "learning_rate": 1.6845406168549087e-05, "loss": 0.0001, "step": 24430 }, { "epoch": 5.308427454387489, "grad_norm": 0.0005486282170750201, "learning_rate": 1.6831831016507387e-05, "loss": 0.0001, "step": 24440 }, { "epoch": 5.3105994787141615, "grad_norm": 0.0005433621699921787, "learning_rate": 1.6818255864465683e-05, "loss": 0.021, "step": 24450 }, { "epoch": 5.3127715030408345, "grad_norm": 0.0005527997273020446, "learning_rate": 1.680468071242398e-05, "loss": 0.002, "step": 24460 }, { "epoch": 5.3149435273675065, "grad_norm": 0.0005365969846025109, "learning_rate": 1.679110556038228e-05, "loss": 0.0001, "step": 24470 }, { "epoch": 5.317115551694179, "grad_norm": 0.0005567724583670497, "learning_rate": 1.6777530408340576e-05, "loss": 0.0025, "step": 24480 }, { "epoch": 5.319287576020852, "grad_norm": 0.0007047782419249415, "learning_rate": 1.6763955256298872e-05, "loss": 0.0011, "step": 24490 }, { "epoch": 5.321459600347524, "grad_norm": 0.0005513280630111694, "learning_rate": 1.675038010425717e-05, "loss": 0.0001, "step": 24500 }, { "epoch": 5.323631624674197, "grad_norm": 0.04640301689505577, "learning_rate": 1.6736804952215465e-05, "loss": 0.0002, "step": 24510 }, { "epoch": 5.325803649000869, "grad_norm": 0.0005909107276238501, "learning_rate": 1.6723229800173762e-05, "loss": 0.0001, "step": 24520 }, { "epoch": 5.327975673327542, "grad_norm": 0.0005448001902550459, "learning_rate": 1.670965464813206e-05, "loss": 0.0065, "step": 24530 }, { "epoch": 5.330147697654214, "grad_norm": 0.002460588002577424, "learning_rate": 1.669607949609036e-05, "loss": 0.0001, "step": 24540 }, { "epoch": 5.332319721980886, "grad_norm": 0.0005325566744431853, "learning_rate": 1.6682504344048655e-05, "loss": 0.0001, "step": 24550 }, { "epoch": 5.334491746307559, "grad_norm": 0.000526305811945349, "learning_rate": 1.666892919200695e-05, "loss": 0.0002, "step": 24560 }, { "epoch": 5.336663770634231, "grad_norm": 0.005369687918573618, "learning_rate": 1.6655354039965248e-05, "loss": 0.0001, "step": 24570 }, { "epoch": 5.338835794960904, "grad_norm": 0.0005400310037657619, "learning_rate": 1.6641778887923544e-05, "loss": 0.0068, "step": 24580 }, { "epoch": 5.341007819287576, "grad_norm": 0.0005356398760341108, "learning_rate": 1.662820373588184e-05, "loss": 0.0024, "step": 24590 }, { "epoch": 5.343179843614249, "grad_norm": 0.01858246512711048, "learning_rate": 1.6614628583840137e-05, "loss": 0.0001, "step": 24600 }, { "epoch": 5.345351867940921, "grad_norm": 0.000568159855902195, "learning_rate": 1.6601053431798437e-05, "loss": 0.0001, "step": 24610 }, { "epoch": 5.347523892267594, "grad_norm": 0.0005330589483492076, "learning_rate": 1.6587478279756734e-05, "loss": 0.0001, "step": 24620 }, { "epoch": 5.349695916594266, "grad_norm": 0.0006389102200046182, "learning_rate": 1.657390312771503e-05, "loss": 0.0607, "step": 24630 }, { "epoch": 5.351867940920938, "grad_norm": 0.0015882436418905854, "learning_rate": 1.656032797567333e-05, "loss": 0.001, "step": 24640 }, { "epoch": 5.354039965247611, "grad_norm": 0.0010832021944224834, "learning_rate": 1.6546752823631627e-05, "loss": 0.0002, "step": 24650 }, { "epoch": 5.356211989574283, "grad_norm": 0.007294220384210348, "learning_rate": 1.6533177671589923e-05, "loss": 0.0049, "step": 24660 }, { "epoch": 5.358384013900956, "grad_norm": 0.0007425106014125049, "learning_rate": 1.651960251954822e-05, "loss": 0.02, "step": 24670 }, { "epoch": 5.360556038227628, "grad_norm": 0.000792081409599632, "learning_rate": 1.6506027367506516e-05, "loss": 0.0001, "step": 24680 }, { "epoch": 5.362728062554301, "grad_norm": 0.017137495800852776, "learning_rate": 1.6492452215464816e-05, "loss": 0.0169, "step": 24690 }, { "epoch": 5.364900086880973, "grad_norm": 0.0006564608193002641, "learning_rate": 1.6478877063423113e-05, "loss": 0.0002, "step": 24700 }, { "epoch": 5.367072111207645, "grad_norm": 0.0006727157742716372, "learning_rate": 1.646530191138141e-05, "loss": 0.0001, "step": 24710 }, { "epoch": 5.369244135534318, "grad_norm": 0.0006364115397445858, "learning_rate": 1.6451726759339706e-05, "loss": 0.0001, "step": 24720 }, { "epoch": 5.37141615986099, "grad_norm": 0.0006498926086351275, "learning_rate": 1.6438151607298002e-05, "loss": 0.0034, "step": 24730 }, { "epoch": 5.373588184187663, "grad_norm": 0.0006514867418445647, "learning_rate": 1.64245764552563e-05, "loss": 0.0004, "step": 24740 }, { "epoch": 5.375760208514335, "grad_norm": 0.000643563864286989, "learning_rate": 1.6412358818418766e-05, "loss": 0.0194, "step": 24750 }, { "epoch": 5.377932232841008, "grad_norm": 0.0006255882908590138, "learning_rate": 1.6398783666377065e-05, "loss": 0.0001, "step": 24760 }, { "epoch": 5.38010425716768, "grad_norm": 0.0006202755030244589, "learning_rate": 1.6385208514335362e-05, "loss": 0.0001, "step": 24770 }, { "epoch": 5.382276281494352, "grad_norm": 0.0006363045540638268, "learning_rate": 1.637163336229366e-05, "loss": 0.0001, "step": 24780 }, { "epoch": 5.384448305821025, "grad_norm": 0.0007371389074251056, "learning_rate": 1.6358058210251955e-05, "loss": 0.0559, "step": 24790 }, { "epoch": 5.386620330147697, "grad_norm": 0.0025946300011128187, "learning_rate": 1.634448305821025e-05, "loss": 0.0001, "step": 24800 }, { "epoch": 5.38879235447437, "grad_norm": 0.0008282421040348709, "learning_rate": 1.6330907906168548e-05, "loss": 0.0001, "step": 24810 }, { "epoch": 5.390964378801042, "grad_norm": 0.0008425298728980124, "learning_rate": 1.6317332754126845e-05, "loss": 0.0001, "step": 24820 }, { "epoch": 5.393136403127715, "grad_norm": 0.0007536081247963011, "learning_rate": 1.6303757602085144e-05, "loss": 0.0001, "step": 24830 }, { "epoch": 5.395308427454387, "grad_norm": 0.0009574370342306793, "learning_rate": 1.629018245004344e-05, "loss": 0.0001, "step": 24840 }, { "epoch": 5.39748045178106, "grad_norm": 0.0009711087332107127, "learning_rate": 1.627660729800174e-05, "loss": 0.0001, "step": 24850 }, { "epoch": 5.3996524761077325, "grad_norm": 0.0007579062366858125, "learning_rate": 1.6263032145960037e-05, "loss": 0.0001, "step": 24860 }, { "epoch": 5.4018245004344045, "grad_norm": 0.0007839860627427697, "learning_rate": 1.6249456993918334e-05, "loss": 0.0001, "step": 24870 }, { "epoch": 5.4039965247610775, "grad_norm": 0.00206410582177341, "learning_rate": 1.623588184187663e-05, "loss": 0.0001, "step": 24880 }, { "epoch": 5.40616854908775, "grad_norm": 0.0009084375342354178, "learning_rate": 1.6222306689834927e-05, "loss": 0.008, "step": 24890 }, { "epoch": 5.4083405734144225, "grad_norm": 0.0027578831650316715, "learning_rate": 1.6208731537793223e-05, "loss": 0.0052, "step": 24900 }, { "epoch": 5.410512597741095, "grad_norm": 0.018679317086935043, "learning_rate": 1.619515638575152e-05, "loss": 0.012, "step": 24910 }, { "epoch": 5.412684622067768, "grad_norm": 0.0007577328360639513, "learning_rate": 1.618158123370982e-05, "loss": 0.003, "step": 24920 }, { "epoch": 5.41485664639444, "grad_norm": 0.0007110742153599858, "learning_rate": 1.6168006081668116e-05, "loss": 0.0002, "step": 24930 }, { "epoch": 5.417028670721112, "grad_norm": 0.0006982145132496953, "learning_rate": 1.6154430929626413e-05, "loss": 0.0001, "step": 24940 }, { "epoch": 5.419200695047785, "grad_norm": 0.9744926691055298, "learning_rate": 1.614085577758471e-05, "loss": 0.0024, "step": 24950 }, { "epoch": 5.421372719374457, "grad_norm": 0.3139134645462036, "learning_rate": 1.6127280625543006e-05, "loss": 0.0073, "step": 24960 }, { "epoch": 5.42354474370113, "grad_norm": 0.0016664909198880196, "learning_rate": 1.6113705473501302e-05, "loss": 0.0025, "step": 24970 }, { "epoch": 5.425716768027802, "grad_norm": 0.0018273413879796863, "learning_rate": 1.61001303214596e-05, "loss": 0.0001, "step": 24980 }, { "epoch": 5.427888792354475, "grad_norm": 0.009619076736271381, "learning_rate": 1.60865551694179e-05, "loss": 0.0036, "step": 24990 }, { "epoch": 5.430060816681147, "grad_norm": 0.0006586744566448033, "learning_rate": 1.6072980017376195e-05, "loss": 0.0001, "step": 25000 }, { "epoch": 5.432232841007819, "grad_norm": 0.0006743803387507796, "learning_rate": 1.6059404865334492e-05, "loss": 0.0001, "step": 25010 }, { "epoch": 5.434404865334492, "grad_norm": 0.0007701460854150355, "learning_rate": 1.6045829713292788e-05, "loss": 0.0001, "step": 25020 }, { "epoch": 5.436576889661164, "grad_norm": 0.003409789642319083, "learning_rate": 1.6032254561251088e-05, "loss": 0.0143, "step": 25030 }, { "epoch": 5.438748913987837, "grad_norm": 0.0014798047486692667, "learning_rate": 1.6018679409209385e-05, "loss": 0.0002, "step": 25040 }, { "epoch": 5.440920938314509, "grad_norm": 0.002662486396729946, "learning_rate": 1.600510425716768e-05, "loss": 0.0083, "step": 25050 }, { "epoch": 5.443092962641182, "grad_norm": 0.0006909591029398143, "learning_rate": 1.5991529105125978e-05, "loss": 0.0037, "step": 25060 }, { "epoch": 5.445264986967854, "grad_norm": 0.0006745163118466735, "learning_rate": 1.5977953953084278e-05, "loss": 0.0001, "step": 25070 }, { "epoch": 5.447437011294527, "grad_norm": 0.23285184800624847, "learning_rate": 1.5964378801042574e-05, "loss": 0.0047, "step": 25080 }, { "epoch": 5.449609035621199, "grad_norm": 0.0006249352591112256, "learning_rate": 1.595080364900087e-05, "loss": 0.0001, "step": 25090 }, { "epoch": 5.451781059947871, "grad_norm": 0.0006067950162105262, "learning_rate": 1.5937228496959167e-05, "loss": 0.0013, "step": 25100 }, { "epoch": 5.453953084274544, "grad_norm": 5.546998500823975, "learning_rate": 1.5923653344917464e-05, "loss": 0.0486, "step": 25110 }, { "epoch": 5.456125108601216, "grad_norm": 0.0011793351732194424, "learning_rate": 1.591007819287576e-05, "loss": 0.0135, "step": 25120 }, { "epoch": 5.458297132927889, "grad_norm": 0.0008291056728921831, "learning_rate": 1.5896503040834057e-05, "loss": 0.0001, "step": 25130 }, { "epoch": 5.460469157254561, "grad_norm": 0.0031490300316363573, "learning_rate": 1.5882927888792356e-05, "loss": 0.0002, "step": 25140 }, { "epoch": 5.462641181581233, "grad_norm": 0.0009691480663605034, "learning_rate": 1.5869352736750653e-05, "loss": 0.0021, "step": 25150 }, { "epoch": 5.464813205907906, "grad_norm": 0.002213733736425638, "learning_rate": 1.585577758470895e-05, "loss": 0.0059, "step": 25160 }, { "epoch": 5.466985230234578, "grad_norm": 0.0017564035952091217, "learning_rate": 1.5842202432667246e-05, "loss": 0.0001, "step": 25170 }, { "epoch": 5.469157254561251, "grad_norm": 0.0007754802936688066, "learning_rate": 1.5828627280625543e-05, "loss": 0.0061, "step": 25180 }, { "epoch": 5.471329278887923, "grad_norm": 0.0006599400658160448, "learning_rate": 1.581505212858384e-05, "loss": 0.0001, "step": 25190 }, { "epoch": 5.473501303214596, "grad_norm": 0.0007077509071677923, "learning_rate": 1.5801476976542136e-05, "loss": 0.0001, "step": 25200 }, { "epoch": 5.475673327541268, "grad_norm": 0.0008078523096628487, "learning_rate": 1.5787901824500435e-05, "loss": 0.0001, "step": 25210 }, { "epoch": 5.477845351867941, "grad_norm": 0.0027974487747997046, "learning_rate": 1.5774326672458732e-05, "loss": 0.0001, "step": 25220 }, { "epoch": 5.480017376194613, "grad_norm": 0.00064576615113765, "learning_rate": 1.5760751520417032e-05, "loss": 0.0001, "step": 25230 }, { "epoch": 5.4821894005212854, "grad_norm": 0.0006342840497381985, "learning_rate": 1.574717636837533e-05, "loss": 0.0001, "step": 25240 }, { "epoch": 5.484361424847958, "grad_norm": 0.0006040172302164137, "learning_rate": 1.5733601216333625e-05, "loss": 0.0001, "step": 25250 }, { "epoch": 5.4865334491746305, "grad_norm": 0.0007217189413495362, "learning_rate": 1.572002606429192e-05, "loss": 0.0015, "step": 25260 }, { "epoch": 5.4887054735013034, "grad_norm": 1.1857420206069946, "learning_rate": 1.5706450912250218e-05, "loss": 0.0371, "step": 25270 }, { "epoch": 5.4908774978279755, "grad_norm": 0.0006299542728811502, "learning_rate": 1.5692875760208514e-05, "loss": 0.0001, "step": 25280 }, { "epoch": 5.4930495221546485, "grad_norm": 0.0006110373069532216, "learning_rate": 1.567930060816681e-05, "loss": 0.0086, "step": 25290 }, { "epoch": 5.4952215464813206, "grad_norm": 0.0006376666133292019, "learning_rate": 1.566572545612511e-05, "loss": 0.0001, "step": 25300 }, { "epoch": 5.4973935708079935, "grad_norm": 0.000611914845649153, "learning_rate": 1.5652150304083407e-05, "loss": 0.0034, "step": 25310 }, { "epoch": 5.499565595134666, "grad_norm": 0.0006409480702131987, "learning_rate": 1.5638575152041704e-05, "loss": 0.0001, "step": 25320 }, { "epoch": 5.501737619461338, "grad_norm": 0.0006519390735775232, "learning_rate": 1.5625e-05, "loss": 0.0349, "step": 25330 }, { "epoch": 5.503909643788011, "grad_norm": 0.004979894496500492, "learning_rate": 1.5611424847958297e-05, "loss": 0.0001, "step": 25340 }, { "epoch": 5.506081668114683, "grad_norm": 0.003943281248211861, "learning_rate": 1.5597849695916593e-05, "loss": 0.0001, "step": 25350 }, { "epoch": 5.508253692441356, "grad_norm": 0.0005990856443531811, "learning_rate": 1.558427454387489e-05, "loss": 0.0001, "step": 25360 }, { "epoch": 5.510425716768028, "grad_norm": 0.0006143053760752082, "learning_rate": 1.557069939183319e-05, "loss": 0.0001, "step": 25370 }, { "epoch": 5.5125977410947, "grad_norm": 0.000608195026870817, "learning_rate": 1.5557124239791486e-05, "loss": 0.0001, "step": 25380 }, { "epoch": 5.514769765421373, "grad_norm": 0.003303609788417816, "learning_rate": 1.5543549087749783e-05, "loss": 0.0002, "step": 25390 }, { "epoch": 5.516941789748045, "grad_norm": 0.0005992467049509287, "learning_rate": 1.552997393570808e-05, "loss": 0.0001, "step": 25400 }, { "epoch": 5.519113814074718, "grad_norm": 0.0005971924983896315, "learning_rate": 1.551639878366638e-05, "loss": 0.0001, "step": 25410 }, { "epoch": 5.52128583840139, "grad_norm": 0.17835816740989685, "learning_rate": 1.5502823631624676e-05, "loss": 0.0314, "step": 25420 }, { "epoch": 5.523457862728063, "grad_norm": 0.0006768538733012974, "learning_rate": 1.5489248479582972e-05, "loss": 0.0001, "step": 25430 }, { "epoch": 5.525629887054735, "grad_norm": 0.0006399670382961631, "learning_rate": 1.547567332754127e-05, "loss": 0.0001, "step": 25440 }, { "epoch": 5.527801911381408, "grad_norm": 0.0006358442478813231, "learning_rate": 1.546209817549957e-05, "loss": 0.0001, "step": 25450 }, { "epoch": 5.52997393570808, "grad_norm": 0.0006128513487055898, "learning_rate": 1.5448523023457865e-05, "loss": 0.0052, "step": 25460 }, { "epoch": 5.532145960034752, "grad_norm": 0.0007636906229890883, "learning_rate": 1.543494787141616e-05, "loss": 0.0001, "step": 25470 }, { "epoch": 5.534317984361425, "grad_norm": 0.0006304889684543014, "learning_rate": 1.5421372719374458e-05, "loss": 0.0001, "step": 25480 }, { "epoch": 5.536490008688097, "grad_norm": 0.03822551667690277, "learning_rate": 1.5407797567332755e-05, "loss": 0.027, "step": 25490 }, { "epoch": 5.53866203301477, "grad_norm": 0.0006133398273959756, "learning_rate": 1.539422241529105e-05, "loss": 0.0001, "step": 25500 }, { "epoch": 5.540834057341442, "grad_norm": 0.0024534007534384727, "learning_rate": 1.5380647263249348e-05, "loss": 0.0014, "step": 25510 }, { "epoch": 5.543006081668115, "grad_norm": 0.0006024197209626436, "learning_rate": 1.5367072111207644e-05, "loss": 0.0001, "step": 25520 }, { "epoch": 5.545178105994787, "grad_norm": 0.0012399987317621708, "learning_rate": 1.5353496959165944e-05, "loss": 0.0082, "step": 25530 }, { "epoch": 5.54735013032146, "grad_norm": 0.0005957476096227765, "learning_rate": 1.533992180712424e-05, "loss": 0.0001, "step": 25540 }, { "epoch": 5.549522154648132, "grad_norm": 0.0005901667755097151, "learning_rate": 1.5326346655082537e-05, "loss": 0.0061, "step": 25550 }, { "epoch": 5.551694178974804, "grad_norm": 0.0007840208127163351, "learning_rate": 1.5312771503040834e-05, "loss": 0.0001, "step": 25560 }, { "epoch": 5.553866203301477, "grad_norm": 0.0030813610646873713, "learning_rate": 1.529919635099913e-05, "loss": 0.0001, "step": 25570 }, { "epoch": 5.556038227628149, "grad_norm": 0.0005978733533993363, "learning_rate": 1.5285621198957427e-05, "loss": 0.0001, "step": 25580 }, { "epoch": 5.558210251954822, "grad_norm": 0.0018240666249766946, "learning_rate": 1.5272046046915726e-05, "loss": 0.0001, "step": 25590 }, { "epoch": 5.560382276281494, "grad_norm": 0.003863741410896182, "learning_rate": 1.5258470894874025e-05, "loss": 0.0001, "step": 25600 }, { "epoch": 5.562554300608166, "grad_norm": 0.0005884706042706966, "learning_rate": 1.5244895742832321e-05, "loss": 0.0166, "step": 25610 }, { "epoch": 5.564726324934839, "grad_norm": 0.0005884277052246034, "learning_rate": 1.5231320590790618e-05, "loss": 0.0001, "step": 25620 }, { "epoch": 5.566898349261511, "grad_norm": 0.0018177337478846312, "learning_rate": 1.5217745438748914e-05, "loss": 0.0002, "step": 25630 }, { "epoch": 5.569070373588184, "grad_norm": 0.0005721076158806682, "learning_rate": 1.5204170286707212e-05, "loss": 0.0001, "step": 25640 }, { "epoch": 5.571242397914856, "grad_norm": 3.635812997817993, "learning_rate": 1.5190595134665509e-05, "loss": 0.0493, "step": 25650 }, { "epoch": 5.573414422241529, "grad_norm": 0.0006589922704733908, "learning_rate": 1.5177019982623805e-05, "loss": 0.0001, "step": 25660 }, { "epoch": 5.5755864465682015, "grad_norm": 0.0009152049897238612, "learning_rate": 1.5163444830582102e-05, "loss": 0.0001, "step": 25670 }, { "epoch": 5.577758470894874, "grad_norm": 0.0008373759919777513, "learning_rate": 1.5149869678540402e-05, "loss": 0.0001, "step": 25680 }, { "epoch": 5.5799304952215465, "grad_norm": 0.0024861509446054697, "learning_rate": 1.5136294526498698e-05, "loss": 0.0001, "step": 25690 }, { "epoch": 5.582102519548219, "grad_norm": 0.0006951112300157547, "learning_rate": 1.5122719374456995e-05, "loss": 0.0005, "step": 25700 }, { "epoch": 5.5842745438748915, "grad_norm": 0.0007802178151905537, "learning_rate": 1.5109144222415291e-05, "loss": 0.0001, "step": 25710 }, { "epoch": 5.586446568201564, "grad_norm": 0.002290277276188135, "learning_rate": 1.5095569070373588e-05, "loss": 0.0247, "step": 25720 }, { "epoch": 5.588618592528237, "grad_norm": 0.0008800785290077329, "learning_rate": 1.5081993918331886e-05, "loss": 0.0001, "step": 25730 }, { "epoch": 5.590790616854909, "grad_norm": 0.013386576436460018, "learning_rate": 1.5068418766290183e-05, "loss": 0.0001, "step": 25740 }, { "epoch": 5.592962641181582, "grad_norm": 0.0014391910517588258, "learning_rate": 1.5054843614248482e-05, "loss": 0.0004, "step": 25750 }, { "epoch": 5.595134665508254, "grad_norm": 0.0017789318226277828, "learning_rate": 1.5041268462206779e-05, "loss": 0.0001, "step": 25760 }, { "epoch": 5.597306689834927, "grad_norm": 0.0007133111357688904, "learning_rate": 1.5027693310165076e-05, "loss": 0.0052, "step": 25770 }, { "epoch": 5.599478714161599, "grad_norm": 0.0008147881599143147, "learning_rate": 1.5014118158123372e-05, "loss": 0.0001, "step": 25780 }, { "epoch": 5.601650738488271, "grad_norm": 0.0018234155140817165, "learning_rate": 1.5000543006081669e-05, "loss": 0.0002, "step": 25790 }, { "epoch": 5.603822762814944, "grad_norm": 0.0006993436836637557, "learning_rate": 1.4986967854039965e-05, "loss": 0.0001, "step": 25800 }, { "epoch": 5.605994787141616, "grad_norm": 0.0007021636120043695, "learning_rate": 1.4973392701998262e-05, "loss": 0.0055, "step": 25810 }, { "epoch": 5.608166811468289, "grad_norm": 0.0006139131146483123, "learning_rate": 1.495981754995656e-05, "loss": 0.0001, "step": 25820 }, { "epoch": 5.610338835794961, "grad_norm": 0.0006054277182556689, "learning_rate": 1.4946242397914858e-05, "loss": 0.0001, "step": 25830 }, { "epoch": 5.612510860121633, "grad_norm": 0.0033255741000175476, "learning_rate": 1.4932667245873156e-05, "loss": 0.047, "step": 25840 }, { "epoch": 5.614682884448306, "grad_norm": 0.0007535509066656232, "learning_rate": 1.4919092093831453e-05, "loss": 0.0004, "step": 25850 }, { "epoch": 5.616854908774978, "grad_norm": 0.0018026516772806644, "learning_rate": 1.490551694178975e-05, "loss": 0.0002, "step": 25860 }, { "epoch": 5.619026933101651, "grad_norm": 0.03184160590171814, "learning_rate": 1.4891941789748046e-05, "loss": 0.0049, "step": 25870 }, { "epoch": 5.621198957428323, "grad_norm": 0.0012461596634238958, "learning_rate": 1.4878366637706342e-05, "loss": 0.0001, "step": 25880 }, { "epoch": 5.623370981754996, "grad_norm": 0.0024950348306447268, "learning_rate": 1.4864791485664639e-05, "loss": 0.0001, "step": 25890 }, { "epoch": 5.625543006081668, "grad_norm": 0.006434556096792221, "learning_rate": 1.4851216333622935e-05, "loss": 0.0001, "step": 25900 }, { "epoch": 5.627715030408341, "grad_norm": 0.0007220886182039976, "learning_rate": 1.4837641181581235e-05, "loss": 0.0001, "step": 25910 }, { "epoch": 5.629887054735013, "grad_norm": 0.0008968530455604196, "learning_rate": 1.4824066029539532e-05, "loss": 0.0265, "step": 25920 }, { "epoch": 5.632059079061685, "grad_norm": 0.011500025168061256, "learning_rate": 1.481049087749783e-05, "loss": 0.0002, "step": 25930 }, { "epoch": 5.634231103388358, "grad_norm": 0.0007694981177337468, "learning_rate": 1.4796915725456126e-05, "loss": 0.0007, "step": 25940 }, { "epoch": 5.63640312771503, "grad_norm": 0.0009162534261122346, "learning_rate": 1.4783340573414423e-05, "loss": 0.0001, "step": 25950 }, { "epoch": 5.638575152041703, "grad_norm": 0.0019229091703891754, "learning_rate": 1.476976542137272e-05, "loss": 0.0002, "step": 25960 }, { "epoch": 5.640747176368375, "grad_norm": 0.0014625373296439648, "learning_rate": 1.4756190269331016e-05, "loss": 0.0001, "step": 25970 }, { "epoch": 5.642919200695048, "grad_norm": 0.0010808553779497743, "learning_rate": 1.4742615117289316e-05, "loss": 0.0012, "step": 25980 }, { "epoch": 5.64509122502172, "grad_norm": 0.00990188866853714, "learning_rate": 1.4729039965247612e-05, "loss": 0.0002, "step": 25990 }, { "epoch": 5.647263249348393, "grad_norm": 0.056729722768068314, "learning_rate": 1.4715464813205909e-05, "loss": 0.0002, "step": 26000 }, { "epoch": 5.649435273675065, "grad_norm": 0.0019011656986549497, "learning_rate": 1.4701889661164205e-05, "loss": 0.0366, "step": 26010 }, { "epoch": 5.651607298001737, "grad_norm": 0.0027743070386350155, "learning_rate": 1.4688314509122503e-05, "loss": 0.0003, "step": 26020 }, { "epoch": 5.65377932232841, "grad_norm": 0.004712869878858328, "learning_rate": 1.46747393570808e-05, "loss": 0.0002, "step": 26030 }, { "epoch": 5.655951346655082, "grad_norm": 0.0006625755340792239, "learning_rate": 1.4661164205039096e-05, "loss": 0.0004, "step": 26040 }, { "epoch": 5.658123370981755, "grad_norm": 0.026191281154751778, "learning_rate": 1.4647589052997393e-05, "loss": 0.0005, "step": 26050 }, { "epoch": 5.660295395308427, "grad_norm": 0.000669986882712692, "learning_rate": 1.4634013900955693e-05, "loss": 0.0181, "step": 26060 }, { "epoch": 5.6624674196350995, "grad_norm": 0.0005998788401484489, "learning_rate": 1.462043874891399e-05, "loss": 0.0001, "step": 26070 }, { "epoch": 5.664639443961772, "grad_norm": 0.0005905433208681643, "learning_rate": 1.4606863596872286e-05, "loss": 0.0001, "step": 26080 }, { "epoch": 5.6668114682884445, "grad_norm": 0.0005546990432776511, "learning_rate": 1.4593288444830582e-05, "loss": 0.0181, "step": 26090 }, { "epoch": 5.6689834926151175, "grad_norm": 0.0005743417423218489, "learning_rate": 1.4579713292788879e-05, "loss": 0.0003, "step": 26100 }, { "epoch": 5.6711555169417895, "grad_norm": 0.0017763548530638218, "learning_rate": 1.4566138140747177e-05, "loss": 0.0001, "step": 26110 }, { "epoch": 5.6733275412684625, "grad_norm": 0.0005718552274629474, "learning_rate": 1.4552562988705474e-05, "loss": 0.0001, "step": 26120 }, { "epoch": 5.675499565595135, "grad_norm": 0.0005786096444353461, "learning_rate": 1.453898783666377e-05, "loss": 0.0001, "step": 26130 }, { "epoch": 5.6776715899218075, "grad_norm": 0.0006460116128437221, "learning_rate": 1.452541268462207e-05, "loss": 0.0234, "step": 26140 }, { "epoch": 5.67984361424848, "grad_norm": 0.0006802164134569466, "learning_rate": 1.4511837532580367e-05, "loss": 0.0077, "step": 26150 }, { "epoch": 5.682015638575152, "grad_norm": 0.017687102779746056, "learning_rate": 1.4498262380538663e-05, "loss": 0.0003, "step": 26160 }, { "epoch": 5.684187662901825, "grad_norm": 0.000584051595069468, "learning_rate": 1.448468722849696e-05, "loss": 0.0003, "step": 26170 }, { "epoch": 5.686359687228497, "grad_norm": 0.004040115978568792, "learning_rate": 1.4471112076455256e-05, "loss": 0.0015, "step": 26180 }, { "epoch": 5.68853171155517, "grad_norm": 0.019951876252889633, "learning_rate": 1.4457536924413553e-05, "loss": 0.0004, "step": 26190 }, { "epoch": 5.690703735881842, "grad_norm": 0.0008795502944849432, "learning_rate": 1.444396177237185e-05, "loss": 0.0004, "step": 26200 }, { "epoch": 5.692875760208515, "grad_norm": 0.004787113983184099, "learning_rate": 1.4430386620330149e-05, "loss": 0.0002, "step": 26210 }, { "epoch": 5.695047784535187, "grad_norm": 0.0067748213186860085, "learning_rate": 1.4416811468288447e-05, "loss": 0.0004, "step": 26220 }, { "epoch": 5.69721980886186, "grad_norm": 0.008101826533675194, "learning_rate": 1.4403236316246744e-05, "loss": 0.0002, "step": 26230 }, { "epoch": 5.699391833188532, "grad_norm": 0.0006347130984067917, "learning_rate": 1.438966116420504e-05, "loss": 0.0001, "step": 26240 }, { "epoch": 5.701563857515204, "grad_norm": 0.0006116887088865042, "learning_rate": 1.4376086012163337e-05, "loss": 0.0001, "step": 26250 }, { "epoch": 5.703735881841877, "grad_norm": 0.0005490531329996884, "learning_rate": 1.4362510860121633e-05, "loss": 0.0003, "step": 26260 }, { "epoch": 5.705907906168549, "grad_norm": 0.0005719310138374567, "learning_rate": 1.434893570807993e-05, "loss": 0.0001, "step": 26270 }, { "epoch": 5.708079930495222, "grad_norm": 0.01422004122287035, "learning_rate": 1.4335360556038226e-05, "loss": 0.0228, "step": 26280 }, { "epoch": 5.710251954821894, "grad_norm": 0.0006018744898028672, "learning_rate": 1.4321785403996526e-05, "loss": 0.0002, "step": 26290 }, { "epoch": 5.712423979148566, "grad_norm": 0.0006384833832271397, "learning_rate": 1.4308210251954823e-05, "loss": 0.0001, "step": 26300 }, { "epoch": 5.714596003475239, "grad_norm": 0.006105415057390928, "learning_rate": 1.4294635099913121e-05, "loss": 0.001, "step": 26310 }, { "epoch": 5.716768027801911, "grad_norm": 0.0006127614760771394, "learning_rate": 1.4281059947871417e-05, "loss": 0.0047, "step": 26320 }, { "epoch": 5.718940052128584, "grad_norm": 0.0006579833570867777, "learning_rate": 1.4267484795829714e-05, "loss": 0.0007, "step": 26330 }, { "epoch": 5.721112076455256, "grad_norm": 0.0005666995421051979, "learning_rate": 1.425390964378801e-05, "loss": 0.0065, "step": 26340 }, { "epoch": 5.723284100781929, "grad_norm": 0.0005409110453911126, "learning_rate": 1.4240334491746307e-05, "loss": 0.0003, "step": 26350 }, { "epoch": 5.725456125108601, "grad_norm": 0.0005358332418836653, "learning_rate": 1.4226759339704607e-05, "loss": 0.0067, "step": 26360 }, { "epoch": 5.727628149435274, "grad_norm": 0.007814955897629261, "learning_rate": 1.4213184187662903e-05, "loss": 0.0007, "step": 26370 }, { "epoch": 5.729800173761946, "grad_norm": 0.0005765220848843455, "learning_rate": 1.41996090356212e-05, "loss": 0.0005, "step": 26380 }, { "epoch": 5.731972198088618, "grad_norm": 0.006794157437980175, "learning_rate": 1.4186033883579496e-05, "loss": 0.0002, "step": 26390 }, { "epoch": 5.734144222415291, "grad_norm": 0.000567117880564183, "learning_rate": 1.4172458731537795e-05, "loss": 0.0002, "step": 26400 }, { "epoch": 5.736316246741963, "grad_norm": 0.0006249643629416823, "learning_rate": 1.4158883579496091e-05, "loss": 0.0001, "step": 26410 }, { "epoch": 5.738488271068636, "grad_norm": 0.0005337175680324435, "learning_rate": 1.4145308427454388e-05, "loss": 0.0001, "step": 26420 }, { "epoch": 5.740660295395308, "grad_norm": 0.0005305648664943874, "learning_rate": 1.4131733275412684e-05, "loss": 0.0004, "step": 26430 }, { "epoch": 5.742832319721981, "grad_norm": 0.0005393430474214256, "learning_rate": 1.4118158123370984e-05, "loss": 0.0063, "step": 26440 }, { "epoch": 5.745004344048653, "grad_norm": 0.0005549487541429698, "learning_rate": 1.410458297132928e-05, "loss": 0.0001, "step": 26450 }, { "epoch": 5.747176368375325, "grad_norm": 0.02173413708806038, "learning_rate": 1.4091007819287577e-05, "loss": 0.0003, "step": 26460 }, { "epoch": 5.749348392701998, "grad_norm": 0.008116286247968674, "learning_rate": 1.4077432667245873e-05, "loss": 0.0005, "step": 26470 }, { "epoch": 5.7515204170286705, "grad_norm": 0.0005138172418810427, "learning_rate": 1.406385751520417e-05, "loss": 0.0002, "step": 26480 }, { "epoch": 5.753692441355343, "grad_norm": 0.0005188611685298383, "learning_rate": 1.4050282363162468e-05, "loss": 0.0014, "step": 26490 }, { "epoch": 5.7558644656820155, "grad_norm": 0.0005352711887098849, "learning_rate": 1.4036707211120765e-05, "loss": 0.0039, "step": 26500 }, { "epoch": 5.7580364900086884, "grad_norm": 0.0005132692167535424, "learning_rate": 1.4023132059079061e-05, "loss": 0.007, "step": 26510 }, { "epoch": 5.7602085143353605, "grad_norm": 0.000535594008397311, "learning_rate": 1.4009556907037361e-05, "loss": 0.0001, "step": 26520 }, { "epoch": 5.762380538662033, "grad_norm": 0.0005138751002959907, "learning_rate": 1.3995981754995658e-05, "loss": 0.0001, "step": 26530 }, { "epoch": 5.7645525629887056, "grad_norm": 0.0005084550939500332, "learning_rate": 1.3982406602953954e-05, "loss": 0.0001, "step": 26540 }, { "epoch": 5.766724587315378, "grad_norm": 0.000515693100169301, "learning_rate": 1.396883145091225e-05, "loss": 0.0001, "step": 26550 }, { "epoch": 5.768896611642051, "grad_norm": 0.0007487752009183168, "learning_rate": 1.3955256298870547e-05, "loss": 0.0003, "step": 26560 }, { "epoch": 5.771068635968723, "grad_norm": 0.0005719950422644615, "learning_rate": 1.3941681146828844e-05, "loss": 0.0001, "step": 26570 }, { "epoch": 5.773240660295396, "grad_norm": 0.0005108347395434976, "learning_rate": 1.3928105994787142e-05, "loss": 0.0001, "step": 26580 }, { "epoch": 5.775412684622068, "grad_norm": 0.0005025700083933771, "learning_rate": 1.391453084274544e-05, "loss": 0.0001, "step": 26590 }, { "epoch": 5.777584708948741, "grad_norm": 0.0005161292501725256, "learning_rate": 1.3900955690703738e-05, "loss": 0.0001, "step": 26600 }, { "epoch": 5.779756733275413, "grad_norm": 0.000515054736752063, "learning_rate": 1.3887380538662035e-05, "loss": 0.0001, "step": 26610 }, { "epoch": 5.781928757602085, "grad_norm": 0.0005010199383832514, "learning_rate": 1.3873805386620331e-05, "loss": 0.0001, "step": 26620 }, { "epoch": 5.784100781928758, "grad_norm": 0.0005252442206256092, "learning_rate": 1.3860230234578628e-05, "loss": 0.0001, "step": 26630 }, { "epoch": 5.78627280625543, "grad_norm": 0.0005354645545594394, "learning_rate": 1.3846655082536924e-05, "loss": 0.0001, "step": 26640 }, { "epoch": 5.788444830582103, "grad_norm": 0.0004989413428120315, "learning_rate": 1.383307993049522e-05, "loss": 0.003, "step": 26650 }, { "epoch": 5.790616854908775, "grad_norm": 0.0004969520960003138, "learning_rate": 1.3819504778453517e-05, "loss": 0.0001, "step": 26660 }, { "epoch": 5.792788879235447, "grad_norm": 0.0005011470057070255, "learning_rate": 1.3805929626411817e-05, "loss": 0.0001, "step": 26670 }, { "epoch": 5.79496090356212, "grad_norm": 0.0004997824435122311, "learning_rate": 1.3792354474370114e-05, "loss": 0.0001, "step": 26680 }, { "epoch": 5.797132927888792, "grad_norm": 0.0004935134784318507, "learning_rate": 1.3778779322328412e-05, "loss": 0.001, "step": 26690 }, { "epoch": 5.799304952215465, "grad_norm": 0.000501400965731591, "learning_rate": 1.3765204170286708e-05, "loss": 0.0001, "step": 26700 }, { "epoch": 5.801476976542137, "grad_norm": 0.004372824914753437, "learning_rate": 1.3751629018245005e-05, "loss": 0.0002, "step": 26710 }, { "epoch": 5.80364900086881, "grad_norm": 0.0005527559551410377, "learning_rate": 1.3738053866203301e-05, "loss": 0.0607, "step": 26720 }, { "epoch": 5.805821025195482, "grad_norm": 0.0007034401642158628, "learning_rate": 1.3724478714161598e-05, "loss": 0.0001, "step": 26730 }, { "epoch": 5.807993049522155, "grad_norm": 0.0029618972912430763, "learning_rate": 1.3710903562119894e-05, "loss": 0.0001, "step": 26740 }, { "epoch": 5.810165073848827, "grad_norm": 0.0008377031190320849, "learning_rate": 1.3697328410078194e-05, "loss": 0.023, "step": 26750 }, { "epoch": 5.812337098175499, "grad_norm": 0.0009827688336372375, "learning_rate": 1.368375325803649e-05, "loss": 0.0001, "step": 26760 }, { "epoch": 5.814509122502172, "grad_norm": 0.0007727128686383367, "learning_rate": 1.3670178105994787e-05, "loss": 0.0006, "step": 26770 }, { "epoch": 5.816681146828844, "grad_norm": 0.0007160462555475533, "learning_rate": 1.3656602953953086e-05, "loss": 0.0001, "step": 26780 }, { "epoch": 5.818853171155517, "grad_norm": 0.0021952898241579533, "learning_rate": 1.3643027801911382e-05, "loss": 0.0514, "step": 26790 }, { "epoch": 5.821025195482189, "grad_norm": 0.008272473700344563, "learning_rate": 1.3629452649869679e-05, "loss": 0.0007, "step": 26800 }, { "epoch": 5.823197219808862, "grad_norm": 0.0034944340586662292, "learning_rate": 1.3615877497827975e-05, "loss": 0.0028, "step": 26810 }, { "epoch": 5.825369244135534, "grad_norm": 0.015482169575989246, "learning_rate": 1.3602302345786275e-05, "loss": 0.0005, "step": 26820 }, { "epoch": 5.827541268462207, "grad_norm": 0.0030670249834656715, "learning_rate": 1.3588727193744571e-05, "loss": 0.0012, "step": 26830 }, { "epoch": 5.829713292788879, "grad_norm": 0.17072609066963196, "learning_rate": 1.3575152041702868e-05, "loss": 0.0005, "step": 26840 }, { "epoch": 5.831885317115551, "grad_norm": 0.0022793428506702185, "learning_rate": 1.3561576889661164e-05, "loss": 0.0071, "step": 26850 }, { "epoch": 5.834057341442224, "grad_norm": 0.0021639687474817038, "learning_rate": 1.3548001737619461e-05, "loss": 0.0002, "step": 26860 }, { "epoch": 5.836229365768896, "grad_norm": 0.0018504821928218007, "learning_rate": 1.353442658557776e-05, "loss": 0.0002, "step": 26870 }, { "epoch": 5.838401390095569, "grad_norm": 0.0019850528333336115, "learning_rate": 1.3520851433536056e-05, "loss": 0.0002, "step": 26880 }, { "epoch": 5.840573414422241, "grad_norm": 0.002545407973229885, "learning_rate": 1.3507276281494352e-05, "loss": 0.0002, "step": 26890 }, { "epoch": 5.8427454387489135, "grad_norm": 0.0014092468190938234, "learning_rate": 1.3493701129452652e-05, "loss": 0.0003, "step": 26900 }, { "epoch": 5.8449174630755865, "grad_norm": 0.0015003462322056293, "learning_rate": 1.3480125977410949e-05, "loss": 0.0001, "step": 26910 }, { "epoch": 5.8470894874022585, "grad_norm": 0.0012168603716418147, "learning_rate": 1.3466550825369245e-05, "loss": 0.0003, "step": 26920 }, { "epoch": 5.8492615117289315, "grad_norm": 0.0010929395211860538, "learning_rate": 1.3452975673327542e-05, "loss": 0.0002, "step": 26930 }, { "epoch": 5.851433536055604, "grad_norm": 0.001105991075746715, "learning_rate": 1.3439400521285838e-05, "loss": 0.0028, "step": 26940 }, { "epoch": 5.8536055603822765, "grad_norm": 0.0009902853053063154, "learning_rate": 1.3425825369244135e-05, "loss": 0.0001, "step": 26950 }, { "epoch": 5.855777584708949, "grad_norm": 0.0010550229344516993, "learning_rate": 1.3412250217202433e-05, "loss": 0.0061, "step": 26960 }, { "epoch": 5.857949609035622, "grad_norm": 0.0009797523962333798, "learning_rate": 1.3398675065160731e-05, "loss": 0.0001, "step": 26970 }, { "epoch": 5.860121633362294, "grad_norm": 0.0010357762221246958, "learning_rate": 1.338509991311903e-05, "loss": 0.0001, "step": 26980 }, { "epoch": 5.862293657688966, "grad_norm": 0.0010978849604725838, "learning_rate": 1.3371524761077326e-05, "loss": 0.0001, "step": 26990 }, { "epoch": 5.864465682015639, "grad_norm": 0.0009231647709384561, "learning_rate": 1.3357949609035622e-05, "loss": 0.0001, "step": 27000 }, { "epoch": 5.866637706342311, "grad_norm": 0.000857473467476666, "learning_rate": 1.3344374456993919e-05, "loss": 0.0001, "step": 27010 }, { "epoch": 5.868809730668984, "grad_norm": 0.0011523263528943062, "learning_rate": 1.3330799304952215e-05, "loss": 0.0003, "step": 27020 }, { "epoch": 5.870981754995656, "grad_norm": 0.002420986071228981, "learning_rate": 1.3317224152910512e-05, "loss": 0.0537, "step": 27030 }, { "epoch": 5.873153779322329, "grad_norm": 0.007347964681684971, "learning_rate": 1.3303649000868808e-05, "loss": 0.0003, "step": 27040 }, { "epoch": 5.875325803649001, "grad_norm": 0.002605201443657279, "learning_rate": 1.3290073848827108e-05, "loss": 0.0004, "step": 27050 }, { "epoch": 5.877497827975674, "grad_norm": 0.0027562305331230164, "learning_rate": 1.3276498696785405e-05, "loss": 0.0004, "step": 27060 }, { "epoch": 5.879669852302346, "grad_norm": 0.5885543823242188, "learning_rate": 1.3262923544743703e-05, "loss": 0.0023, "step": 27070 }, { "epoch": 5.881841876629018, "grad_norm": 0.0020992341451346874, "learning_rate": 1.3249348392702e-05, "loss": 0.0002, "step": 27080 }, { "epoch": 5.884013900955691, "grad_norm": 0.0018192260758951306, "learning_rate": 1.3235773240660296e-05, "loss": 0.0003, "step": 27090 }, { "epoch": 5.886185925282363, "grad_norm": 0.0018530006054788828, "learning_rate": 1.3222198088618592e-05, "loss": 0.0105, "step": 27100 }, { "epoch": 5.888357949609036, "grad_norm": 0.002540824469178915, "learning_rate": 1.3208622936576889e-05, "loss": 0.0002, "step": 27110 }, { "epoch": 5.890529973935708, "grad_norm": 0.0017492754850536585, "learning_rate": 1.3195047784535185e-05, "loss": 0.0002, "step": 27120 }, { "epoch": 5.89270199826238, "grad_norm": 0.001549109467305243, "learning_rate": 1.3181472632493485e-05, "loss": 0.0002, "step": 27130 }, { "epoch": 5.894874022589053, "grad_norm": 0.0017339596524834633, "learning_rate": 1.3167897480451782e-05, "loss": 0.0064, "step": 27140 }, { "epoch": 5.897046046915725, "grad_norm": 0.0021280774381011724, "learning_rate": 1.3154322328410078e-05, "loss": 0.0398, "step": 27150 }, { "epoch": 5.899218071242398, "grad_norm": 0.004335555247962475, "learning_rate": 1.3140747176368377e-05, "loss": 0.0002, "step": 27160 }, { "epoch": 5.90139009556907, "grad_norm": 0.011722725816071033, "learning_rate": 1.3127172024326673e-05, "loss": 0.0004, "step": 27170 }, { "epoch": 5.903562119895743, "grad_norm": 0.0019563138484954834, "learning_rate": 1.311359687228497e-05, "loss": 0.0004, "step": 27180 }, { "epoch": 5.905734144222415, "grad_norm": 0.002340024570003152, "learning_rate": 1.3100021720243266e-05, "loss": 0.0003, "step": 27190 }, { "epoch": 5.907906168549088, "grad_norm": 0.0030410848557949066, "learning_rate": 1.3086446568201566e-05, "loss": 0.0002, "step": 27200 }, { "epoch": 5.91007819287576, "grad_norm": 0.0047626374289393425, "learning_rate": 1.3072871416159863e-05, "loss": 0.0002, "step": 27210 }, { "epoch": 5.912250217202432, "grad_norm": 0.001380560570396483, "learning_rate": 1.3059296264118159e-05, "loss": 0.0004, "step": 27220 }, { "epoch": 5.914422241529105, "grad_norm": 0.0029822904616594315, "learning_rate": 1.3045721112076456e-05, "loss": 0.0002, "step": 27230 }, { "epoch": 5.916594265855777, "grad_norm": 0.002323774853721261, "learning_rate": 1.3032145960034752e-05, "loss": 0.0002, "step": 27240 }, { "epoch": 5.91876629018245, "grad_norm": 0.004266597796231508, "learning_rate": 1.301857080799305e-05, "loss": 0.0001, "step": 27250 }, { "epoch": 5.920938314509122, "grad_norm": 0.0013143382966518402, "learning_rate": 1.3004995655951347e-05, "loss": 0.0066, "step": 27260 }, { "epoch": 5.923110338835795, "grad_norm": 0.001244851271621883, "learning_rate": 1.2991420503909643e-05, "loss": 0.031, "step": 27270 }, { "epoch": 5.925282363162467, "grad_norm": 0.000747871061321348, "learning_rate": 1.2977845351867943e-05, "loss": 0.0001, "step": 27280 }, { "epoch": 5.92745438748914, "grad_norm": 0.0012308891164138913, "learning_rate": 1.296427019982624e-05, "loss": 0.0002, "step": 27290 }, { "epoch": 5.929626411815812, "grad_norm": 0.0008152248337864876, "learning_rate": 1.2950695047784536e-05, "loss": 0.0001, "step": 27300 }, { "epoch": 5.9317984361424845, "grad_norm": 0.000931603426579386, "learning_rate": 1.2937119895742833e-05, "loss": 0.0041, "step": 27310 }, { "epoch": 5.933970460469157, "grad_norm": 0.0010958875063806772, "learning_rate": 1.292354474370113e-05, "loss": 0.0001, "step": 27320 }, { "epoch": 5.9361424847958295, "grad_norm": 0.001938451430760324, "learning_rate": 1.2909969591659426e-05, "loss": 0.0001, "step": 27330 }, { "epoch": 5.9383145091225025, "grad_norm": 0.0010382839245721698, "learning_rate": 1.2896394439617724e-05, "loss": 0.0034, "step": 27340 }, { "epoch": 5.9404865334491745, "grad_norm": 0.0007932804292067885, "learning_rate": 1.2882819287576022e-05, "loss": 0.0001, "step": 27350 }, { "epoch": 5.942658557775847, "grad_norm": 0.0008296747109852731, "learning_rate": 1.286924413553432e-05, "loss": 0.0002, "step": 27360 }, { "epoch": 5.94483058210252, "grad_norm": 0.0008137888507917523, "learning_rate": 1.2855668983492617e-05, "loss": 0.0001, "step": 27370 }, { "epoch": 5.947002606429192, "grad_norm": 0.0007017937605269253, "learning_rate": 1.2842093831450913e-05, "loss": 0.0001, "step": 27380 }, { "epoch": 5.949174630755865, "grad_norm": 0.001600543037056923, "learning_rate": 1.282851867940921e-05, "loss": 0.0001, "step": 27390 }, { "epoch": 5.951346655082537, "grad_norm": 0.001051751896739006, "learning_rate": 1.2814943527367506e-05, "loss": 0.0001, "step": 27400 }, { "epoch": 5.95351867940921, "grad_norm": 0.000667224929202348, "learning_rate": 1.2801368375325803e-05, "loss": 0.0001, "step": 27410 }, { "epoch": 5.955690703735882, "grad_norm": 0.0007074500899761915, "learning_rate": 1.2787793223284101e-05, "loss": 0.0002, "step": 27420 }, { "epoch": 5.957862728062555, "grad_norm": 0.0007487855036742985, "learning_rate": 1.27742180712424e-05, "loss": 0.0019, "step": 27430 }, { "epoch": 5.960034752389227, "grad_norm": 0.0010091594886034727, "learning_rate": 1.2760642919200696e-05, "loss": 0.0491, "step": 27440 }, { "epoch": 5.962206776715899, "grad_norm": 0.0017638842109590769, "learning_rate": 1.2747067767158994e-05, "loss": 0.0001, "step": 27450 }, { "epoch": 5.964378801042572, "grad_norm": 0.0033730980940163136, "learning_rate": 1.273349261511729e-05, "loss": 0.0001, "step": 27460 }, { "epoch": 5.966550825369244, "grad_norm": 0.0007800173480063677, "learning_rate": 1.2719917463075587e-05, "loss": 0.0001, "step": 27470 }, { "epoch": 5.968722849695917, "grad_norm": 0.6365306973457336, "learning_rate": 1.2706342311033884e-05, "loss": 0.0344, "step": 27480 }, { "epoch": 5.970894874022589, "grad_norm": 0.0016547476407140493, "learning_rate": 1.269276715899218e-05, "loss": 0.0005, "step": 27490 }, { "epoch": 5.973066898349262, "grad_norm": 0.003191007999703288, "learning_rate": 1.2679192006950477e-05, "loss": 0.0007, "step": 27500 }, { "epoch": 5.975238922675934, "grad_norm": 1.2369848489761353, "learning_rate": 1.2665616854908776e-05, "loss": 0.0343, "step": 27510 }, { "epoch": 5.977410947002607, "grad_norm": 0.0017436889465898275, "learning_rate": 1.2652041702867073e-05, "loss": 0.0002, "step": 27520 }, { "epoch": 5.979582971329279, "grad_norm": 0.002784136915579438, "learning_rate": 1.263846655082537e-05, "loss": 0.0007, "step": 27530 }, { "epoch": 5.981754995655951, "grad_norm": 0.003783125663176179, "learning_rate": 1.2624891398783668e-05, "loss": 0.0004, "step": 27540 }, { "epoch": 5.983927019982624, "grad_norm": 0.0025968304835259914, "learning_rate": 1.2611316246741964e-05, "loss": 0.0065, "step": 27550 }, { "epoch": 5.986099044309296, "grad_norm": 0.004436062183231115, "learning_rate": 1.259774109470026e-05, "loss": 0.0004, "step": 27560 }, { "epoch": 5.988271068635969, "grad_norm": 0.001537824748083949, "learning_rate": 1.2584165942658557e-05, "loss": 0.0095, "step": 27570 }, { "epoch": 5.990443092962641, "grad_norm": 0.001534433220513165, "learning_rate": 1.2570590790616857e-05, "loss": 0.0143, "step": 27580 }, { "epoch": 5.992615117289313, "grad_norm": 0.004164781887084246, "learning_rate": 1.2557015638575154e-05, "loss": 0.0046, "step": 27590 }, { "epoch": 5.994787141615986, "grad_norm": 0.004971928428858519, "learning_rate": 1.254344048653345e-05, "loss": 0.0005, "step": 27600 }, { "epoch": 5.996959165942658, "grad_norm": 0.007716748397797346, "learning_rate": 1.2529865334491747e-05, "loss": 0.0005, "step": 27610 }, { "epoch": 5.999131190269331, "grad_norm": 0.0008516062516719103, "learning_rate": 1.2516290182450043e-05, "loss": 0.0154, "step": 27620 }, { "epoch": 6.0, "eval_f1": 0.6761565836298933, "eval_loss": 0.06111188605427742, "eval_runtime": 81.7432, "eval_samples_per_second": 122.029, "eval_steps_per_second": 7.634, "step": 27624 }, { "epoch": 6.001303214596003, "grad_norm": 0.006091386545449495, "learning_rate": 1.2502715030408341e-05, "loss": 0.0001, "step": 27630 }, { "epoch": 6.003475238922676, "grad_norm": 0.0009421011782251298, "learning_rate": 1.248913987836664e-05, "loss": 0.0002, "step": 27640 }, { "epoch": 6.005647263249348, "grad_norm": 0.007014581002295017, "learning_rate": 1.2475564726324936e-05, "loss": 0.0002, "step": 27650 }, { "epoch": 6.007819287576021, "grad_norm": 0.0006896135164424777, "learning_rate": 1.2461989574283233e-05, "loss": 0.0001, "step": 27660 }, { "epoch": 6.009991311902693, "grad_norm": 0.0006911637610755861, "learning_rate": 1.2448414422241529e-05, "loss": 0.0002, "step": 27670 }, { "epoch": 6.012163336229365, "grad_norm": 0.0013413167325779796, "learning_rate": 1.2434839270199827e-05, "loss": 0.0001, "step": 27680 }, { "epoch": 6.014335360556038, "grad_norm": 0.0009952643886208534, "learning_rate": 1.2421264118158124e-05, "loss": 0.0001, "step": 27690 }, { "epoch": 6.01650738488271, "grad_norm": 0.0007215281366370618, "learning_rate": 1.240768896611642e-05, "loss": 0.0061, "step": 27700 }, { "epoch": 6.018679409209383, "grad_norm": 0.0007175153587013483, "learning_rate": 1.2394113814074718e-05, "loss": 0.0005, "step": 27710 }, { "epoch": 6.0208514335360555, "grad_norm": 0.0008878212538547814, "learning_rate": 1.2380538662033015e-05, "loss": 0.0002, "step": 27720 }, { "epoch": 6.023023457862728, "grad_norm": 0.0006465452606789768, "learning_rate": 1.2366963509991313e-05, "loss": 0.0001, "step": 27730 }, { "epoch": 6.0251954821894005, "grad_norm": 0.004886427894234657, "learning_rate": 1.235338835794961e-05, "loss": 0.0001, "step": 27740 }, { "epoch": 6.027367506516073, "grad_norm": 0.000832171062938869, "learning_rate": 1.2339813205907906e-05, "loss": 0.0136, "step": 27750 }, { "epoch": 6.0295395308427455, "grad_norm": 0.0007295712712220848, "learning_rate": 1.2326238053866204e-05, "loss": 0.0001, "step": 27760 }, { "epoch": 6.031711555169418, "grad_norm": 0.0024271977599710226, "learning_rate": 1.2312662901824501e-05, "loss": 0.0002, "step": 27770 }, { "epoch": 6.0338835794960906, "grad_norm": 0.02930435724556446, "learning_rate": 1.2299087749782797e-05, "loss": 0.0002, "step": 27780 }, { "epoch": 6.036055603822763, "grad_norm": 0.0006333515630103648, "learning_rate": 1.2285512597741096e-05, "loss": 0.0048, "step": 27790 }, { "epoch": 6.038227628149436, "grad_norm": 0.003724567359313369, "learning_rate": 1.2271937445699392e-05, "loss": 0.0001, "step": 27800 }, { "epoch": 6.040399652476108, "grad_norm": 0.0007120940135791898, "learning_rate": 1.2258362293657689e-05, "loss": 0.006, "step": 27810 }, { "epoch": 6.042571676802781, "grad_norm": 0.000745340483263135, "learning_rate": 1.2244787141615987e-05, "loss": 0.0001, "step": 27820 }, { "epoch": 6.044743701129453, "grad_norm": 0.0007420461624860764, "learning_rate": 1.2231211989574285e-05, "loss": 0.0001, "step": 27830 }, { "epoch": 6.046915725456125, "grad_norm": 0.0011481235269457102, "learning_rate": 1.2217636837532582e-05, "loss": 0.0001, "step": 27840 }, { "epoch": 6.049087749782798, "grad_norm": 0.007169825490564108, "learning_rate": 1.2204061685490878e-05, "loss": 0.0002, "step": 27850 }, { "epoch": 6.05125977410947, "grad_norm": 0.003404540941119194, "learning_rate": 1.2190486533449175e-05, "loss": 0.0047, "step": 27860 }, { "epoch": 6.053431798436143, "grad_norm": 0.0006610918790102005, "learning_rate": 1.2176911381407473e-05, "loss": 0.0026, "step": 27870 }, { "epoch": 6.055603822762815, "grad_norm": 0.0006066480418667197, "learning_rate": 1.216333622936577e-05, "loss": 0.0001, "step": 27880 }, { "epoch": 6.057775847089488, "grad_norm": 0.0006539212772622705, "learning_rate": 1.2149761077324066e-05, "loss": 0.0001, "step": 27890 }, { "epoch": 6.05994787141616, "grad_norm": 0.0006049839430488646, "learning_rate": 1.2136185925282364e-05, "loss": 0.0002, "step": 27900 }, { "epoch": 6.062119895742832, "grad_norm": 0.3469615578651428, "learning_rate": 1.212261077324066e-05, "loss": 0.0039, "step": 27910 }, { "epoch": 6.064291920069505, "grad_norm": 0.0005610610824078321, "learning_rate": 1.2109035621198959e-05, "loss": 0.0001, "step": 27920 }, { "epoch": 6.066463944396177, "grad_norm": 0.003698694286867976, "learning_rate": 1.2095460469157255e-05, "loss": 0.0004, "step": 27930 }, { "epoch": 6.06863596872285, "grad_norm": 0.0033073897939175367, "learning_rate": 1.2081885317115552e-05, "loss": 0.0002, "step": 27940 }, { "epoch": 6.070807993049522, "grad_norm": 0.0005659732269123197, "learning_rate": 1.206831016507385e-05, "loss": 0.0001, "step": 27950 }, { "epoch": 6.072980017376195, "grad_norm": 0.002684724284335971, "learning_rate": 1.2054735013032146e-05, "loss": 0.0001, "step": 27960 }, { "epoch": 6.075152041702867, "grad_norm": 0.0005653385887853801, "learning_rate": 1.2041159860990443e-05, "loss": 0.0001, "step": 27970 }, { "epoch": 6.077324066029539, "grad_norm": 0.000581434287596494, "learning_rate": 1.2027584708948741e-05, "loss": 0.0002, "step": 27980 }, { "epoch": 6.079496090356212, "grad_norm": 0.0017103266436606646, "learning_rate": 1.2014009556907038e-05, "loss": 0.0001, "step": 27990 }, { "epoch": 6.081668114682884, "grad_norm": 0.0005672717234119773, "learning_rate": 1.2000434404865336e-05, "loss": 0.0001, "step": 28000 }, { "epoch": 6.083840139009557, "grad_norm": 0.0005900085088796914, "learning_rate": 1.1986859252823632e-05, "loss": 0.0001, "step": 28010 }, { "epoch": 6.086012163336229, "grad_norm": 0.0005386321572586894, "learning_rate": 1.197328410078193e-05, "loss": 0.0001, "step": 28020 }, { "epoch": 6.088184187662902, "grad_norm": 0.0009092948166653514, "learning_rate": 1.1959708948740227e-05, "loss": 0.0001, "step": 28030 }, { "epoch": 6.090356211989574, "grad_norm": 0.0005588334170170128, "learning_rate": 1.1946133796698524e-05, "loss": 0.0001, "step": 28040 }, { "epoch": 6.092528236316246, "grad_norm": 0.0015306295827031136, "learning_rate": 1.193255864465682e-05, "loss": 0.0001, "step": 28050 }, { "epoch": 6.094700260642919, "grad_norm": 0.0005438084481284022, "learning_rate": 1.1918983492615118e-05, "loss": 0.0001, "step": 28060 }, { "epoch": 6.096872284969591, "grad_norm": 0.0005323129007592797, "learning_rate": 1.1905408340573415e-05, "loss": 0.0001, "step": 28070 }, { "epoch": 6.099044309296264, "grad_norm": 0.0005739867337979376, "learning_rate": 1.1891833188531711e-05, "loss": 0.0001, "step": 28080 }, { "epoch": 6.101216333622936, "grad_norm": 0.004397066310048103, "learning_rate": 1.187825803649001e-05, "loss": 0.0001, "step": 28090 }, { "epoch": 6.103388357949609, "grad_norm": 0.00933290459215641, "learning_rate": 1.1864682884448306e-05, "loss": 0.0001, "step": 28100 }, { "epoch": 6.105560382276281, "grad_norm": 0.0016863916534930468, "learning_rate": 1.1851107732406604e-05, "loss": 0.0001, "step": 28110 }, { "epoch": 6.107732406602954, "grad_norm": 0.0005723837530240417, "learning_rate": 1.18375325803649e-05, "loss": 0.0001, "step": 28120 }, { "epoch": 6.109904430929626, "grad_norm": 0.0005782105727121234, "learning_rate": 1.1823957428323197e-05, "loss": 0.0001, "step": 28130 }, { "epoch": 6.1120764552562985, "grad_norm": 0.001487166155129671, "learning_rate": 1.1810382276281495e-05, "loss": 0.0047, "step": 28140 }, { "epoch": 6.1142484795829715, "grad_norm": 0.0005293136346153915, "learning_rate": 1.1796807124239792e-05, "loss": 0.0001, "step": 28150 }, { "epoch": 6.1164205039096435, "grad_norm": 0.0006125413347035646, "learning_rate": 1.1783231972198088e-05, "loss": 0.0001, "step": 28160 }, { "epoch": 6.1185925282363165, "grad_norm": 0.0005350797437131405, "learning_rate": 1.1769656820156387e-05, "loss": 0.0001, "step": 28170 }, { "epoch": 6.120764552562989, "grad_norm": 0.0005251934053376317, "learning_rate": 1.1756081668114683e-05, "loss": 0.0001, "step": 28180 }, { "epoch": 6.1229365768896615, "grad_norm": 0.0005300256889313459, "learning_rate": 1.1742506516072981e-05, "loss": 0.0049, "step": 28190 }, { "epoch": 6.125108601216334, "grad_norm": 0.0026551426853984594, "learning_rate": 1.1728931364031278e-05, "loss": 0.0041, "step": 28200 }, { "epoch": 6.127280625543006, "grad_norm": 0.000529299140907824, "learning_rate": 1.1715356211989576e-05, "loss": 0.0004, "step": 28210 }, { "epoch": 6.129452649869679, "grad_norm": 0.0005411395686678588, "learning_rate": 1.1701781059947873e-05, "loss": 0.0001, "step": 28220 }, { "epoch": 6.131624674196351, "grad_norm": 0.0013515339232981205, "learning_rate": 1.1688205907906169e-05, "loss": 0.0001, "step": 28230 }, { "epoch": 6.133796698523024, "grad_norm": 0.0005237034056335688, "learning_rate": 1.1674630755864466e-05, "loss": 0.0001, "step": 28240 }, { "epoch": 6.135968722849696, "grad_norm": 0.0005243255873210728, "learning_rate": 1.1661055603822764e-05, "loss": 0.0001, "step": 28250 }, { "epoch": 6.138140747176369, "grad_norm": 0.0018816670635715127, "learning_rate": 1.164748045178106e-05, "loss": 0.0051, "step": 28260 }, { "epoch": 6.140312771503041, "grad_norm": 0.015584097243845463, "learning_rate": 1.1633905299739357e-05, "loss": 0.0001, "step": 28270 }, { "epoch": 6.142484795829713, "grad_norm": 0.0005188002251088619, "learning_rate": 1.1621687662901825e-05, "loss": 0.0136, "step": 28280 }, { "epoch": 6.144656820156386, "grad_norm": 0.0005213937256485224, "learning_rate": 1.1608112510860122e-05, "loss": 0.0001, "step": 28290 }, { "epoch": 6.146828844483058, "grad_norm": 0.0005156263941898942, "learning_rate": 1.1594537358818418e-05, "loss": 0.0001, "step": 28300 }, { "epoch": 6.149000868809731, "grad_norm": 0.0005170433432795107, "learning_rate": 1.1580962206776717e-05, "loss": 0.0001, "step": 28310 }, { "epoch": 6.151172893136403, "grad_norm": 0.0017105289734899998, "learning_rate": 1.1567387054735015e-05, "loss": 0.0001, "step": 28320 }, { "epoch": 6.153344917463076, "grad_norm": 0.0005193065735511482, "learning_rate": 1.1553811902693311e-05, "loss": 0.0001, "step": 28330 }, { "epoch": 6.155516941789748, "grad_norm": 0.0005105392774567008, "learning_rate": 1.1540236750651608e-05, "loss": 0.0001, "step": 28340 }, { "epoch": 6.157688966116421, "grad_norm": 0.0005192344542592764, "learning_rate": 1.1526661598609904e-05, "loss": 0.0001, "step": 28350 }, { "epoch": 6.159860990443093, "grad_norm": 0.0018821783596649766, "learning_rate": 1.1513086446568203e-05, "loss": 0.0001, "step": 28360 }, { "epoch": 6.162033014769765, "grad_norm": 0.0005060366238467395, "learning_rate": 1.1499511294526499e-05, "loss": 0.0001, "step": 28370 }, { "epoch": 6.164205039096438, "grad_norm": 0.0005061374395154417, "learning_rate": 1.1485936142484796e-05, "loss": 0.0001, "step": 28380 }, { "epoch": 6.16637706342311, "grad_norm": 0.000506606069393456, "learning_rate": 1.1472360990443092e-05, "loss": 0.0098, "step": 28390 }, { "epoch": 6.168549087749783, "grad_norm": 0.001095798914320767, "learning_rate": 1.145878583840139e-05, "loss": 0.0001, "step": 28400 }, { "epoch": 6.170721112076455, "grad_norm": 0.0005390364676713943, "learning_rate": 1.1445210686359688e-05, "loss": 0.0001, "step": 28410 }, { "epoch": 6.172893136403128, "grad_norm": 0.020479867234826088, "learning_rate": 1.1431635534317985e-05, "loss": 0.0001, "step": 28420 }, { "epoch": 6.1750651607298, "grad_norm": 0.0005304127698764205, "learning_rate": 1.1418060382276283e-05, "loss": 0.0046, "step": 28430 }, { "epoch": 6.177237185056472, "grad_norm": 0.0005232515395618975, "learning_rate": 1.140448523023458e-05, "loss": 0.0001, "step": 28440 }, { "epoch": 6.179409209383145, "grad_norm": 0.0005304008373059332, "learning_rate": 1.1390910078192876e-05, "loss": 0.0001, "step": 28450 }, { "epoch": 6.181581233709817, "grad_norm": 0.0015208062250167131, "learning_rate": 1.1377334926151173e-05, "loss": 0.0057, "step": 28460 }, { "epoch": 6.18375325803649, "grad_norm": 0.0005598796997219324, "learning_rate": 1.1363759774109471e-05, "loss": 0.0001, "step": 28470 }, { "epoch": 6.185925282363162, "grad_norm": 0.0006372761563397944, "learning_rate": 1.1350184622067767e-05, "loss": 0.0001, "step": 28480 }, { "epoch": 6.188097306689835, "grad_norm": 0.0023240367881953716, "learning_rate": 1.1336609470026064e-05, "loss": 0.0001, "step": 28490 }, { "epoch": 6.190269331016507, "grad_norm": 0.0013823268236592412, "learning_rate": 1.1323034317984362e-05, "loss": 0.0001, "step": 28500 }, { "epoch": 6.192441355343179, "grad_norm": 0.0006541903712786734, "learning_rate": 1.130945916594266e-05, "loss": 0.0001, "step": 28510 }, { "epoch": 6.194613379669852, "grad_norm": 0.0005835950723849237, "learning_rate": 1.1295884013900957e-05, "loss": 0.0001, "step": 28520 }, { "epoch": 6.196785403996524, "grad_norm": 0.001270595588721335, "learning_rate": 1.1282308861859253e-05, "loss": 0.0001, "step": 28530 }, { "epoch": 6.198957428323197, "grad_norm": 0.0005248417728580534, "learning_rate": 1.126873370981755e-05, "loss": 0.0001, "step": 28540 }, { "epoch": 6.2011294526498695, "grad_norm": 0.732323944568634, "learning_rate": 1.1255158557775848e-05, "loss": 0.0021, "step": 28550 }, { "epoch": 6.203301476976542, "grad_norm": 0.0004987027496099472, "learning_rate": 1.1241583405734145e-05, "loss": 0.0001, "step": 28560 }, { "epoch": 6.2054735013032145, "grad_norm": 0.0007200418040156364, "learning_rate": 1.1228008253692441e-05, "loss": 0.0001, "step": 28570 }, { "epoch": 6.2076455256298875, "grad_norm": 0.0005158480489626527, "learning_rate": 1.1214433101650738e-05, "loss": 0.0089, "step": 28580 }, { "epoch": 6.2098175499565595, "grad_norm": 0.0005337757174856961, "learning_rate": 1.1200857949609036e-05, "loss": 0.0001, "step": 28590 }, { "epoch": 6.211989574283232, "grad_norm": 0.0005403549293987453, "learning_rate": 1.1187282797567334e-05, "loss": 0.0002, "step": 28600 }, { "epoch": 6.214161598609905, "grad_norm": 0.0020134146325290203, "learning_rate": 1.117370764552563e-05, "loss": 0.0031, "step": 28610 }, { "epoch": 6.216333622936577, "grad_norm": 0.0007066564867272973, "learning_rate": 1.1160132493483927e-05, "loss": 0.0001, "step": 28620 }, { "epoch": 6.21850564726325, "grad_norm": 0.0005003396654501557, "learning_rate": 1.1146557341442225e-05, "loss": 0.0001, "step": 28630 }, { "epoch": 6.220677671589922, "grad_norm": 0.0017538318643346429, "learning_rate": 1.1132982189400522e-05, "loss": 0.0001, "step": 28640 }, { "epoch": 6.222849695916595, "grad_norm": 0.0005141882575117052, "learning_rate": 1.1119407037358818e-05, "loss": 0.0001, "step": 28650 }, { "epoch": 6.225021720243267, "grad_norm": 0.0005083966534584761, "learning_rate": 1.1105831885317116e-05, "loss": 0.0001, "step": 28660 }, { "epoch": 6.227193744569939, "grad_norm": 0.000546907598618418, "learning_rate": 1.1092256733275413e-05, "loss": 0.0001, "step": 28670 }, { "epoch": 6.229365768896612, "grad_norm": 0.0015611139824613929, "learning_rate": 1.107868158123371e-05, "loss": 0.006, "step": 28680 }, { "epoch": 6.231537793223284, "grad_norm": 0.0006153453723527491, "learning_rate": 1.1065106429192008e-05, "loss": 0.0044, "step": 28690 }, { "epoch": 6.233709817549957, "grad_norm": 0.000602956220973283, "learning_rate": 1.1051531277150306e-05, "loss": 0.0003, "step": 28700 }, { "epoch": 6.235881841876629, "grad_norm": 0.001490755588747561, "learning_rate": 1.1037956125108602e-05, "loss": 0.0001, "step": 28710 }, { "epoch": 6.238053866203302, "grad_norm": 0.0006966798682697117, "learning_rate": 1.1024380973066899e-05, "loss": 0.0001, "step": 28720 }, { "epoch": 6.240225890529974, "grad_norm": 0.0005024041165597737, "learning_rate": 1.1010805821025195e-05, "loss": 0.0001, "step": 28730 }, { "epoch": 6.242397914856646, "grad_norm": 0.0009793771896511316, "learning_rate": 1.0997230668983494e-05, "loss": 0.0001, "step": 28740 }, { "epoch": 6.244569939183319, "grad_norm": 0.0005014459602534771, "learning_rate": 1.098365551694179e-05, "loss": 0.0001, "step": 28750 }, { "epoch": 6.246741963509991, "grad_norm": 0.0006804326549172401, "learning_rate": 1.0970080364900087e-05, "loss": 0.0001, "step": 28760 }, { "epoch": 6.248913987836664, "grad_norm": 0.0005381435621529818, "learning_rate": 1.0956505212858385e-05, "loss": 0.0001, "step": 28770 }, { "epoch": 6.251086012163336, "grad_norm": 0.00048754297313280404, "learning_rate": 1.0942930060816681e-05, "loss": 0.002, "step": 28780 }, { "epoch": 6.253258036490009, "grad_norm": 0.000489606405608356, "learning_rate": 1.092935490877498e-05, "loss": 0.0061, "step": 28790 }, { "epoch": 6.255430060816681, "grad_norm": 0.0004804472264368087, "learning_rate": 1.0915779756733276e-05, "loss": 0.0001, "step": 28800 }, { "epoch": 6.257602085143354, "grad_norm": 0.0005286936648190022, "learning_rate": 1.0902204604691573e-05, "loss": 0.0001, "step": 28810 }, { "epoch": 6.259774109470026, "grad_norm": 0.00048563070595264435, "learning_rate": 1.088862945264987e-05, "loss": 0.0001, "step": 28820 }, { "epoch": 6.261946133796698, "grad_norm": 0.0004783800686709583, "learning_rate": 1.0875054300608167e-05, "loss": 0.0001, "step": 28830 }, { "epoch": 6.264118158123371, "grad_norm": 0.00048122688895091414, "learning_rate": 1.0861479148566464e-05, "loss": 0.0046, "step": 28840 }, { "epoch": 6.266290182450043, "grad_norm": 0.00048009914462454617, "learning_rate": 1.0847903996524762e-05, "loss": 0.0143, "step": 28850 }, { "epoch": 6.268462206776716, "grad_norm": 0.0004848612006753683, "learning_rate": 1.0834328844483058e-05, "loss": 0.0001, "step": 28860 }, { "epoch": 6.270634231103388, "grad_norm": 0.0009488983778283, "learning_rate": 1.0820753692441355e-05, "loss": 0.0001, "step": 28870 }, { "epoch": 6.272806255430061, "grad_norm": 0.0010840209433808923, "learning_rate": 1.0807178540399653e-05, "loss": 0.0054, "step": 28880 }, { "epoch": 6.274978279756733, "grad_norm": 0.00047432768042199314, "learning_rate": 1.0793603388357951e-05, "loss": 0.0001, "step": 28890 }, { "epoch": 6.277150304083405, "grad_norm": 0.0009752907208167017, "learning_rate": 1.0780028236316248e-05, "loss": 0.0041, "step": 28900 }, { "epoch": 6.279322328410078, "grad_norm": 0.02803998626768589, "learning_rate": 1.0766453084274544e-05, "loss": 0.0001, "step": 28910 }, { "epoch": 6.28149435273675, "grad_norm": 0.0023725698702037334, "learning_rate": 1.0752877932232841e-05, "loss": 0.0001, "step": 28920 }, { "epoch": 6.283666377063423, "grad_norm": 0.00048725263332016766, "learning_rate": 1.0739302780191139e-05, "loss": 0.0001, "step": 28930 }, { "epoch": 6.285838401390095, "grad_norm": 0.0009969660313799977, "learning_rate": 1.0725727628149436e-05, "loss": 0.0001, "step": 28940 }, { "epoch": 6.288010425716768, "grad_norm": 0.00048657169099897146, "learning_rate": 1.0712152476107732e-05, "loss": 0.0001, "step": 28950 }, { "epoch": 6.2901824500434405, "grad_norm": 0.0012719962978735566, "learning_rate": 1.069857732406603e-05, "loss": 0.0001, "step": 28960 }, { "epoch": 6.2923544743701125, "grad_norm": 0.00047141601680777967, "learning_rate": 1.0685002172024327e-05, "loss": 0.0001, "step": 28970 }, { "epoch": 6.2945264986967855, "grad_norm": 0.0004745768674183637, "learning_rate": 1.0671427019982625e-05, "loss": 0.0, "step": 28980 }, { "epoch": 6.296698523023458, "grad_norm": 0.0005675083375535905, "learning_rate": 1.0657851867940922e-05, "loss": 0.0001, "step": 28990 }, { "epoch": 6.2988705473501305, "grad_norm": 0.00048619258450344205, "learning_rate": 1.0644276715899218e-05, "loss": 0.0001, "step": 29000 }, { "epoch": 6.301042571676803, "grad_norm": 0.0004687570908572525, "learning_rate": 1.0630701563857516e-05, "loss": 0.0001, "step": 29010 }, { "epoch": 6.303214596003476, "grad_norm": 0.0004763658216688782, "learning_rate": 1.0617126411815813e-05, "loss": 0.0001, "step": 29020 }, { "epoch": 6.305386620330148, "grad_norm": 0.0004660674021579325, "learning_rate": 1.060355125977411e-05, "loss": 0.0001, "step": 29030 }, { "epoch": 6.307558644656821, "grad_norm": 0.0004942914238199592, "learning_rate": 1.0589976107732407e-05, "loss": 0.0001, "step": 29040 }, { "epoch": 6.309730668983493, "grad_norm": 0.0004738739226013422, "learning_rate": 1.0576400955690704e-05, "loss": 0.0001, "step": 29050 }, { "epoch": 6.311902693310165, "grad_norm": 0.0004960225778631866, "learning_rate": 1.0562825803649002e-05, "loss": 0.0001, "step": 29060 }, { "epoch": 6.314074717636838, "grad_norm": 0.00047240074491128325, "learning_rate": 1.0549250651607299e-05, "loss": 0.0001, "step": 29070 }, { "epoch": 6.31624674196351, "grad_norm": 0.0005025041755288839, "learning_rate": 1.0535675499565597e-05, "loss": 0.0001, "step": 29080 }, { "epoch": 6.318418766290183, "grad_norm": 0.0004930326831527054, "learning_rate": 1.0522100347523893e-05, "loss": 0.0001, "step": 29090 }, { "epoch": 6.320590790616855, "grad_norm": 0.0010761783923953772, "learning_rate": 1.050852519548219e-05, "loss": 0.0001, "step": 29100 }, { "epoch": 6.322762814943528, "grad_norm": 0.00047367255319841206, "learning_rate": 1.0494950043440486e-05, "loss": 0.0001, "step": 29110 }, { "epoch": 6.3249348392702, "grad_norm": 0.0015355387004092336, "learning_rate": 1.0481374891398785e-05, "loss": 0.0607, "step": 29120 }, { "epoch": 6.327106863596872, "grad_norm": 0.0014881700044497848, "learning_rate": 1.0467799739357081e-05, "loss": 0.0045, "step": 29130 }, { "epoch": 6.329278887923545, "grad_norm": 0.0017769057303667068, "learning_rate": 1.0454224587315378e-05, "loss": 0.0002, "step": 29140 }, { "epoch": 6.331450912250217, "grad_norm": 0.23785489797592163, "learning_rate": 1.0440649435273676e-05, "loss": 0.0054, "step": 29150 }, { "epoch": 6.33362293657689, "grad_norm": 2.4570157527923584, "learning_rate": 1.0427074283231972e-05, "loss": 0.0405, "step": 29160 }, { "epoch": 6.335794960903562, "grad_norm": 0.001386605086736381, "learning_rate": 1.041349913119027e-05, "loss": 0.0002, "step": 29170 }, { "epoch": 6.337966985230235, "grad_norm": 0.006186521612107754, "learning_rate": 1.0399923979148567e-05, "loss": 0.0003, "step": 29180 }, { "epoch": 6.340139009556907, "grad_norm": 0.002584697911515832, "learning_rate": 1.0386348827106864e-05, "loss": 0.0006, "step": 29190 }, { "epoch": 6.342311033883579, "grad_norm": 0.0030540975276380777, "learning_rate": 1.0372773675065162e-05, "loss": 0.0003, "step": 29200 }, { "epoch": 6.344483058210252, "grad_norm": 0.001179517013952136, "learning_rate": 1.0359198523023458e-05, "loss": 0.001, "step": 29210 }, { "epoch": 6.346655082536924, "grad_norm": 0.007270792964845896, "learning_rate": 1.0345623370981755e-05, "loss": 0.0001, "step": 29220 }, { "epoch": 6.348827106863597, "grad_norm": 0.001838905387558043, "learning_rate": 1.0332048218940053e-05, "loss": 0.0001, "step": 29230 }, { "epoch": 6.350999131190269, "grad_norm": 0.2083090841770172, "learning_rate": 1.031847306689835e-05, "loss": 0.0052, "step": 29240 }, { "epoch": 6.353171155516942, "grad_norm": 0.005161400884389877, "learning_rate": 1.0304897914856648e-05, "loss": 0.0001, "step": 29250 }, { "epoch": 6.355343179843614, "grad_norm": 0.0009388396283611655, "learning_rate": 1.0291322762814944e-05, "loss": 0.0001, "step": 29260 }, { "epoch": 6.357515204170287, "grad_norm": 0.001116700004786253, "learning_rate": 1.0277747610773242e-05, "loss": 0.0001, "step": 29270 }, { "epoch": 6.359687228496959, "grad_norm": 0.0007811775431036949, "learning_rate": 1.0264172458731539e-05, "loss": 0.0001, "step": 29280 }, { "epoch": 6.361859252823631, "grad_norm": 0.0009371462510898709, "learning_rate": 1.0250597306689835e-05, "loss": 0.0043, "step": 29290 }, { "epoch": 6.364031277150304, "grad_norm": 0.0007141511887311935, "learning_rate": 1.0237022154648132e-05, "loss": 0.0001, "step": 29300 }, { "epoch": 6.366203301476976, "grad_norm": 0.0007462035282514989, "learning_rate": 1.022344700260643e-05, "loss": 0.0002, "step": 29310 }, { "epoch": 6.368375325803649, "grad_norm": 0.0007523217936977744, "learning_rate": 1.0209871850564727e-05, "loss": 0.0001, "step": 29320 }, { "epoch": 6.370547350130321, "grad_norm": 0.0008268319652415812, "learning_rate": 1.0196296698523023e-05, "loss": 0.0001, "step": 29330 }, { "epoch": 6.372719374456994, "grad_norm": 0.0009327057632617652, "learning_rate": 1.0182721546481321e-05, "loss": 0.0002, "step": 29340 }, { "epoch": 6.374891398783666, "grad_norm": 0.0016702677821740508, "learning_rate": 1.0169146394439618e-05, "loss": 0.0046, "step": 29350 }, { "epoch": 6.3770634231103385, "grad_norm": 0.0011580121936276555, "learning_rate": 1.0155571242397916e-05, "loss": 0.0222, "step": 29360 }, { "epoch": 6.379235447437011, "grad_norm": 0.0007400794420391321, "learning_rate": 1.0141996090356213e-05, "loss": 0.0001, "step": 29370 }, { "epoch": 6.3814074717636835, "grad_norm": 0.0006812246283516288, "learning_rate": 1.0128420938314509e-05, "loss": 0.0001, "step": 29380 }, { "epoch": 6.3835794960903565, "grad_norm": 0.0010162891121581197, "learning_rate": 1.0114845786272807e-05, "loss": 0.0002, "step": 29390 }, { "epoch": 6.3857515204170285, "grad_norm": 0.0008094085496850312, "learning_rate": 1.0101270634231104e-05, "loss": 0.0001, "step": 29400 }, { "epoch": 6.3879235447437015, "grad_norm": 0.0006741775432601571, "learning_rate": 1.00876954821894e-05, "loss": 0.016, "step": 29410 }, { "epoch": 6.390095569070374, "grad_norm": 0.0005891403998248279, "learning_rate": 1.0074120330147697e-05, "loss": 0.0001, "step": 29420 }, { "epoch": 6.392267593397046, "grad_norm": 0.0006258541252464056, "learning_rate": 1.0060545178105995e-05, "loss": 0.0001, "step": 29430 }, { "epoch": 6.394439617723719, "grad_norm": 0.0006183306686580181, "learning_rate": 1.0046970026064293e-05, "loss": 0.0001, "step": 29440 }, { "epoch": 6.396611642050391, "grad_norm": 0.0007246190216392279, "learning_rate": 1.003339487402259e-05, "loss": 0.0586, "step": 29450 }, { "epoch": 6.398783666377064, "grad_norm": 0.002597115933895111, "learning_rate": 1.0019819721980888e-05, "loss": 0.0002, "step": 29460 }, { "epoch": 6.400955690703736, "grad_norm": 0.0027950238436460495, "learning_rate": 1.0006244569939184e-05, "loss": 0.0005, "step": 29470 }, { "epoch": 6.403127715030409, "grad_norm": 0.004178280476480722, "learning_rate": 9.992669417897481e-06, "loss": 0.029, "step": 29480 }, { "epoch": 6.405299739357081, "grad_norm": 0.02065322920680046, "learning_rate": 9.979094265855777e-06, "loss": 0.0009, "step": 29490 }, { "epoch": 6.407471763683754, "grad_norm": 0.012738760560750961, "learning_rate": 9.965519113814076e-06, "loss": 0.0016, "step": 29500 }, { "epoch": 6.409643788010426, "grad_norm": 0.019186438992619514, "learning_rate": 9.951943961772372e-06, "loss": 0.0011, "step": 29510 }, { "epoch": 6.411815812337098, "grad_norm": 0.0061889952048659325, "learning_rate": 9.938368809730669e-06, "loss": 0.0004, "step": 29520 }, { "epoch": 6.413987836663771, "grad_norm": 0.004089644178748131, "learning_rate": 9.924793657688967e-06, "loss": 0.0003, "step": 29530 }, { "epoch": 6.416159860990443, "grad_norm": 0.003858107840642333, "learning_rate": 9.911218505647265e-06, "loss": 0.0003, "step": 29540 }, { "epoch": 6.418331885317116, "grad_norm": 0.8940383791923523, "learning_rate": 9.897643353605562e-06, "loss": 0.0059, "step": 29550 }, { "epoch": 6.420503909643788, "grad_norm": 0.001113207545131445, "learning_rate": 9.884068201563858e-06, "loss": 0.0002, "step": 29560 }, { "epoch": 6.422675933970461, "grad_norm": 0.0023445405531674623, "learning_rate": 9.870493049522155e-06, "loss": 0.0003, "step": 29570 }, { "epoch": 6.424847958297133, "grad_norm": 0.0012578286696225405, "learning_rate": 9.856917897480453e-06, "loss": 0.0087, "step": 29580 }, { "epoch": 6.427019982623805, "grad_norm": 0.01483129058033228, "learning_rate": 9.84334274543875e-06, "loss": 0.0002, "step": 29590 }, { "epoch": 6.429192006950478, "grad_norm": 0.0018328166333958507, "learning_rate": 9.829767593397046e-06, "loss": 0.0002, "step": 29600 }, { "epoch": 6.43136403127715, "grad_norm": 0.0012817789101973176, "learning_rate": 9.816192441355342e-06, "loss": 0.0001, "step": 29610 }, { "epoch": 6.433536055603823, "grad_norm": 0.002289244905114174, "learning_rate": 9.80261728931364e-06, "loss": 0.0002, "step": 29620 }, { "epoch": 6.435708079930495, "grad_norm": 0.0060974303632974625, "learning_rate": 9.789042137271939e-06, "loss": 0.0054, "step": 29630 }, { "epoch": 6.437880104257168, "grad_norm": 0.0017827164847403765, "learning_rate": 9.775466985230235e-06, "loss": 0.0002, "step": 29640 }, { "epoch": 6.44005212858384, "grad_norm": 0.002662285231053829, "learning_rate": 9.761891833188533e-06, "loss": 0.0002, "step": 29650 }, { "epoch": 6.442224152910512, "grad_norm": 0.0029989073518663645, "learning_rate": 9.74831668114683e-06, "loss": 0.0005, "step": 29660 }, { "epoch": 6.444396177237185, "grad_norm": 0.0008592927479185164, "learning_rate": 9.734741529105126e-06, "loss": 0.0001, "step": 29670 }, { "epoch": 6.446568201563857, "grad_norm": 0.646806001663208, "learning_rate": 9.721166377063423e-06, "loss": 0.0091, "step": 29680 }, { "epoch": 6.44874022589053, "grad_norm": 0.0013074136804789305, "learning_rate": 9.707591225021721e-06, "loss": 0.0002, "step": 29690 }, { "epoch": 6.450912250217202, "grad_norm": 0.0017774419393390417, "learning_rate": 9.694016072980018e-06, "loss": 0.0002, "step": 29700 }, { "epoch": 6.453084274543875, "grad_norm": 0.0008454259368591011, "learning_rate": 9.680440920938314e-06, "loss": 0.0001, "step": 29710 }, { "epoch": 6.455256298870547, "grad_norm": 0.0038401109632104635, "learning_rate": 9.666865768896612e-06, "loss": 0.0048, "step": 29720 }, { "epoch": 6.45742832319722, "grad_norm": 0.001266291132196784, "learning_rate": 9.65329061685491e-06, "loss": 0.0001, "step": 29730 }, { "epoch": 6.459600347523892, "grad_norm": 0.002196115907281637, "learning_rate": 9.639715464813207e-06, "loss": 0.0036, "step": 29740 }, { "epoch": 6.461772371850564, "grad_norm": 0.000684195663779974, "learning_rate": 9.626140312771504e-06, "loss": 0.0001, "step": 29750 }, { "epoch": 6.463944396177237, "grad_norm": 0.0014452398754656315, "learning_rate": 9.6125651607298e-06, "loss": 0.0001, "step": 29760 }, { "epoch": 6.4661164205039094, "grad_norm": 0.04958047717809677, "learning_rate": 9.598990008688098e-06, "loss": 0.0002, "step": 29770 }, { "epoch": 6.468288444830582, "grad_norm": 0.0006793006905354559, "learning_rate": 9.585414856646395e-06, "loss": 0.0005, "step": 29780 }, { "epoch": 6.4704604691572545, "grad_norm": 0.0006012742524035275, "learning_rate": 9.571839704604691e-06, "loss": 0.0001, "step": 29790 }, { "epoch": 6.4726324934839266, "grad_norm": 0.0005449285381473601, "learning_rate": 9.558264552562988e-06, "loss": 0.0001, "step": 29800 }, { "epoch": 6.4748045178105995, "grad_norm": 0.0005929931649006903, "learning_rate": 9.544689400521286e-06, "loss": 0.0002, "step": 29810 }, { "epoch": 6.476976542137272, "grad_norm": 0.0006561490008607507, "learning_rate": 9.531114248479584e-06, "loss": 0.0001, "step": 29820 }, { "epoch": 6.4791485664639445, "grad_norm": 0.0006006149342283607, "learning_rate": 9.51753909643788e-06, "loss": 0.0001, "step": 29830 }, { "epoch": 6.481320590790617, "grad_norm": 0.000574872363358736, "learning_rate": 9.503963944396179e-06, "loss": 0.0001, "step": 29840 }, { "epoch": 6.48349261511729, "grad_norm": 0.0010642276611179113, "learning_rate": 9.490388792354475e-06, "loss": 0.0001, "step": 29850 }, { "epoch": 6.485664639443962, "grad_norm": 0.0005911542684771121, "learning_rate": 9.476813640312772e-06, "loss": 0.0001, "step": 29860 }, { "epoch": 6.487836663770635, "grad_norm": 0.0006189719424583018, "learning_rate": 9.463238488271068e-06, "loss": 0.0001, "step": 29870 }, { "epoch": 6.490008688097307, "grad_norm": 0.0006683343090116978, "learning_rate": 9.449663336229367e-06, "loss": 0.0001, "step": 29880 }, { "epoch": 6.492180712423979, "grad_norm": 0.0014524429570883512, "learning_rate": 9.436088184187663e-06, "loss": 0.0039, "step": 29890 }, { "epoch": 6.494352736750652, "grad_norm": 0.0006171105778776109, "learning_rate": 9.42251303214596e-06, "loss": 0.0001, "step": 29900 }, { "epoch": 6.496524761077324, "grad_norm": 0.0005288756219670177, "learning_rate": 9.408937880104258e-06, "loss": 0.0001, "step": 29910 }, { "epoch": 6.498696785403997, "grad_norm": 0.0005219160229898989, "learning_rate": 9.395362728062556e-06, "loss": 0.0001, "step": 29920 }, { "epoch": 6.500868809730669, "grad_norm": 0.0006902749300934374, "learning_rate": 9.381787576020853e-06, "loss": 0.0087, "step": 29930 }, { "epoch": 6.503040834057342, "grad_norm": 0.0009760453249327838, "learning_rate": 9.368212423979149e-06, "loss": 0.0001, "step": 29940 }, { "epoch": 6.505212858384014, "grad_norm": 0.0005377961206249893, "learning_rate": 9.354637271937446e-06, "loss": 0.0001, "step": 29950 }, { "epoch": 6.507384882710687, "grad_norm": 0.0018513593822717667, "learning_rate": 9.341062119895744e-06, "loss": 0.0001, "step": 29960 }, { "epoch": 6.509556907037359, "grad_norm": 0.0005402403767220676, "learning_rate": 9.32748696785404e-06, "loss": 0.0001, "step": 29970 }, { "epoch": 6.511728931364031, "grad_norm": 0.0006104527274146676, "learning_rate": 9.313911815812337e-06, "loss": 0.0048, "step": 29980 }, { "epoch": 6.513900955690704, "grad_norm": 0.0010075703030452132, "learning_rate": 9.300336663770633e-06, "loss": 0.0001, "step": 29990 }, { "epoch": 6.516072980017376, "grad_norm": 0.0005355360917747021, "learning_rate": 9.286761511728932e-06, "loss": 0.0001, "step": 30000 }, { "epoch": 6.518245004344049, "grad_norm": 0.0005514567019417882, "learning_rate": 9.27318635968723e-06, "loss": 0.0001, "step": 30010 }, { "epoch": 6.520417028670721, "grad_norm": 0.0005905954749323428, "learning_rate": 9.259611207645526e-06, "loss": 0.0001, "step": 30020 }, { "epoch": 6.522589052997393, "grad_norm": 0.0005409869481809437, "learning_rate": 9.246036055603823e-06, "loss": 0.0121, "step": 30030 }, { "epoch": 6.524761077324066, "grad_norm": 0.0005206941277720034, "learning_rate": 9.232460903562121e-06, "loss": 0.0001, "step": 30040 }, { "epoch": 6.526933101650738, "grad_norm": 0.0010512637672945857, "learning_rate": 9.218885751520417e-06, "loss": 0.0001, "step": 30050 }, { "epoch": 6.529105125977411, "grad_norm": 0.0009512483957223594, "learning_rate": 9.205310599478714e-06, "loss": 0.0001, "step": 30060 }, { "epoch": 6.531277150304083, "grad_norm": 0.000541405170224607, "learning_rate": 9.191735447437012e-06, "loss": 0.0001, "step": 30070 }, { "epoch": 6.533449174630756, "grad_norm": 0.0005479655810631812, "learning_rate": 9.178160295395309e-06, "loss": 0.0001, "step": 30080 }, { "epoch": 6.535621198957428, "grad_norm": 0.0006360400002449751, "learning_rate": 9.164585143353605e-06, "loss": 0.0042, "step": 30090 }, { "epoch": 6.537793223284101, "grad_norm": 0.0021848746109753847, "learning_rate": 9.151009991311903e-06, "loss": 0.0001, "step": 30100 }, { "epoch": 6.539965247610773, "grad_norm": 0.0005829419824294746, "learning_rate": 9.137434839270202e-06, "loss": 0.0001, "step": 30110 }, { "epoch": 6.542137271937445, "grad_norm": 0.0009212974109686911, "learning_rate": 9.123859687228498e-06, "loss": 0.0001, "step": 30120 }, { "epoch": 6.544309296264118, "grad_norm": 0.000987934647127986, "learning_rate": 9.110284535186795e-06, "loss": 0.0001, "step": 30130 }, { "epoch": 6.54648132059079, "grad_norm": 0.0005119699635542929, "learning_rate": 9.096709383145091e-06, "loss": 0.0001, "step": 30140 }, { "epoch": 6.548653344917463, "grad_norm": 0.0005486300215125084, "learning_rate": 9.08313423110339e-06, "loss": 0.0041, "step": 30150 }, { "epoch": 6.550825369244135, "grad_norm": 0.0005901909316889942, "learning_rate": 9.069559079061686e-06, "loss": 0.0001, "step": 30160 }, { "epoch": 6.552997393570808, "grad_norm": 0.0005802253726869822, "learning_rate": 9.055983927019982e-06, "loss": 0.0001, "step": 30170 }, { "epoch": 6.55516941789748, "grad_norm": 0.0005272625712677836, "learning_rate": 9.042408774978279e-06, "loss": 0.0001, "step": 30180 }, { "epoch": 6.557341442224153, "grad_norm": 0.0018508587963879108, "learning_rate": 9.028833622936577e-06, "loss": 0.0021, "step": 30190 }, { "epoch": 6.5595134665508255, "grad_norm": 0.0005285025690682232, "learning_rate": 9.015258470894875e-06, "loss": 0.0001, "step": 30200 }, { "epoch": 6.5616854908774975, "grad_norm": 0.0005890244501642883, "learning_rate": 9.001683318853172e-06, "loss": 0.0001, "step": 30210 }, { "epoch": 6.5638575152041705, "grad_norm": 0.00271811755374074, "learning_rate": 8.988108166811468e-06, "loss": 0.0001, "step": 30220 }, { "epoch": 6.566029539530843, "grad_norm": 0.0010901761706918478, "learning_rate": 8.974533014769767e-06, "loss": 0.0001, "step": 30230 }, { "epoch": 6.5682015638575155, "grad_norm": 0.0005904104909859598, "learning_rate": 8.960957862728063e-06, "loss": 0.0001, "step": 30240 }, { "epoch": 6.570373588184188, "grad_norm": 0.0005831909948028624, "learning_rate": 8.94738271068636e-06, "loss": 0.0001, "step": 30250 }, { "epoch": 6.57254561251086, "grad_norm": 0.20518867671489716, "learning_rate": 8.933807558644658e-06, "loss": 0.005, "step": 30260 }, { "epoch": 6.574717636837533, "grad_norm": 0.0007069736020639539, "learning_rate": 8.920232406602954e-06, "loss": 0.0038, "step": 30270 }, { "epoch": 6.576889661164205, "grad_norm": 0.0006283735274337232, "learning_rate": 8.90665725456125e-06, "loss": 0.0001, "step": 30280 }, { "epoch": 6.579061685490878, "grad_norm": 0.0012741173850372434, "learning_rate": 8.893082102519549e-06, "loss": 0.0001, "step": 30290 }, { "epoch": 6.58123370981755, "grad_norm": 0.0006880299188196659, "learning_rate": 8.879506950477847e-06, "loss": 0.0046, "step": 30300 }, { "epoch": 6.583405734144223, "grad_norm": 0.0005726420204155147, "learning_rate": 8.865931798436144e-06, "loss": 0.0001, "step": 30310 }, { "epoch": 6.585577758470895, "grad_norm": 0.0005194940022192895, "learning_rate": 8.85235664639444e-06, "loss": 0.0001, "step": 30320 }, { "epoch": 6.587749782797568, "grad_norm": 0.0005502538406290114, "learning_rate": 8.838781494352737e-06, "loss": 0.0001, "step": 30330 }, { "epoch": 6.58992180712424, "grad_norm": 0.0007209287723526359, "learning_rate": 8.825206342311035e-06, "loss": 0.0001, "step": 30340 }, { "epoch": 6.592093831450912, "grad_norm": 0.0005370117723941803, "learning_rate": 8.811631190269331e-06, "loss": 0.0001, "step": 30350 }, { "epoch": 6.594265855777585, "grad_norm": 0.0005737512256018817, "learning_rate": 8.798056038227628e-06, "loss": 0.0001, "step": 30360 }, { "epoch": 6.596437880104257, "grad_norm": 0.0005802140804007649, "learning_rate": 8.784480886185924e-06, "loss": 0.0001, "step": 30370 }, { "epoch": 6.59860990443093, "grad_norm": 0.0005447014700621367, "learning_rate": 8.770905734144223e-06, "loss": 0.0001, "step": 30380 }, { "epoch": 6.600781928757602, "grad_norm": 0.0006462910096161067, "learning_rate": 8.75733058210252e-06, "loss": 0.0034, "step": 30390 }, { "epoch": 6.602953953084275, "grad_norm": 0.0005304300575517118, "learning_rate": 8.743755430060817e-06, "loss": 0.0001, "step": 30400 }, { "epoch": 6.605125977410947, "grad_norm": 0.000523955503012985, "learning_rate": 8.730180278019114e-06, "loss": 0.0001, "step": 30410 }, { "epoch": 6.60729800173762, "grad_norm": 0.0006388821639120579, "learning_rate": 8.716605125977412e-06, "loss": 0.0001, "step": 30420 }, { "epoch": 6.609470026064292, "grad_norm": 0.0006965682841837406, "learning_rate": 8.703029973935709e-06, "loss": 0.0001, "step": 30430 }, { "epoch": 6.611642050390964, "grad_norm": 0.0008022001711651683, "learning_rate": 8.689454821894005e-06, "loss": 0.0001, "step": 30440 }, { "epoch": 6.613814074717637, "grad_norm": 0.0005276188021525741, "learning_rate": 8.675879669852303e-06, "loss": 0.0001, "step": 30450 }, { "epoch": 6.615986099044309, "grad_norm": 0.0004995689378120005, "learning_rate": 8.6623045178106e-06, "loss": 0.0001, "step": 30460 }, { "epoch": 6.618158123370982, "grad_norm": 0.0006201511714607477, "learning_rate": 8.648729365768896e-06, "loss": 0.0001, "step": 30470 }, { "epoch": 6.620330147697654, "grad_norm": 0.0005640414892695844, "learning_rate": 8.635154213727194e-06, "loss": 0.0004, "step": 30480 }, { "epoch": 6.622502172024326, "grad_norm": 0.000546163646504283, "learning_rate": 8.621579061685493e-06, "loss": 0.0065, "step": 30490 }, { "epoch": 6.624674196350999, "grad_norm": 0.004325380548834801, "learning_rate": 8.60800390964379e-06, "loss": 0.0001, "step": 30500 }, { "epoch": 6.626846220677671, "grad_norm": 0.0005421187379397452, "learning_rate": 8.594428757602086e-06, "loss": 0.0056, "step": 30510 }, { "epoch": 6.629018245004344, "grad_norm": 0.0007157633081078529, "learning_rate": 8.580853605560382e-06, "loss": 0.0001, "step": 30520 }, { "epoch": 6.631190269331016, "grad_norm": 0.0009098859154619277, "learning_rate": 8.56727845351868e-06, "loss": 0.0001, "step": 30530 }, { "epoch": 6.633362293657689, "grad_norm": 0.0010034176521003246, "learning_rate": 8.553703301476977e-06, "loss": 0.0001, "step": 30540 }, { "epoch": 6.635534317984361, "grad_norm": 0.0005072278436273336, "learning_rate": 8.540128149435273e-06, "loss": 0.0001, "step": 30550 }, { "epoch": 6.637706342311034, "grad_norm": 0.0008486681617796421, "learning_rate": 8.52655299739357e-06, "loss": 0.0001, "step": 30560 }, { "epoch": 6.639878366637706, "grad_norm": 0.0005881373072043061, "learning_rate": 8.512977845351868e-06, "loss": 0.0185, "step": 30570 }, { "epoch": 6.642050390964378, "grad_norm": 0.0005281348712742329, "learning_rate": 8.499402693310166e-06, "loss": 0.0001, "step": 30580 }, { "epoch": 6.644222415291051, "grad_norm": 0.0017992773791775107, "learning_rate": 8.485827541268463e-06, "loss": 0.0001, "step": 30590 }, { "epoch": 6.6463944396177235, "grad_norm": 0.0006575282313860953, "learning_rate": 8.47225238922676e-06, "loss": 0.0055, "step": 30600 }, { "epoch": 6.648566463944396, "grad_norm": 0.004106589592993259, "learning_rate": 8.458677237185058e-06, "loss": 0.0001, "step": 30610 }, { "epoch": 6.6507384882710685, "grad_norm": 0.004435055423527956, "learning_rate": 8.445102085143354e-06, "loss": 0.0001, "step": 30620 }, { "epoch": 6.6529105125977415, "grad_norm": 0.0005864517297595739, "learning_rate": 8.43152693310165e-06, "loss": 0.0001, "step": 30630 }, { "epoch": 6.6550825369244135, "grad_norm": 0.002506996737793088, "learning_rate": 8.417951781059947e-06, "loss": 0.0001, "step": 30640 }, { "epoch": 6.6572545612510865, "grad_norm": 0.0006056345882825553, "learning_rate": 8.404376629018245e-06, "loss": 0.008, "step": 30650 }, { "epoch": 6.659426585577759, "grad_norm": 0.0007190427859313786, "learning_rate": 8.390801476976542e-06, "loss": 0.0072, "step": 30660 }, { "epoch": 6.661598609904431, "grad_norm": 0.0009075113339349627, "learning_rate": 8.37722632493484e-06, "loss": 0.0001, "step": 30670 }, { "epoch": 6.663770634231104, "grad_norm": 0.00090401666238904, "learning_rate": 8.363651172893138e-06, "loss": 0.0001, "step": 30680 }, { "epoch": 6.665942658557776, "grad_norm": 0.0028792324010282755, "learning_rate": 8.350076020851435e-06, "loss": 0.0001, "step": 30690 }, { "epoch": 6.668114682884449, "grad_norm": 0.0006368290050886571, "learning_rate": 8.336500868809731e-06, "loss": 0.0001, "step": 30700 }, { "epoch": 6.670286707211121, "grad_norm": 0.002956211566925049, "learning_rate": 8.322925716768028e-06, "loss": 0.0001, "step": 30710 }, { "epoch": 6.672458731537793, "grad_norm": 0.0004954506293870509, "learning_rate": 8.309350564726326e-06, "loss": 0.0001, "step": 30720 }, { "epoch": 6.674630755864466, "grad_norm": 0.010645301081240177, "learning_rate": 8.295775412684622e-06, "loss": 0.0001, "step": 30730 }, { "epoch": 6.676802780191138, "grad_norm": 0.0008476504008285701, "learning_rate": 8.282200260642919e-06, "loss": 0.0413, "step": 30740 }, { "epoch": 6.678974804517811, "grad_norm": 0.0004940642975270748, "learning_rate": 8.268625108601215e-06, "loss": 0.0068, "step": 30750 }, { "epoch": 6.681146828844483, "grad_norm": 0.0005225742934271693, "learning_rate": 8.255049956559514e-06, "loss": 0.0051, "step": 30760 }, { "epoch": 6.683318853171156, "grad_norm": 0.0010523422388359904, "learning_rate": 8.241474804517812e-06, "loss": 0.0001, "step": 30770 }, { "epoch": 6.685490877497828, "grad_norm": 0.0005202463362365961, "learning_rate": 8.227899652476108e-06, "loss": 0.0001, "step": 30780 }, { "epoch": 6.687662901824501, "grad_norm": 0.000996004673652351, "learning_rate": 8.214324500434405e-06, "loss": 0.0001, "step": 30790 }, { "epoch": 6.689834926151173, "grad_norm": 0.0006468400824815035, "learning_rate": 8.200749348392703e-06, "loss": 0.0001, "step": 30800 }, { "epoch": 6.692006950477845, "grad_norm": 0.03909625858068466, "learning_rate": 8.187174196351e-06, "loss": 0.0002, "step": 30810 }, { "epoch": 6.694178974804518, "grad_norm": 0.000541096436791122, "learning_rate": 8.173599044309296e-06, "loss": 0.0001, "step": 30820 }, { "epoch": 6.69635099913119, "grad_norm": 0.0005183752509765327, "learning_rate": 8.161381407471765e-06, "loss": 0.0071, "step": 30830 }, { "epoch": 6.698523023457863, "grad_norm": 0.0005007189465686679, "learning_rate": 8.147806255430061e-06, "loss": 0.0272, "step": 30840 }, { "epoch": 6.700695047784535, "grad_norm": 0.2259320616722107, "learning_rate": 8.134231103388358e-06, "loss": 0.0102, "step": 30850 }, { "epoch": 6.702867072111208, "grad_norm": 0.0012630257988348603, "learning_rate": 8.120655951346654e-06, "loss": 0.0001, "step": 30860 }, { "epoch": 6.70503909643788, "grad_norm": 0.001169295865111053, "learning_rate": 8.107080799304952e-06, "loss": 0.0001, "step": 30870 }, { "epoch": 6.707211120764553, "grad_norm": 0.0005535431555472314, "learning_rate": 8.09350564726325e-06, "loss": 0.0003, "step": 30880 }, { "epoch": 6.709383145091225, "grad_norm": 0.0005410081357695162, "learning_rate": 8.079930495221547e-06, "loss": 0.026, "step": 30890 }, { "epoch": 6.711555169417897, "grad_norm": 0.0007783859618939459, "learning_rate": 8.066355343179844e-06, "loss": 0.0001, "step": 30900 }, { "epoch": 6.71372719374457, "grad_norm": 0.0005165360635146499, "learning_rate": 8.052780191138142e-06, "loss": 0.0002, "step": 30910 }, { "epoch": 6.715899218071242, "grad_norm": 0.0021058362908661366, "learning_rate": 8.039205039096438e-06, "loss": 0.0001, "step": 30920 }, { "epoch": 6.718071242397915, "grad_norm": 0.0005603966419585049, "learning_rate": 8.025629887054735e-06, "loss": 0.009, "step": 30930 }, { "epoch": 6.720243266724587, "grad_norm": 0.0006074358243495226, "learning_rate": 8.012054735013033e-06, "loss": 0.0001, "step": 30940 }, { "epoch": 6.722415291051259, "grad_norm": 0.0005121738649904728, "learning_rate": 7.99847958297133e-06, "loss": 0.0052, "step": 30950 }, { "epoch": 6.724587315377932, "grad_norm": 0.0005047316080890596, "learning_rate": 7.984904430929626e-06, "loss": 0.0001, "step": 30960 }, { "epoch": 6.726759339704604, "grad_norm": 0.0005447333678603172, "learning_rate": 7.971329278887924e-06, "loss": 0.0003, "step": 30970 }, { "epoch": 6.728931364031277, "grad_norm": 0.0005434873746708035, "learning_rate": 7.957754126846222e-06, "loss": 0.0001, "step": 30980 }, { "epoch": 6.731103388357949, "grad_norm": 0.0005110373022034764, "learning_rate": 7.944178974804519e-06, "loss": 0.0001, "step": 30990 }, { "epoch": 6.733275412684622, "grad_norm": 0.0005496907979249954, "learning_rate": 7.930603822762815e-06, "loss": 0.0001, "step": 31000 }, { "epoch": 6.7354474370112944, "grad_norm": 0.000641629914753139, "learning_rate": 7.917028670721112e-06, "loss": 0.0001, "step": 31010 }, { "epoch": 6.737619461337967, "grad_norm": 0.22070743143558502, "learning_rate": 7.90345351867941e-06, "loss": 0.0052, "step": 31020 }, { "epoch": 6.7397914856646395, "grad_norm": 0.0005148387281224132, "learning_rate": 7.889878366637707e-06, "loss": 0.0001, "step": 31030 }, { "epoch": 6.7419635099913116, "grad_norm": 0.0005770947900600731, "learning_rate": 7.876303214596003e-06, "loss": 0.0244, "step": 31040 }, { "epoch": 6.7441355343179845, "grad_norm": 0.0010065827518701553, "learning_rate": 7.8627280625543e-06, "loss": 0.0001, "step": 31050 }, { "epoch": 6.746307558644657, "grad_norm": 0.0007362100295722485, "learning_rate": 7.849152910512598e-06, "loss": 0.0001, "step": 31060 }, { "epoch": 6.7484795829713295, "grad_norm": 0.0005651618121191859, "learning_rate": 7.835577758470896e-06, "loss": 0.0001, "step": 31070 }, { "epoch": 6.750651607298002, "grad_norm": 0.002264307113364339, "learning_rate": 7.822002606429193e-06, "loss": 0.0002, "step": 31080 }, { "epoch": 6.752823631624675, "grad_norm": 0.0014880020171403885, "learning_rate": 7.808427454387489e-06, "loss": 0.0002, "step": 31090 }, { "epoch": 6.754995655951347, "grad_norm": 0.002035699551925063, "learning_rate": 7.794852302345787e-06, "loss": 0.0001, "step": 31100 }, { "epoch": 6.757167680278019, "grad_norm": 0.0005644381162710488, "learning_rate": 7.781277150304084e-06, "loss": 0.0002, "step": 31110 }, { "epoch": 6.759339704604692, "grad_norm": 0.0005862244288437068, "learning_rate": 7.76770199826238e-06, "loss": 0.0001, "step": 31120 }, { "epoch": 6.761511728931364, "grad_norm": 0.0005447894800454378, "learning_rate": 7.754126846220679e-06, "loss": 0.0001, "step": 31130 }, { "epoch": 6.763683753258037, "grad_norm": 0.0011917410884052515, "learning_rate": 7.740551694178975e-06, "loss": 0.0001, "step": 31140 }, { "epoch": 6.765855777584709, "grad_norm": 0.000644481391645968, "learning_rate": 7.726976542137272e-06, "loss": 0.0001, "step": 31150 }, { "epoch": 6.768027801911382, "grad_norm": 0.0005681001930497587, "learning_rate": 7.71340139009557e-06, "loss": 0.0001, "step": 31160 }, { "epoch": 6.770199826238054, "grad_norm": 0.013895424082875252, "learning_rate": 7.699826238053868e-06, "loss": 0.0001, "step": 31170 }, { "epoch": 6.772371850564726, "grad_norm": 0.0007106468547135592, "learning_rate": 7.686251086012164e-06, "loss": 0.0003, "step": 31180 }, { "epoch": 6.774543874891399, "grad_norm": 0.000547458475921303, "learning_rate": 7.672675933970461e-06, "loss": 0.0001, "step": 31190 }, { "epoch": 6.776715899218071, "grad_norm": 0.0023357209283858538, "learning_rate": 7.659100781928757e-06, "loss": 0.0222, "step": 31200 }, { "epoch": 6.778887923544744, "grad_norm": 0.013180797919631004, "learning_rate": 7.645525629887056e-06, "loss": 0.017, "step": 31210 }, { "epoch": 6.781059947871416, "grad_norm": 0.03352075815200806, "learning_rate": 7.631950477845352e-06, "loss": 0.0003, "step": 31220 }, { "epoch": 6.783231972198089, "grad_norm": 0.00327364937402308, "learning_rate": 7.6183753258036496e-06, "loss": 0.0002, "step": 31230 }, { "epoch": 6.785403996524761, "grad_norm": 0.004868528805673122, "learning_rate": 7.604800173761946e-06, "loss": 0.0001, "step": 31240 }, { "epoch": 6.787576020851434, "grad_norm": 0.0014305302174761891, "learning_rate": 7.591225021720244e-06, "loss": 0.0042, "step": 31250 }, { "epoch": 6.789748045178106, "grad_norm": 0.001355145126581192, "learning_rate": 7.577649869678541e-06, "loss": 0.0002, "step": 31260 }, { "epoch": 6.791920069504778, "grad_norm": 0.0006391478236764669, "learning_rate": 7.564074717636837e-06, "loss": 0.0004, "step": 31270 }, { "epoch": 6.794092093831451, "grad_norm": 0.0006392408395186067, "learning_rate": 7.550499565595135e-06, "loss": 0.0001, "step": 31280 }, { "epoch": 6.796264118158123, "grad_norm": 0.0009195109596475959, "learning_rate": 7.536924413553433e-06, "loss": 0.0001, "step": 31290 }, { "epoch": 6.798436142484796, "grad_norm": 0.0015891172224655747, "learning_rate": 7.523349261511729e-06, "loss": 0.0001, "step": 31300 }, { "epoch": 6.800608166811468, "grad_norm": 0.0005748227122239769, "learning_rate": 7.509774109470026e-06, "loss": 0.0001, "step": 31310 }, { "epoch": 6.80278019113814, "grad_norm": 0.0006642222870141268, "learning_rate": 7.496198957428324e-06, "loss": 0.0001, "step": 31320 }, { "epoch": 6.804952215464813, "grad_norm": 0.0006176797323860228, "learning_rate": 7.482623805386621e-06, "loss": 0.0001, "step": 31330 }, { "epoch": 6.807124239791485, "grad_norm": 0.0005560345598496497, "learning_rate": 7.469048653344918e-06, "loss": 0.0003, "step": 31340 }, { "epoch": 6.809296264118158, "grad_norm": 0.0021519416477531195, "learning_rate": 7.4554735013032144e-06, "loss": 0.0036, "step": 31350 }, { "epoch": 6.81146828844483, "grad_norm": 0.0005443825502879918, "learning_rate": 7.441898349261513e-06, "loss": 0.0001, "step": 31360 }, { "epoch": 6.813640312771503, "grad_norm": 0.0007841295446269214, "learning_rate": 7.428323197219809e-06, "loss": 0.0001, "step": 31370 }, { "epoch": 6.815812337098175, "grad_norm": 0.0018480330472812057, "learning_rate": 7.4147480451781065e-06, "loss": 0.0001, "step": 31380 }, { "epoch": 6.817984361424848, "grad_norm": 0.0018647913821041584, "learning_rate": 7.401172893136403e-06, "loss": 0.0001, "step": 31390 }, { "epoch": 6.82015638575152, "grad_norm": 0.0005843811668455601, "learning_rate": 7.387597741094701e-06, "loss": 0.0002, "step": 31400 }, { "epoch": 6.8223284100781925, "grad_norm": 0.0010984676191583276, "learning_rate": 7.374022589052998e-06, "loss": 0.0005, "step": 31410 }, { "epoch": 6.824500434404865, "grad_norm": 0.0005059109535068274, "learning_rate": 7.360447437011295e-06, "loss": 0.0001, "step": 31420 }, { "epoch": 6.8266724587315375, "grad_norm": 0.0004944863030686975, "learning_rate": 7.346872284969592e-06, "loss": 0.0017, "step": 31430 }, { "epoch": 6.8288444830582105, "grad_norm": 0.0004914201563224196, "learning_rate": 7.33329713292789e-06, "loss": 0.0001, "step": 31440 }, { "epoch": 6.8310165073848825, "grad_norm": 0.0005860158707946539, "learning_rate": 7.319721980886186e-06, "loss": 0.0038, "step": 31450 }, { "epoch": 6.8331885317115555, "grad_norm": 0.0004897097824141383, "learning_rate": 7.306146828844483e-06, "loss": 0.0001, "step": 31460 }, { "epoch": 6.835360556038228, "grad_norm": 0.0013720918213948607, "learning_rate": 7.29257167680278e-06, "loss": 0.0001, "step": 31470 }, { "epoch": 6.8375325803649005, "grad_norm": 0.0004990609013475478, "learning_rate": 7.278996524761078e-06, "loss": 0.0001, "step": 31480 }, { "epoch": 6.839704604691573, "grad_norm": 0.0004813902487512678, "learning_rate": 7.265421372719375e-06, "loss": 0.0001, "step": 31490 }, { "epoch": 6.841876629018245, "grad_norm": 0.0009401888237334788, "learning_rate": 7.251846220677671e-06, "loss": 0.0001, "step": 31500 }, { "epoch": 6.844048653344918, "grad_norm": 0.0006055055418983102, "learning_rate": 7.238271068635969e-06, "loss": 0.0001, "step": 31510 }, { "epoch": 6.84622067767159, "grad_norm": 0.010489613749086857, "learning_rate": 7.224695916594267e-06, "loss": 0.0069, "step": 31520 }, { "epoch": 6.848392701998263, "grad_norm": 0.000997158931568265, "learning_rate": 7.2111207645525634e-06, "loss": 0.0001, "step": 31530 }, { "epoch": 6.850564726324935, "grad_norm": 0.00048104513552971184, "learning_rate": 7.19754561251086e-06, "loss": 0.0001, "step": 31540 }, { "epoch": 6.852736750651607, "grad_norm": 0.002893621800467372, "learning_rate": 7.183970460469158e-06, "loss": 0.0001, "step": 31550 }, { "epoch": 6.85490877497828, "grad_norm": 0.0006328218150883913, "learning_rate": 7.170395308427455e-06, "loss": 0.0027, "step": 31560 }, { "epoch": 6.857080799304952, "grad_norm": 0.0006695294287055731, "learning_rate": 7.156820156385752e-06, "loss": 0.0343, "step": 31570 }, { "epoch": 6.859252823631625, "grad_norm": 0.00702779833227396, "learning_rate": 7.1432450043440485e-06, "loss": 0.0001, "step": 31580 }, { "epoch": 6.861424847958297, "grad_norm": 0.0006968594971112907, "learning_rate": 7.129669852302347e-06, "loss": 0.0055, "step": 31590 }, { "epoch": 6.86359687228497, "grad_norm": 0.0021681380458176136, "learning_rate": 7.116094700260643e-06, "loss": 0.0002, "step": 31600 }, { "epoch": 6.865768896611642, "grad_norm": 0.007272107060998678, "learning_rate": 7.102519548218941e-06, "loss": 0.0001, "step": 31610 }, { "epoch": 6.867940920938315, "grad_norm": 0.0004872163408435881, "learning_rate": 7.088944396177237e-06, "loss": 0.0001, "step": 31620 }, { "epoch": 6.870112945264987, "grad_norm": 0.0007561290985904634, "learning_rate": 7.075369244135535e-06, "loss": 0.0001, "step": 31630 }, { "epoch": 6.872284969591659, "grad_norm": 0.10669023543596268, "learning_rate": 7.061794092093832e-06, "loss": 0.0002, "step": 31640 }, { "epoch": 6.874456993918332, "grad_norm": 0.0030692359432578087, "learning_rate": 7.048218940052128e-06, "loss": 0.0001, "step": 31650 }, { "epoch": 6.876629018245004, "grad_norm": 0.0014511916087940335, "learning_rate": 7.034643788010426e-06, "loss": 0.0001, "step": 31660 }, { "epoch": 6.878801042571677, "grad_norm": 0.0005103896837681532, "learning_rate": 7.021068635968724e-06, "loss": 0.0001, "step": 31670 }, { "epoch": 6.880973066898349, "grad_norm": 0.0005026645958423615, "learning_rate": 7.00749348392702e-06, "loss": 0.0001, "step": 31680 }, { "epoch": 6.883145091225022, "grad_norm": 0.00869758054614067, "learning_rate": 6.993918331885317e-06, "loss": 0.0001, "step": 31690 }, { "epoch": 6.885317115551694, "grad_norm": 0.00047411341802217066, "learning_rate": 6.980343179843614e-06, "loss": 0.0001, "step": 31700 }, { "epoch": 6.887489139878367, "grad_norm": 0.00048073820653371513, "learning_rate": 6.9667680278019125e-06, "loss": 0.0001, "step": 31710 }, { "epoch": 6.889661164205039, "grad_norm": 0.0004761155869346112, "learning_rate": 6.953192875760209e-06, "loss": 0.0001, "step": 31720 }, { "epoch": 6.891833188531711, "grad_norm": 0.0005412718746811152, "learning_rate": 6.9396177237185055e-06, "loss": 0.0064, "step": 31730 }, { "epoch": 6.894005212858384, "grad_norm": 0.0005142592126503587, "learning_rate": 6.926042571676804e-06, "loss": 0.0044, "step": 31740 }, { "epoch": 6.896177237185056, "grad_norm": 0.013025223277509212, "learning_rate": 6.9124674196351e-06, "loss": 0.0173, "step": 31750 }, { "epoch": 6.898349261511729, "grad_norm": 0.21733896434307098, "learning_rate": 6.8988922675933975e-06, "loss": 0.001, "step": 31760 }, { "epoch": 6.900521285838401, "grad_norm": 0.0006785244331695139, "learning_rate": 6.885317115551694e-06, "loss": 0.0004, "step": 31770 }, { "epoch": 6.902693310165073, "grad_norm": 0.0012234164169058204, "learning_rate": 6.871741963509992e-06, "loss": 0.0001, "step": 31780 }, { "epoch": 6.904865334491746, "grad_norm": 0.0004961843369528651, "learning_rate": 6.858166811468289e-06, "loss": 0.0039, "step": 31790 }, { "epoch": 6.907037358818418, "grad_norm": 0.0005016808281652629, "learning_rate": 6.844591659426586e-06, "loss": 0.0041, "step": 31800 }, { "epoch": 6.909209383145091, "grad_norm": 0.0005186402704566717, "learning_rate": 6.831016507384883e-06, "loss": 0.0001, "step": 31810 }, { "epoch": 6.911381407471763, "grad_norm": 0.05258096754550934, "learning_rate": 6.817441355343181e-06, "loss": 0.0002, "step": 31820 }, { "epoch": 6.913553431798436, "grad_norm": 0.000563104753382504, "learning_rate": 6.803866203301477e-06, "loss": 0.0001, "step": 31830 }, { "epoch": 6.9157254561251085, "grad_norm": 0.0004913609591312706, "learning_rate": 6.790291051259774e-06, "loss": 0.0003, "step": 31840 }, { "epoch": 6.917897480451781, "grad_norm": 0.0004866503004450351, "learning_rate": 6.776715899218071e-06, "loss": 0.0001, "step": 31850 }, { "epoch": 6.9200695047784535, "grad_norm": 0.0004830710240639746, "learning_rate": 6.763140747176369e-06, "loss": 0.0001, "step": 31860 }, { "epoch": 6.922241529105126, "grad_norm": 0.0005364646785892546, "learning_rate": 6.749565595134666e-06, "loss": 0.0002, "step": 31870 }, { "epoch": 6.9244135534317985, "grad_norm": 0.010428816080093384, "learning_rate": 6.735990443092962e-06, "loss": 0.0001, "step": 31880 }, { "epoch": 6.926585577758471, "grad_norm": 0.0004830741381738335, "learning_rate": 6.72241529105126e-06, "loss": 0.0051, "step": 31890 }, { "epoch": 6.928757602085144, "grad_norm": 0.000482941948575899, "learning_rate": 6.708840139009558e-06, "loss": 0.0001, "step": 31900 }, { "epoch": 6.930929626411816, "grad_norm": 0.0005031520850025117, "learning_rate": 6.6952649869678545e-06, "loss": 0.0001, "step": 31910 }, { "epoch": 6.933101650738489, "grad_norm": 0.0004937806515954435, "learning_rate": 6.681689834926151e-06, "loss": 0.0001, "step": 31920 }, { "epoch": 6.935273675065161, "grad_norm": 0.0004919038037769496, "learning_rate": 6.668114682884449e-06, "loss": 0.0001, "step": 31930 }, { "epoch": 6.937445699391834, "grad_norm": 0.0016344421310350299, "learning_rate": 6.654539530842746e-06, "loss": 0.0157, "step": 31940 }, { "epoch": 6.939617723718506, "grad_norm": 0.0005624077748507261, "learning_rate": 6.640964378801043e-06, "loss": 0.0002, "step": 31950 }, { "epoch": 6.941789748045178, "grad_norm": 0.0005970174679532647, "learning_rate": 6.6273892267593396e-06, "loss": 0.0001, "step": 31960 }, { "epoch": 6.943961772371851, "grad_norm": 0.0005279725883156061, "learning_rate": 6.613814074717638e-06, "loss": 0.0001, "step": 31970 }, { "epoch": 6.946133796698523, "grad_norm": 0.0004851980193052441, "learning_rate": 6.600238922675934e-06, "loss": 0.0004, "step": 31980 }, { "epoch": 6.948305821025196, "grad_norm": 0.0004759115108754486, "learning_rate": 6.586663770634232e-06, "loss": 0.0001, "step": 31990 }, { "epoch": 6.950477845351868, "grad_norm": 0.0005062356358394027, "learning_rate": 6.573088618592528e-06, "loss": 0.0001, "step": 32000 }, { "epoch": 6.95264986967854, "grad_norm": 0.00047750433441251516, "learning_rate": 6.559513466550826e-06, "loss": 0.0001, "step": 32010 }, { "epoch": 6.954821894005213, "grad_norm": 0.49857795238494873, "learning_rate": 6.545938314509123e-06, "loss": 0.0056, "step": 32020 }, { "epoch": 6.956993918331885, "grad_norm": 0.015077983029186726, "learning_rate": 6.532363162467419e-06, "loss": 0.0125, "step": 32030 }, { "epoch": 6.959165942658558, "grad_norm": 0.0009956338908523321, "learning_rate": 6.518788010425717e-06, "loss": 0.0052, "step": 32040 }, { "epoch": 6.96133796698523, "grad_norm": 0.0005113192601129413, "learning_rate": 6.505212858384015e-06, "loss": 0.0001, "step": 32050 }, { "epoch": 6.963509991311903, "grad_norm": 0.000508747179992497, "learning_rate": 6.4916377063423114e-06, "loss": 0.0001, "step": 32060 }, { "epoch": 6.965682015638575, "grad_norm": 0.0006362470448948443, "learning_rate": 6.478062554300608e-06, "loss": 0.0001, "step": 32070 }, { "epoch": 6.967854039965248, "grad_norm": 0.000799038796685636, "learning_rate": 6.464487402258905e-06, "loss": 0.0001, "step": 32080 }, { "epoch": 6.97002606429192, "grad_norm": 0.0016933096339926124, "learning_rate": 6.4509122502172035e-06, "loss": 0.0001, "step": 32090 }, { "epoch": 6.972198088618592, "grad_norm": 0.016275865957140923, "learning_rate": 6.4373370981755e-06, "loss": 0.0001, "step": 32100 }, { "epoch": 6.974370112945265, "grad_norm": 0.0004907046677544713, "learning_rate": 6.4237619461337965e-06, "loss": 0.0001, "step": 32110 }, { "epoch": 6.976542137271937, "grad_norm": 0.0004914928576909006, "learning_rate": 6.410186794092095e-06, "loss": 0.0002, "step": 32120 }, { "epoch": 6.97871416159861, "grad_norm": 0.0004871877026744187, "learning_rate": 6.396611642050391e-06, "loss": 0.005, "step": 32130 }, { "epoch": 6.980886185925282, "grad_norm": 0.0012598761823028326, "learning_rate": 6.383036490008689e-06, "loss": 0.0001, "step": 32140 }, { "epoch": 6.983058210251955, "grad_norm": 0.0013038625475019217, "learning_rate": 6.369461337966985e-06, "loss": 0.0001, "step": 32150 }, { "epoch": 6.985230234578627, "grad_norm": 0.0005218314472585917, "learning_rate": 6.355886185925283e-06, "loss": 0.0001, "step": 32160 }, { "epoch": 6.9874022589053, "grad_norm": 0.0005293041467666626, "learning_rate": 6.34231103388358e-06, "loss": 0.0107, "step": 32170 }, { "epoch": 6.989574283231972, "grad_norm": 0.006864586845040321, "learning_rate": 6.328735881841877e-06, "loss": 0.0161, "step": 32180 }, { "epoch": 6.991746307558644, "grad_norm": 0.0005798496422357857, "learning_rate": 6.315160729800174e-06, "loss": 0.0014, "step": 32190 }, { "epoch": 6.993918331885317, "grad_norm": 0.20845529437065125, "learning_rate": 6.301585577758472e-06, "loss": 0.0051, "step": 32200 }, { "epoch": 6.996090356211989, "grad_norm": 0.004219398833811283, "learning_rate": 6.288010425716768e-06, "loss": 0.0003, "step": 32210 }, { "epoch": 6.998262380538662, "grad_norm": 0.0004969558212906122, "learning_rate": 6.274435273675065e-06, "loss": 0.0001, "step": 32220 }, { "epoch": 7.0, "eval_f1": 0.6423357664233577, "eval_loss": 0.08360765874385834, "eval_runtime": 82.7975, "eval_samples_per_second": 120.475, "eval_steps_per_second": 7.536, "step": 32228 }, { "epoch": 7.000434404865334, "grad_norm": 0.000587178859859705, "learning_rate": 6.260860121633362e-06, "loss": 0.0003, "step": 32230 }, { "epoch": 7.002606429192007, "grad_norm": 0.0005318076582625508, "learning_rate": 6.24728496959166e-06, "loss": 0.0001, "step": 32240 }, { "epoch": 7.0047784535186794, "grad_norm": 0.0005707778618671, "learning_rate": 6.233709817549957e-06, "loss": 0.0001, "step": 32250 }, { "epoch": 7.0069504778453515, "grad_norm": 0.0004858635657001287, "learning_rate": 6.2201346655082535e-06, "loss": 0.0001, "step": 32260 }, { "epoch": 7.0091225021720245, "grad_norm": 0.0015563979977741838, "learning_rate": 6.206559513466551e-06, "loss": 0.0001, "step": 32270 }, { "epoch": 7.0112945264986966, "grad_norm": 0.0009650535066612065, "learning_rate": 6.192984361424848e-06, "loss": 0.0001, "step": 32280 }, { "epoch": 7.0134665508253695, "grad_norm": 0.0005269676330499351, "learning_rate": 6.1794092093831455e-06, "loss": 0.0001, "step": 32290 }, { "epoch": 7.015638575152042, "grad_norm": 0.0009328114101663232, "learning_rate": 6.165834057341442e-06, "loss": 0.0001, "step": 32300 }, { "epoch": 7.0178105994787146, "grad_norm": 0.0004974757903255522, "learning_rate": 6.152258905299739e-06, "loss": 0.0046, "step": 32310 }, { "epoch": 7.019982623805387, "grad_norm": 0.005733178462833166, "learning_rate": 6.138683753258037e-06, "loss": 0.0001, "step": 32320 }, { "epoch": 7.022154648132059, "grad_norm": 0.0009101475006900728, "learning_rate": 6.125108601216334e-06, "loss": 0.0042, "step": 32330 }, { "epoch": 7.024326672458732, "grad_norm": 0.00048514435184188187, "learning_rate": 6.1115334491746315e-06, "loss": 0.0, "step": 32340 }, { "epoch": 7.026498696785404, "grad_norm": 0.0004828857199754566, "learning_rate": 6.097958297132928e-06, "loss": 0.0001, "step": 32350 }, { "epoch": 7.028670721112077, "grad_norm": 0.00048441332182846963, "learning_rate": 6.084383145091225e-06, "loss": 0.0041, "step": 32360 }, { "epoch": 7.030842745438749, "grad_norm": 0.0004826818476431072, "learning_rate": 6.070807993049523e-06, "loss": 0.0, "step": 32370 }, { "epoch": 7.033014769765422, "grad_norm": 0.0004850332625210285, "learning_rate": 6.05723284100782e-06, "loss": 0.0001, "step": 32380 }, { "epoch": 7.035186794092094, "grad_norm": 0.0004887464456260204, "learning_rate": 6.0436576889661165e-06, "loss": 0.0061, "step": 32390 }, { "epoch": 7.037358818418766, "grad_norm": 0.00047691402141936123, "learning_rate": 6.030082536924414e-06, "loss": 0.0001, "step": 32400 }, { "epoch": 7.039530842745439, "grad_norm": 0.0014148523332551122, "learning_rate": 6.01650738488271e-06, "loss": 0.0001, "step": 32410 }, { "epoch": 7.041702867072111, "grad_norm": 0.0004779113514814526, "learning_rate": 6.002932232841009e-06, "loss": 0.0077, "step": 32420 }, { "epoch": 7.043874891398784, "grad_norm": 0.00047428891411982477, "learning_rate": 5.989357080799305e-06, "loss": 0.0001, "step": 32430 }, { "epoch": 7.046046915725456, "grad_norm": 0.0006224108510650694, "learning_rate": 5.9757819287576025e-06, "loss": 0.006, "step": 32440 }, { "epoch": 7.048218940052129, "grad_norm": 0.0004947124980390072, "learning_rate": 5.962206776715899e-06, "loss": 0.0001, "step": 32450 }, { "epoch": 7.050390964378801, "grad_norm": 0.0007785210036672652, "learning_rate": 5.948631624674196e-06, "loss": 0.0001, "step": 32460 }, { "epoch": 7.052562988705474, "grad_norm": 0.0006155652808956802, "learning_rate": 5.935056472632494e-06, "loss": 0.0001, "step": 32470 }, { "epoch": 7.054735013032146, "grad_norm": 0.0004885067464783788, "learning_rate": 5.921481320590791e-06, "loss": 0.0001, "step": 32480 }, { "epoch": 7.056907037358818, "grad_norm": 0.0008703715284354985, "learning_rate": 5.9079061685490876e-06, "loss": 0.0001, "step": 32490 }, { "epoch": 7.059079061685491, "grad_norm": 0.0004715079558081925, "learning_rate": 5.894331016507385e-06, "loss": 0.0033, "step": 32500 }, { "epoch": 7.061251086012163, "grad_norm": 0.4291040599346161, "learning_rate": 5.880755864465682e-06, "loss": 0.0059, "step": 32510 }, { "epoch": 7.063423110338836, "grad_norm": 0.0004748949722852558, "learning_rate": 5.86718071242398e-06, "loss": 0.0001, "step": 32520 }, { "epoch": 7.065595134665508, "grad_norm": 0.00047266524052247405, "learning_rate": 5.853605560382277e-06, "loss": 0.0001, "step": 32530 }, { "epoch": 7.067767158992181, "grad_norm": 0.0004755932022817433, "learning_rate": 5.8400304083405735e-06, "loss": 0.0001, "step": 32540 }, { "epoch": 7.069939183318853, "grad_norm": 0.0011011961614713073, "learning_rate": 5.826455256298871e-06, "loss": 0.0001, "step": 32550 }, { "epoch": 7.072111207645525, "grad_norm": 0.0004747462226077914, "learning_rate": 5.812880104257168e-06, "loss": 0.0001, "step": 32560 }, { "epoch": 7.074283231972198, "grad_norm": 0.0008834132459014654, "learning_rate": 5.7993049522154656e-06, "loss": 0.0002, "step": 32570 }, { "epoch": 7.07645525629887, "grad_norm": 0.00047799412277527153, "learning_rate": 5.785729800173762e-06, "loss": 0.0001, "step": 32580 }, { "epoch": 7.078627280625543, "grad_norm": 0.00047306338092312217, "learning_rate": 5.7721546481320594e-06, "loss": 0.0008, "step": 32590 }, { "epoch": 7.080799304952215, "grad_norm": 0.00047740963054820895, "learning_rate": 5.758579496090356e-06, "loss": 0.009, "step": 32600 }, { "epoch": 7.082971329278888, "grad_norm": 0.0004777971771545708, "learning_rate": 5.745004344048654e-06, "loss": 0.0001, "step": 32610 }, { "epoch": 7.08514335360556, "grad_norm": 0.005595957860350609, "learning_rate": 5.731429192006951e-06, "loss": 0.0001, "step": 32620 }, { "epoch": 7.087315377932232, "grad_norm": 0.00048442769912071526, "learning_rate": 5.717854039965248e-06, "loss": 0.0001, "step": 32630 }, { "epoch": 7.089487402258905, "grad_norm": 0.00048627701471559703, "learning_rate": 5.7042788879235445e-06, "loss": 0.0001, "step": 32640 }, { "epoch": 7.0916594265855775, "grad_norm": 0.0004757237038575113, "learning_rate": 5.690703735881842e-06, "loss": 0.0041, "step": 32650 }, { "epoch": 7.09383145091225, "grad_norm": 0.0006175344460643828, "learning_rate": 5.677128583840139e-06, "loss": 0.0261, "step": 32660 }, { "epoch": 7.0960034752389225, "grad_norm": 0.0004718822310678661, "learning_rate": 5.6635534317984366e-06, "loss": 0.0001, "step": 32670 }, { "epoch": 7.0981754995655955, "grad_norm": 0.00047846182133071125, "learning_rate": 5.649978279756733e-06, "loss": 0.0, "step": 32680 }, { "epoch": 7.1003475238922675, "grad_norm": 0.001272359979338944, "learning_rate": 5.6364031277150304e-06, "loss": 0.0001, "step": 32690 }, { "epoch": 7.10251954821894, "grad_norm": 0.00047480466309934855, "learning_rate": 5.622827975673328e-06, "loss": 0.0001, "step": 32700 }, { "epoch": 7.104691572545613, "grad_norm": 0.0008412067545577884, "learning_rate": 5.609252823631625e-06, "loss": 0.0001, "step": 32710 }, { "epoch": 7.106863596872285, "grad_norm": 0.0004680381389334798, "learning_rate": 5.595677671589922e-06, "loss": 0.0, "step": 32720 }, { "epoch": 7.109035621198958, "grad_norm": 0.000462341500679031, "learning_rate": 5.582102519548219e-06, "loss": 0.0052, "step": 32730 }, { "epoch": 7.11120764552563, "grad_norm": 0.00048780019278638065, "learning_rate": 5.568527367506516e-06, "loss": 0.0001, "step": 32740 }, { "epoch": 7.113379669852303, "grad_norm": 0.0004658056132029742, "learning_rate": 5.554952215464814e-06, "loss": 0.0001, "step": 32750 }, { "epoch": 7.115551694178975, "grad_norm": 0.0010021216003224254, "learning_rate": 5.541377063423111e-06, "loss": 0.0047, "step": 32760 }, { "epoch": 7.117723718505648, "grad_norm": 0.00046110479161143303, "learning_rate": 5.527801911381408e-06, "loss": 0.0043, "step": 32770 }, { "epoch": 7.11989574283232, "grad_norm": 0.0004618534876499325, "learning_rate": 5.514226759339705e-06, "loss": 0.0, "step": 32780 }, { "epoch": 7.122067767158992, "grad_norm": 0.0004645264125429094, "learning_rate": 5.500651607298002e-06, "loss": 0.0001, "step": 32790 }, { "epoch": 7.124239791485665, "grad_norm": 0.00046783083234913647, "learning_rate": 5.4870764552563e-06, "loss": 0.0001, "step": 32800 }, { "epoch": 7.126411815812337, "grad_norm": 0.0021647040266543627, "learning_rate": 5.473501303214596e-06, "loss": 0.0001, "step": 32810 }, { "epoch": 7.12858384013901, "grad_norm": 0.00046100158942863345, "learning_rate": 5.4599261511728935e-06, "loss": 0.0076, "step": 32820 }, { "epoch": 7.130755864465682, "grad_norm": 0.0004608416638802737, "learning_rate": 5.44635099913119e-06, "loss": 0.0001, "step": 32830 }, { "epoch": 7.132927888792355, "grad_norm": 0.00045467689051292837, "learning_rate": 5.432775847089487e-06, "loss": 0.0001, "step": 32840 }, { "epoch": 7.135099913119027, "grad_norm": 0.0013107570121064782, "learning_rate": 5.419200695047785e-06, "loss": 0.0052, "step": 32850 }, { "epoch": 7.137271937445699, "grad_norm": 0.0004807122459169477, "learning_rate": 5.405625543006082e-06, "loss": 0.0046, "step": 32860 }, { "epoch": 7.139443961772372, "grad_norm": 0.0009218992199748755, "learning_rate": 5.392050390964379e-06, "loss": 0.0043, "step": 32870 }, { "epoch": 7.141615986099044, "grad_norm": 0.0004588186275213957, "learning_rate": 5.378475238922676e-06, "loss": 0.0001, "step": 32880 }, { "epoch": 7.143788010425717, "grad_norm": 0.0008795844041742384, "learning_rate": 5.364900086880973e-06, "loss": 0.0067, "step": 32890 }, { "epoch": 7.145960034752389, "grad_norm": 0.0004757294664159417, "learning_rate": 5.351324934839271e-06, "loss": 0.0001, "step": 32900 }, { "epoch": 7.148132059079062, "grad_norm": 0.00046267296420410275, "learning_rate": 5.337749782797567e-06, "loss": 0.0047, "step": 32910 }, { "epoch": 7.150304083405734, "grad_norm": 0.000463083473732695, "learning_rate": 5.3241746307558645e-06, "loss": 0.0001, "step": 32920 }, { "epoch": 7.152476107732406, "grad_norm": 0.0004575937055051327, "learning_rate": 5.310599478714162e-06, "loss": 0.0001, "step": 32930 }, { "epoch": 7.154648132059079, "grad_norm": 0.0041653853841125965, "learning_rate": 5.297024326672459e-06, "loss": 0.0001, "step": 32940 }, { "epoch": 7.156820156385751, "grad_norm": 0.00046218730858527124, "learning_rate": 5.283449174630757e-06, "loss": 0.0, "step": 32950 }, { "epoch": 7.158992180712424, "grad_norm": 0.0015792973572388291, "learning_rate": 5.269874022589053e-06, "loss": 0.0001, "step": 32960 }, { "epoch": 7.161164205039096, "grad_norm": 0.0004536303167697042, "learning_rate": 5.2562988705473505e-06, "loss": 0.0001, "step": 32970 }, { "epoch": 7.163336229365769, "grad_norm": 0.00048602151218801737, "learning_rate": 5.242723718505648e-06, "loss": 0.003, "step": 32980 }, { "epoch": 7.165508253692441, "grad_norm": 0.00046395332901738584, "learning_rate": 5.229148566463945e-06, "loss": 0.0001, "step": 32990 }, { "epoch": 7.167680278019114, "grad_norm": 0.0008334364974871278, "learning_rate": 5.215573414422242e-06, "loss": 0.0041, "step": 33000 }, { "epoch": 7.169852302345786, "grad_norm": 0.00045471618068404496, "learning_rate": 5.201998262380539e-06, "loss": 0.0043, "step": 33010 }, { "epoch": 7.172024326672458, "grad_norm": 0.00046859317808412015, "learning_rate": 5.1884231103388356e-06, "loss": 0.0063, "step": 33020 }, { "epoch": 7.174196350999131, "grad_norm": 0.000619838887359947, "learning_rate": 5.174847958297134e-06, "loss": 0.0001, "step": 33030 }, { "epoch": 7.176368375325803, "grad_norm": 0.0004607265873346478, "learning_rate": 5.16127280625543e-06, "loss": 0.0001, "step": 33040 }, { "epoch": 7.178540399652476, "grad_norm": 0.0004701963916886598, "learning_rate": 5.147697654213728e-06, "loss": 0.0001, "step": 33050 }, { "epoch": 7.180712423979148, "grad_norm": 0.0004519505600910634, "learning_rate": 5.134122502172024e-06, "loss": 0.0035, "step": 33060 }, { "epoch": 7.182884448305821, "grad_norm": 0.00045899933320470154, "learning_rate": 5.1205473501303215e-06, "loss": 0.0, "step": 33070 }, { "epoch": 7.1850564726324935, "grad_norm": 0.00045867246808484197, "learning_rate": 5.106972198088619e-06, "loss": 0.0001, "step": 33080 }, { "epoch": 7.1872284969591655, "grad_norm": 0.0005122332950122654, "learning_rate": 5.093397046046916e-06, "loss": 0.0001, "step": 33090 }, { "epoch": 7.1894005212858385, "grad_norm": 0.0004529119178187102, "learning_rate": 5.079821894005213e-06, "loss": 0.0001, "step": 33100 }, { "epoch": 7.191572545612511, "grad_norm": 0.00045432275510393083, "learning_rate": 5.06624674196351e-06, "loss": 0.0001, "step": 33110 }, { "epoch": 7.1937445699391835, "grad_norm": 0.0006407810724340379, "learning_rate": 5.052671589921807e-06, "loss": 0.0001, "step": 33120 }, { "epoch": 7.195916594265856, "grad_norm": 0.00045336701441556215, "learning_rate": 5.039096437880105e-06, "loss": 0.0, "step": 33130 }, { "epoch": 7.198088618592529, "grad_norm": 0.0004545553238131106, "learning_rate": 5.025521285838402e-06, "loss": 0.0001, "step": 33140 }, { "epoch": 7.200260642919201, "grad_norm": 0.00044919364154338837, "learning_rate": 5.011946133796699e-06, "loss": 0.0001, "step": 33150 }, { "epoch": 7.202432667245873, "grad_norm": 0.00045379812945611775, "learning_rate": 4.998370981754996e-06, "loss": 0.0001, "step": 33160 }, { "epoch": 7.204604691572546, "grad_norm": 0.0004533766768872738, "learning_rate": 4.984795829713293e-06, "loss": 0.0001, "step": 33170 }, { "epoch": 7.206776715899218, "grad_norm": 0.0009349008905701339, "learning_rate": 4.971220677671591e-06, "loss": 0.0001, "step": 33180 }, { "epoch": 7.208948740225891, "grad_norm": 0.0005631750682368875, "learning_rate": 4.957645525629887e-06, "loss": 0.0, "step": 33190 }, { "epoch": 7.211120764552563, "grad_norm": 0.00044737185817211866, "learning_rate": 4.9440703735881846e-06, "loss": 0.0001, "step": 33200 }, { "epoch": 7.213292788879236, "grad_norm": 0.00045651328400708735, "learning_rate": 4.930495221546481e-06, "loss": 0.0054, "step": 33210 }, { "epoch": 7.215464813205908, "grad_norm": 0.0006020652363076806, "learning_rate": 4.916920069504779e-06, "loss": 0.0001, "step": 33220 }, { "epoch": 7.217636837532581, "grad_norm": 0.0004551556194201112, "learning_rate": 4.903344917463076e-06, "loss": 0.0001, "step": 33230 }, { "epoch": 7.219808861859253, "grad_norm": 0.0004467817780096084, "learning_rate": 4.889769765421373e-06, "loss": 0.0001, "step": 33240 }, { "epoch": 7.221980886185925, "grad_norm": 0.0004483225638978183, "learning_rate": 4.87619461337967e-06, "loss": 0.0001, "step": 33250 }, { "epoch": 7.224152910512598, "grad_norm": 0.004001400899142027, "learning_rate": 4.862619461337967e-06, "loss": 0.0049, "step": 33260 }, { "epoch": 7.22632493483927, "grad_norm": 0.00044646792230196297, "learning_rate": 4.849044309296264e-06, "loss": 0.0001, "step": 33270 }, { "epoch": 7.228496959165943, "grad_norm": 0.00044519882067106664, "learning_rate": 4.835469157254562e-06, "loss": 0.0001, "step": 33280 }, { "epoch": 7.230668983492615, "grad_norm": 0.0004504416137933731, "learning_rate": 4.821894005212858e-06, "loss": 0.0001, "step": 33290 }, { "epoch": 7.232841007819288, "grad_norm": 0.0007989492733031511, "learning_rate": 4.808318853171156e-06, "loss": 0.0001, "step": 33300 }, { "epoch": 7.23501303214596, "grad_norm": 0.00045564546599052846, "learning_rate": 4.794743701129453e-06, "loss": 0.0, "step": 33310 }, { "epoch": 7.237185056472632, "grad_norm": 0.0008873406331986189, "learning_rate": 4.78116854908775e-06, "loss": 0.0001, "step": 33320 }, { "epoch": 7.239357080799305, "grad_norm": 0.00045151382801122963, "learning_rate": 4.767593397046048e-06, "loss": 0.0001, "step": 33330 }, { "epoch": 7.241529105125977, "grad_norm": 0.0004555900814011693, "learning_rate": 4.754018245004344e-06, "loss": 0.0001, "step": 33340 }, { "epoch": 7.24370112945265, "grad_norm": 0.0004449512925930321, "learning_rate": 4.7404430929626415e-06, "loss": 0.0002, "step": 33350 }, { "epoch": 7.245873153779322, "grad_norm": 0.00044150289613753557, "learning_rate": 4.726867940920939e-06, "loss": 0.0001, "step": 33360 }, { "epoch": 7.248045178105995, "grad_norm": 0.0004383629420772195, "learning_rate": 4.713292788879236e-06, "loss": 0.005, "step": 33370 }, { "epoch": 7.250217202432667, "grad_norm": 0.004365169908851385, "learning_rate": 4.699717636837533e-06, "loss": 0.0001, "step": 33380 }, { "epoch": 7.252389226759339, "grad_norm": 0.0004420215846039355, "learning_rate": 4.68614248479583e-06, "loss": 0.0045, "step": 33390 }, { "epoch": 7.254561251086012, "grad_norm": 0.00043925183126702905, "learning_rate": 4.672567332754127e-06, "loss": 0.0001, "step": 33400 }, { "epoch": 7.256733275412684, "grad_norm": 0.00044884896487928927, "learning_rate": 4.658992180712425e-06, "loss": 0.0001, "step": 33410 }, { "epoch": 7.258905299739357, "grad_norm": 0.00044087052810937166, "learning_rate": 4.645417028670721e-06, "loss": 0.0001, "step": 33420 }, { "epoch": 7.261077324066029, "grad_norm": 0.3532722294330597, "learning_rate": 4.631841876629019e-06, "loss": 0.0055, "step": 33430 }, { "epoch": 7.263249348392702, "grad_norm": 0.00044010658166371286, "learning_rate": 4.618266724587315e-06, "loss": 0.0, "step": 33440 }, { "epoch": 7.265421372719374, "grad_norm": 0.00044461956713348627, "learning_rate": 4.6046915725456125e-06, "loss": 0.0001, "step": 33450 }, { "epoch": 7.267593397046047, "grad_norm": 0.0010231295600533485, "learning_rate": 4.59111642050391e-06, "loss": 0.0001, "step": 33460 }, { "epoch": 7.269765421372719, "grad_norm": 0.00043528215610422194, "learning_rate": 4.577541268462207e-06, "loss": 0.0001, "step": 33470 }, { "epoch": 7.2719374456993915, "grad_norm": 0.0004665793967433274, "learning_rate": 4.563966116420504e-06, "loss": 0.0001, "step": 33480 }, { "epoch": 7.2741094700260645, "grad_norm": 0.0008605076000094414, "learning_rate": 4.550390964378801e-06, "loss": 0.0001, "step": 33490 }, { "epoch": 7.2762814943527365, "grad_norm": 0.0004442806530278176, "learning_rate": 4.5368158123370985e-06, "loss": 0.0001, "step": 33500 }, { "epoch": 7.2784535186794095, "grad_norm": 0.0004634595534298569, "learning_rate": 4.523240660295396e-06, "loss": 0.0001, "step": 33510 }, { "epoch": 7.280625543006082, "grad_norm": 0.002221224131062627, "learning_rate": 4.509665508253692e-06, "loss": 0.0001, "step": 33520 }, { "epoch": 7.2827975673327545, "grad_norm": 0.0004456366295926273, "learning_rate": 4.49609035621199e-06, "loss": 0.0001, "step": 33530 }, { "epoch": 7.284969591659427, "grad_norm": 0.000450130901299417, "learning_rate": 4.482515204170287e-06, "loss": 0.0, "step": 33540 }, { "epoch": 7.287141615986099, "grad_norm": 0.00043738310341723263, "learning_rate": 4.468940052128584e-06, "loss": 0.0, "step": 33550 }, { "epoch": 7.289313640312772, "grad_norm": 0.0006523529882542789, "learning_rate": 4.455364900086882e-06, "loss": 0.0041, "step": 33560 }, { "epoch": 7.291485664639444, "grad_norm": 0.0004349502851255238, "learning_rate": 4.441789748045178e-06, "loss": 0.0, "step": 33570 }, { "epoch": 7.293657688966117, "grad_norm": 0.0004345090710557997, "learning_rate": 4.428214596003476e-06, "loss": 0.0, "step": 33580 }, { "epoch": 7.295829713292789, "grad_norm": 0.4664416015148163, "learning_rate": 4.414639443961772e-06, "loss": 0.0065, "step": 33590 }, { "epoch": 7.298001737619462, "grad_norm": 0.0004413572605699301, "learning_rate": 4.40106429192007e-06, "loss": 0.0034, "step": 33600 }, { "epoch": 7.300173761946134, "grad_norm": 0.4366700351238251, "learning_rate": 4.387489139878367e-06, "loss": 0.0048, "step": 33610 }, { "epoch": 7.302345786272806, "grad_norm": 0.0004471038409974426, "learning_rate": 4.373913987836664e-06, "loss": 0.0001, "step": 33620 }, { "epoch": 7.304517810599479, "grad_norm": 0.0004463079967536032, "learning_rate": 4.360338835794961e-06, "loss": 0.0001, "step": 33630 }, { "epoch": 7.306689834926151, "grad_norm": 0.0004372438706923276, "learning_rate": 4.346763683753258e-06, "loss": 0.0, "step": 33640 }, { "epoch": 7.308861859252824, "grad_norm": 0.0018223769729956985, "learning_rate": 4.333188531711555e-06, "loss": 0.0001, "step": 33650 }, { "epoch": 7.311033883579496, "grad_norm": 0.00043676598579622805, "learning_rate": 4.319613379669853e-06, "loss": 0.0, "step": 33660 }, { "epoch": 7.313205907906169, "grad_norm": 0.0015346236759796739, "learning_rate": 4.306038227628149e-06, "loss": 0.0001, "step": 33670 }, { "epoch": 7.315377932232841, "grad_norm": 0.001203192281536758, "learning_rate": 4.292463075586447e-06, "loss": 0.0001, "step": 33680 }, { "epoch": 7.317549956559514, "grad_norm": 0.00043293728958815336, "learning_rate": 4.278887923544744e-06, "loss": 0.0001, "step": 33690 }, { "epoch": 7.319721980886186, "grad_norm": 0.000446510297479108, "learning_rate": 4.265312771503041e-06, "loss": 0.0043, "step": 33700 }, { "epoch": 7.321894005212858, "grad_norm": 0.0004393671406432986, "learning_rate": 4.251737619461338e-06, "loss": 0.0, "step": 33710 }, { "epoch": 7.324066029539531, "grad_norm": 0.0005656811990775168, "learning_rate": 4.238162467419635e-06, "loss": 0.0001, "step": 33720 }, { "epoch": 7.326238053866203, "grad_norm": 0.00045842313556931913, "learning_rate": 4.224587315377932e-06, "loss": 0.0001, "step": 33730 }, { "epoch": 7.328410078192876, "grad_norm": 0.00043327995808795094, "learning_rate": 4.21101216333623e-06, "loss": 0.0, "step": 33740 }, { "epoch": 7.330582102519548, "grad_norm": 0.0004443236975930631, "learning_rate": 4.197437011294527e-06, "loss": 0.0038, "step": 33750 }, { "epoch": 7.332754126846221, "grad_norm": 0.000434982095612213, "learning_rate": 4.183861859252824e-06, "loss": 0.0001, "step": 33760 }, { "epoch": 7.334926151172893, "grad_norm": 0.00046286580618470907, "learning_rate": 4.170286707211121e-06, "loss": 0.0001, "step": 33770 }, { "epoch": 7.337098175499565, "grad_norm": 0.0004482912190724164, "learning_rate": 4.156711555169418e-06, "loss": 0.0001, "step": 33780 }, { "epoch": 7.339270199826238, "grad_norm": 0.00043282005935907364, "learning_rate": 4.143136403127716e-06, "loss": 0.0001, "step": 33790 }, { "epoch": 7.34144222415291, "grad_norm": 0.0020901900716125965, "learning_rate": 4.129561251086012e-06, "loss": 0.0001, "step": 33800 }, { "epoch": 7.343614248479583, "grad_norm": 0.0004341735620982945, "learning_rate": 4.11598609904431e-06, "loss": 0.0041, "step": 33810 }, { "epoch": 7.345786272806255, "grad_norm": 0.00043011820525862277, "learning_rate": 4.102410947002606e-06, "loss": 0.0, "step": 33820 }, { "epoch": 7.347958297132928, "grad_norm": 0.00043060528696514666, "learning_rate": 4.0888357949609036e-06, "loss": 0.0047, "step": 33830 }, { "epoch": 7.3501303214596, "grad_norm": 0.00042701844358816743, "learning_rate": 4.075260642919201e-06, "loss": 0.0036, "step": 33840 }, { "epoch": 7.352302345786272, "grad_norm": 0.0009441542206332088, "learning_rate": 4.061685490877498e-06, "loss": 0.0001, "step": 33850 }, { "epoch": 7.354474370112945, "grad_norm": 0.0007577169453725219, "learning_rate": 4.048110338835795e-06, "loss": 0.0001, "step": 33860 }, { "epoch": 7.356646394439617, "grad_norm": 0.00048064981820061803, "learning_rate": 4.034535186794092e-06, "loss": 0.0001, "step": 33870 }, { "epoch": 7.35881841876629, "grad_norm": 0.00044433947186917067, "learning_rate": 4.0209600347523895e-06, "loss": 0.0, "step": 33880 }, { "epoch": 7.3609904430929625, "grad_norm": 0.0004222741990815848, "learning_rate": 4.007384882710687e-06, "loss": 0.0, "step": 33890 }, { "epoch": 7.363162467419635, "grad_norm": 0.0008366369875147939, "learning_rate": 3.993809730668983e-06, "loss": 0.0416, "step": 33900 }, { "epoch": 7.3653344917463075, "grad_norm": 0.0004299264110159129, "learning_rate": 3.980234578627281e-06, "loss": 0.0001, "step": 33910 }, { "epoch": 7.3675065160729805, "grad_norm": 0.0007753439131192863, "learning_rate": 3.966659426585577e-06, "loss": 0.0001, "step": 33920 }, { "epoch": 7.3696785403996525, "grad_norm": 0.0007699221605435014, "learning_rate": 3.9530842745438754e-06, "loss": 0.0001, "step": 33930 }, { "epoch": 7.371850564726325, "grad_norm": 0.0004444130463525653, "learning_rate": 3.939509122502173e-06, "loss": 0.0001, "step": 33940 }, { "epoch": 7.374022589052998, "grad_norm": 0.0004256993706803769, "learning_rate": 3.925933970460469e-06, "loss": 0.0, "step": 33950 }, { "epoch": 7.37619461337967, "grad_norm": 0.00043124344665557146, "learning_rate": 3.912358818418767e-06, "loss": 0.0, "step": 33960 }, { "epoch": 7.378366637706343, "grad_norm": 0.0004286852781660855, "learning_rate": 3.898783666377063e-06, "loss": 0.0, "step": 33970 }, { "epoch": 7.380538662033015, "grad_norm": 0.0004296654078643769, "learning_rate": 3.885208514335361e-06, "loss": 0.0, "step": 33980 }, { "epoch": 7.382710686359688, "grad_norm": 0.0004529616271611303, "learning_rate": 3.871633362293658e-06, "loss": 0.0, "step": 33990 }, { "epoch": 7.38488271068636, "grad_norm": 0.00044139905367046595, "learning_rate": 3.858058210251955e-06, "loss": 0.0001, "step": 34000 }, { "epoch": 7.387054735013032, "grad_norm": 0.00042843882692977786, "learning_rate": 3.844483058210252e-06, "loss": 0.0001, "step": 34010 }, { "epoch": 7.389226759339705, "grad_norm": 0.00043310338514856994, "learning_rate": 3.830907906168549e-06, "loss": 0.0, "step": 34020 }, { "epoch": 7.391398783666377, "grad_norm": 0.0007619079551659524, "learning_rate": 3.8173327541268464e-06, "loss": 0.0001, "step": 34030 }, { "epoch": 7.39357080799305, "grad_norm": 0.00042568857315927744, "learning_rate": 3.803757602085144e-06, "loss": 0.0001, "step": 34040 }, { "epoch": 7.395742832319722, "grad_norm": 0.00042350677540525794, "learning_rate": 3.7901824500434403e-06, "loss": 0.0, "step": 34050 }, { "epoch": 7.397914856646395, "grad_norm": 0.0004444782971404493, "learning_rate": 3.776607298001738e-06, "loss": 0.0, "step": 34060 }, { "epoch": 7.400086880973067, "grad_norm": 0.0015245000831782818, "learning_rate": 3.7630321459600346e-06, "loss": 0.0061, "step": 34070 }, { "epoch": 7.402258905299739, "grad_norm": 0.0004239232512190938, "learning_rate": 3.749456993918332e-06, "loss": 0.0001, "step": 34080 }, { "epoch": 7.404430929626412, "grad_norm": 0.0025918257888406515, "learning_rate": 3.735881841876629e-06, "loss": 0.0001, "step": 34090 }, { "epoch": 7.406602953953084, "grad_norm": 0.002145191188901663, "learning_rate": 3.7223066898349262e-06, "loss": 0.0, "step": 34100 }, { "epoch": 7.408774978279757, "grad_norm": 0.0013365385821089149, "learning_rate": 3.708731537793223e-06, "loss": 0.0001, "step": 34110 }, { "epoch": 7.410947002606429, "grad_norm": 0.20957709848880768, "learning_rate": 3.6951563857515205e-06, "loss": 0.0053, "step": 34120 }, { "epoch": 7.413119026933102, "grad_norm": 0.0004579925735015422, "learning_rate": 3.6815812337098175e-06, "loss": 0.0001, "step": 34130 }, { "epoch": 7.415291051259774, "grad_norm": 0.00043436442501842976, "learning_rate": 3.668006081668115e-06, "loss": 0.0001, "step": 34140 }, { "epoch": 7.417463075586447, "grad_norm": 0.00043290789471939206, "learning_rate": 3.654430929626412e-06, "loss": 0.0001, "step": 34150 }, { "epoch": 7.419635099913119, "grad_norm": 0.0004213610081933439, "learning_rate": 3.640855777584709e-06, "loss": 0.0037, "step": 34160 }, { "epoch": 7.421807124239791, "grad_norm": 0.0004223252472002059, "learning_rate": 3.6272806255430065e-06, "loss": 0.0001, "step": 34170 }, { "epoch": 7.423979148566464, "grad_norm": 0.00043533978168852627, "learning_rate": 3.6137054735013034e-06, "loss": 0.0, "step": 34180 }, { "epoch": 7.426151172893136, "grad_norm": 0.0019435312133282423, "learning_rate": 3.6001303214596007e-06, "loss": 0.0001, "step": 34190 }, { "epoch": 7.428323197219809, "grad_norm": 0.00043479521991685033, "learning_rate": 3.5865551694178977e-06, "loss": 0.0, "step": 34200 }, { "epoch": 7.430495221546481, "grad_norm": 0.000424668105551973, "learning_rate": 3.572980017376195e-06, "loss": 0.0001, "step": 34210 }, { "epoch": 7.432667245873154, "grad_norm": 0.0010326962219551206, "learning_rate": 3.559404865334492e-06, "loss": 0.0001, "step": 34220 }, { "epoch": 7.434839270199826, "grad_norm": 0.0004303526075091213, "learning_rate": 3.5458297132927893e-06, "loss": 0.0001, "step": 34230 }, { "epoch": 7.437011294526498, "grad_norm": 0.0009326340514235198, "learning_rate": 3.532254561251086e-06, "loss": 0.0001, "step": 34240 }, { "epoch": 7.439183318853171, "grad_norm": 0.16912616789340973, "learning_rate": 3.5186794092093836e-06, "loss": 0.0035, "step": 34250 }, { "epoch": 7.441355343179843, "grad_norm": 0.0004265088646207005, "learning_rate": 3.50510425716768e-06, "loss": 0.0001, "step": 34260 }, { "epoch": 7.443527367506516, "grad_norm": 0.00042686297092586756, "learning_rate": 3.491529105125978e-06, "loss": 0.0001, "step": 34270 }, { "epoch": 7.445699391833188, "grad_norm": 0.00045173687976785004, "learning_rate": 3.4779539530842744e-06, "loss": 0.0001, "step": 34280 }, { "epoch": 7.447871416159861, "grad_norm": 0.0004314729885663837, "learning_rate": 3.4643788010425718e-06, "loss": 0.0001, "step": 34290 }, { "epoch": 7.450043440486533, "grad_norm": 0.0004578959196805954, "learning_rate": 3.4508036490008687e-06, "loss": 0.0001, "step": 34300 }, { "epoch": 7.4522154648132055, "grad_norm": 0.0007546443957835436, "learning_rate": 3.437228496959166e-06, "loss": 0.0042, "step": 34310 }, { "epoch": 7.4543874891398785, "grad_norm": 0.0004262760339770466, "learning_rate": 3.423653344917463e-06, "loss": 0.0001, "step": 34320 }, { "epoch": 7.4565595134665505, "grad_norm": 0.0004256195097696036, "learning_rate": 3.4100781928757603e-06, "loss": 0.0, "step": 34330 }, { "epoch": 7.4587315377932235, "grad_norm": 0.0004252404614817351, "learning_rate": 3.3965030408340577e-06, "loss": 0.0001, "step": 34340 }, { "epoch": 7.460903562119896, "grad_norm": 0.0004220245173200965, "learning_rate": 3.3829278887923546e-06, "loss": 0.0, "step": 34350 }, { "epoch": 7.4630755864465685, "grad_norm": 0.00042326172115281224, "learning_rate": 3.369352736750652e-06, "loss": 0.0001, "step": 34360 }, { "epoch": 7.465247610773241, "grad_norm": 0.000528325152117759, "learning_rate": 3.355777584708949e-06, "loss": 0.0, "step": 34370 }, { "epoch": 7.467419635099914, "grad_norm": 0.00047799237654544413, "learning_rate": 3.3422024326672463e-06, "loss": 0.0061, "step": 34380 }, { "epoch": 7.469591659426586, "grad_norm": 0.00041863773367367685, "learning_rate": 3.328627280625543e-06, "loss": 0.0001, "step": 34390 }, { "epoch": 7.471763683753258, "grad_norm": 0.0021037731785327196, "learning_rate": 3.3150521285838406e-06, "loss": 0.0001, "step": 34400 }, { "epoch": 7.473935708079931, "grad_norm": 0.00042101697181351483, "learning_rate": 3.3014769765421375e-06, "loss": 0.0, "step": 34410 }, { "epoch": 7.476107732406603, "grad_norm": 0.0004177862429060042, "learning_rate": 3.287901824500435e-06, "loss": 0.0001, "step": 34420 }, { "epoch": 7.478279756733276, "grad_norm": 0.000812828540802002, "learning_rate": 3.2743266724587314e-06, "loss": 0.0001, "step": 34430 }, { "epoch": 7.480451781059948, "grad_norm": 0.00043613568414002657, "learning_rate": 3.260751520417029e-06, "loss": 0.0068, "step": 34440 }, { "epoch": 7.48262380538662, "grad_norm": 0.0004363965417724103, "learning_rate": 3.2471763683753256e-06, "loss": 0.0, "step": 34450 }, { "epoch": 7.484795829713293, "grad_norm": 0.0020837662741541862, "learning_rate": 3.2336012163336234e-06, "loss": 0.0001, "step": 34460 }, { "epoch": 7.486967854039965, "grad_norm": 0.0008054388454183936, "learning_rate": 3.22002606429192e-06, "loss": 0.0001, "step": 34470 }, { "epoch": 7.489139878366638, "grad_norm": 0.0004963803221471608, "learning_rate": 3.2064509122502173e-06, "loss": 0.0, "step": 34480 }, { "epoch": 7.49131190269331, "grad_norm": 0.0004414473660290241, "learning_rate": 3.1928757602085142e-06, "loss": 0.0073, "step": 34490 }, { "epoch": 7.493483927019983, "grad_norm": 0.00043333263602107763, "learning_rate": 3.1793006081668116e-06, "loss": 0.0001, "step": 34500 }, { "epoch": 7.495655951346655, "grad_norm": 0.00043653626926243305, "learning_rate": 3.1657254561251085e-06, "loss": 0.0005, "step": 34510 }, { "epoch": 7.497827975673328, "grad_norm": 0.00042178662260994315, "learning_rate": 3.152150304083406e-06, "loss": 0.0001, "step": 34520 }, { "epoch": 7.5, "grad_norm": 0.0012135066790506244, "learning_rate": 3.138575152041703e-06, "loss": 0.0001, "step": 34530 }, { "epoch": 7.502172024326672, "grad_norm": 0.00042127963388338685, "learning_rate": 3.125e-06, "loss": 0.0001, "step": 34540 }, { "epoch": 7.504344048653345, "grad_norm": 0.00042868778109550476, "learning_rate": 3.111424847958297e-06, "loss": 0.0041, "step": 34550 }, { "epoch": 7.506516072980017, "grad_norm": 0.00043172913137823343, "learning_rate": 3.0978496959165944e-06, "loss": 0.0, "step": 34560 }, { "epoch": 7.50868809730669, "grad_norm": 0.00043645326513797045, "learning_rate": 3.0842745438748914e-06, "loss": 0.0047, "step": 34570 }, { "epoch": 7.510860121633362, "grad_norm": 0.00042725811363197863, "learning_rate": 3.0706993918331887e-06, "loss": 0.0, "step": 34580 }, { "epoch": 7.513032145960035, "grad_norm": 0.0009361863485537469, "learning_rate": 3.0571242397914857e-06, "loss": 0.0001, "step": 34590 }, { "epoch": 7.515204170286707, "grad_norm": 0.0004268494958523661, "learning_rate": 3.043549087749783e-06, "loss": 0.0, "step": 34600 }, { "epoch": 7.51737619461338, "grad_norm": 0.22192446887493134, "learning_rate": 3.02997393570808e-06, "loss": 0.0052, "step": 34610 }, { "epoch": 7.519548218940052, "grad_norm": 0.0004318969149608165, "learning_rate": 3.016398783666377e-06, "loss": 0.0001, "step": 34620 }, { "epoch": 7.521720243266724, "grad_norm": 0.00042987792403437197, "learning_rate": 3.0028236316246742e-06, "loss": 0.0055, "step": 34630 }, { "epoch": 7.523892267593397, "grad_norm": 0.0035631656646728516, "learning_rate": 2.989248479582971e-06, "loss": 0.0001, "step": 34640 }, { "epoch": 7.526064291920069, "grad_norm": 0.00042544747702777386, "learning_rate": 2.975673327541269e-06, "loss": 0.0001, "step": 34650 }, { "epoch": 7.528236316246742, "grad_norm": 0.0004204209253657609, "learning_rate": 2.962098175499566e-06, "loss": 0.0001, "step": 34660 }, { "epoch": 7.530408340573414, "grad_norm": 0.0004276661202311516, "learning_rate": 2.948523023457863e-06, "loss": 0.0, "step": 34670 }, { "epoch": 7.532580364900086, "grad_norm": 0.00043516955338418484, "learning_rate": 2.93494787141616e-06, "loss": 0.0, "step": 34680 }, { "epoch": 7.534752389226759, "grad_norm": 0.000489513564389199, "learning_rate": 2.921372719374457e-06, "loss": 0.0001, "step": 34690 }, { "epoch": 7.5369244135534315, "grad_norm": 0.000425524078309536, "learning_rate": 2.9077975673327545e-06, "loss": 0.0, "step": 34700 }, { "epoch": 7.539096437880104, "grad_norm": 0.0004260186688043177, "learning_rate": 2.8942224152910514e-06, "loss": 0.0194, "step": 34710 }, { "epoch": 7.5412684622067765, "grad_norm": 0.0004336040292400867, "learning_rate": 2.8806472632493487e-06, "loss": 0.0, "step": 34720 }, { "epoch": 7.5434404865334495, "grad_norm": 0.00045299273915588856, "learning_rate": 2.8670721112076457e-06, "loss": 0.0048, "step": 34730 }, { "epoch": 7.5456125108601215, "grad_norm": 0.00044971954775974154, "learning_rate": 2.8534969591659426e-06, "loss": 0.0, "step": 34740 }, { "epoch": 7.5477845351867945, "grad_norm": 0.0007768584764562547, "learning_rate": 2.83992180712424e-06, "loss": 0.0001, "step": 34750 }, { "epoch": 7.549956559513467, "grad_norm": 0.0005349684506654739, "learning_rate": 2.826346655082537e-06, "loss": 0.0001, "step": 34760 }, { "epoch": 7.552128583840139, "grad_norm": 0.0004436474700924009, "learning_rate": 2.8127715030408342e-06, "loss": 0.0001, "step": 34770 }, { "epoch": 7.554300608166812, "grad_norm": 0.00044845970114693046, "learning_rate": 2.799196350999131e-06, "loss": 0.0, "step": 34780 }, { "epoch": 7.556472632493484, "grad_norm": 0.0012216611066833138, "learning_rate": 2.7856211989574285e-06, "loss": 0.0001, "step": 34790 }, { "epoch": 7.558644656820157, "grad_norm": 0.0007345577469095588, "learning_rate": 2.7720460469157255e-06, "loss": 0.0001, "step": 34800 }, { "epoch": 7.560816681146829, "grad_norm": 0.00043187016854062676, "learning_rate": 2.7584708948740224e-06, "loss": 0.0001, "step": 34810 }, { "epoch": 7.562988705473502, "grad_norm": 0.0004403771017678082, "learning_rate": 2.7448957428323198e-06, "loss": 0.0001, "step": 34820 }, { "epoch": 7.565160729800174, "grad_norm": 0.18770098686218262, "learning_rate": 2.7313205907906167e-06, "loss": 0.0043, "step": 34830 }, { "epoch": 7.567332754126847, "grad_norm": 0.000443107943283394, "learning_rate": 2.717745438748914e-06, "loss": 0.0051, "step": 34840 }, { "epoch": 7.569504778453519, "grad_norm": 0.0004438256728462875, "learning_rate": 2.7041702867072114e-06, "loss": 0.0001, "step": 34850 }, { "epoch": 7.571676802780191, "grad_norm": 0.001046639634296298, "learning_rate": 2.6905951346655083e-06, "loss": 0.0001, "step": 34860 }, { "epoch": 7.573848827106864, "grad_norm": 0.0004500243521761149, "learning_rate": 2.6770199826238057e-06, "loss": 0.0042, "step": 34870 }, { "epoch": 7.576020851433536, "grad_norm": 0.0009341577533632517, "learning_rate": 2.6634448305821026e-06, "loss": 0.0001, "step": 34880 }, { "epoch": 7.578192875760209, "grad_norm": 0.00043487714719958603, "learning_rate": 2.6498696785404e-06, "loss": 0.0, "step": 34890 }, { "epoch": 7.580364900086881, "grad_norm": 0.0004606035363394767, "learning_rate": 2.636294526498697e-06, "loss": 0.0004, "step": 34900 }, { "epoch": 7.582536924413553, "grad_norm": 0.0005366328987292945, "learning_rate": 2.6227193744569943e-06, "loss": 0.0002, "step": 34910 }, { "epoch": 7.584708948740226, "grad_norm": 0.0004259934357833117, "learning_rate": 2.609144222415291e-06, "loss": 0.0001, "step": 34920 }, { "epoch": 7.586880973066898, "grad_norm": 0.00042626031790859997, "learning_rate": 2.595569070373588e-06, "loss": 0.0001, "step": 34930 }, { "epoch": 7.589052997393571, "grad_norm": 0.009608348831534386, "learning_rate": 2.5819939183318855e-06, "loss": 0.0001, "step": 34940 }, { "epoch": 7.591225021720243, "grad_norm": 0.00042460497934371233, "learning_rate": 2.5684187662901824e-06, "loss": 0.0, "step": 34950 }, { "epoch": 7.593397046046916, "grad_norm": 0.0004247224424034357, "learning_rate": 2.5548436142484798e-06, "loss": 0.0, "step": 34960 }, { "epoch": 7.595569070373588, "grad_norm": 0.00045198260340839624, "learning_rate": 2.5412684622067767e-06, "loss": 0.0001, "step": 34970 }, { "epoch": 7.597741094700261, "grad_norm": 0.00043047365033999085, "learning_rate": 2.527693310165074e-06, "loss": 0.0, "step": 34980 }, { "epoch": 7.599913119026933, "grad_norm": 0.000425986509071663, "learning_rate": 2.514118158123371e-06, "loss": 0.0001, "step": 34990 }, { "epoch": 7.602085143353605, "grad_norm": 0.0004410984693095088, "learning_rate": 2.5005430060816683e-06, "loss": 0.0001, "step": 35000 }, { "epoch": 7.604257167680278, "grad_norm": 0.0004189326718915254, "learning_rate": 2.4869678540399653e-06, "loss": 0.0, "step": 35010 }, { "epoch": 7.60642919200695, "grad_norm": 0.0004251878126524389, "learning_rate": 2.473392701998262e-06, "loss": 0.0001, "step": 35020 }, { "epoch": 7.608601216333623, "grad_norm": 0.00041855184826999903, "learning_rate": 2.4598175499565596e-06, "loss": 0.0001, "step": 35030 }, { "epoch": 7.610773240660295, "grad_norm": 0.00042429458699189126, "learning_rate": 2.4462423979148565e-06, "loss": 0.0001, "step": 35040 }, { "epoch": 7.612945264986968, "grad_norm": 0.0004268327902536839, "learning_rate": 2.432667245873154e-06, "loss": 0.0001, "step": 35050 }, { "epoch": 7.61511728931364, "grad_norm": 0.014470481313765049, "learning_rate": 2.419092093831451e-06, "loss": 0.0001, "step": 35060 }, { "epoch": 7.617289313640313, "grad_norm": 0.00042645548819564283, "learning_rate": 2.405516941789748e-06, "loss": 0.0001, "step": 35070 }, { "epoch": 7.619461337966985, "grad_norm": 0.0004529398283921182, "learning_rate": 2.3919417897480455e-06, "loss": 0.0001, "step": 35080 }, { "epoch": 7.621633362293657, "grad_norm": 0.0004270094505045563, "learning_rate": 2.3783666377063424e-06, "loss": 0.0001, "step": 35090 }, { "epoch": 7.62380538662033, "grad_norm": 0.0005416472558863461, "learning_rate": 2.3647914856646398e-06, "loss": 0.0, "step": 35100 }, { "epoch": 7.625977410947002, "grad_norm": 0.00043836349504999816, "learning_rate": 2.3512163336229367e-06, "loss": 0.0047, "step": 35110 }, { "epoch": 7.628149435273675, "grad_norm": 0.00043431203812360764, "learning_rate": 2.337641181581234e-06, "loss": 0.0, "step": 35120 }, { "epoch": 7.6303214596003475, "grad_norm": 0.0008394691394641995, "learning_rate": 2.324066029539531e-06, "loss": 0.0001, "step": 35130 }, { "epoch": 7.6324934839270195, "grad_norm": 0.00043706296128220856, "learning_rate": 2.310490877497828e-06, "loss": 0.0, "step": 35140 }, { "epoch": 7.6346655082536925, "grad_norm": 0.012723601423203945, "learning_rate": 2.2969157254561253e-06, "loss": 0.0001, "step": 35150 }, { "epoch": 7.636837532580365, "grad_norm": 0.0009298368240706623, "learning_rate": 2.2833405734144222e-06, "loss": 0.0001, "step": 35160 }, { "epoch": 7.6390095569070375, "grad_norm": 0.000419062766013667, "learning_rate": 2.2697654213727196e-06, "loss": 0.0, "step": 35170 }, { "epoch": 7.64118158123371, "grad_norm": 0.0004180653195362538, "learning_rate": 2.2561902693310165e-06, "loss": 0.0047, "step": 35180 }, { "epoch": 7.643353605560383, "grad_norm": 0.0006930680829100311, "learning_rate": 2.242615117289314e-06, "loss": 0.0001, "step": 35190 }, { "epoch": 7.645525629887055, "grad_norm": 0.0004183394485153258, "learning_rate": 2.229039965247611e-06, "loss": 0.0001, "step": 35200 }, { "epoch": 7.647697654213728, "grad_norm": 0.00047878033365122974, "learning_rate": 2.2154648132059077e-06, "loss": 0.0001, "step": 35210 }, { "epoch": 7.6498696785404, "grad_norm": 0.0004129901062697172, "learning_rate": 2.201889661164205e-06, "loss": 0.0001, "step": 35220 }, { "epoch": 7.652041702867072, "grad_norm": 0.0007938410853967071, "learning_rate": 2.188314509122502e-06, "loss": 0.0, "step": 35230 }, { "epoch": 7.654213727193745, "grad_norm": 0.0012432819930836558, "learning_rate": 2.1747393570807994e-06, "loss": 0.0001, "step": 35240 }, { "epoch": 7.656385751520417, "grad_norm": 0.00042003538692370057, "learning_rate": 2.1611642050390967e-06, "loss": 0.0, "step": 35250 }, { "epoch": 7.65855777584709, "grad_norm": 0.0012501387391239405, "learning_rate": 2.1475890529973937e-06, "loss": 0.0049, "step": 35260 }, { "epoch": 7.660729800173762, "grad_norm": 0.00042245659278705716, "learning_rate": 2.134013900955691e-06, "loss": 0.0, "step": 35270 }, { "epoch": 7.662901824500435, "grad_norm": 0.00041626309393905103, "learning_rate": 2.120438748913988e-06, "loss": 0.0, "step": 35280 }, { "epoch": 7.665073848827107, "grad_norm": 0.0007968571735545993, "learning_rate": 2.1068635968722853e-06, "loss": 0.0, "step": 35290 }, { "epoch": 7.66724587315378, "grad_norm": 0.00041930555016733706, "learning_rate": 2.0932884448305822e-06, "loss": 0.0, "step": 35300 }, { "epoch": 7.669417897480452, "grad_norm": 0.00045020724064670503, "learning_rate": 2.0797132927888796e-06, "loss": 0.0, "step": 35310 }, { "epoch": 7.671589921807124, "grad_norm": 0.0006811002967879176, "learning_rate": 2.0661381407471765e-06, "loss": 0.0001, "step": 35320 }, { "epoch": 7.673761946133797, "grad_norm": 0.3847941756248474, "learning_rate": 2.0525629887054735e-06, "loss": 0.0053, "step": 35330 }, { "epoch": 7.675933970460469, "grad_norm": 0.0004168343439232558, "learning_rate": 2.038987836663771e-06, "loss": 0.0, "step": 35340 }, { "epoch": 7.678105994787142, "grad_norm": 0.0004148240841459483, "learning_rate": 2.0254126846220677e-06, "loss": 0.0003, "step": 35350 }, { "epoch": 7.680278019113814, "grad_norm": 0.0004149214073549956, "learning_rate": 2.011837532580365e-06, "loss": 0.0001, "step": 35360 }, { "epoch": 7.682450043440486, "grad_norm": 0.0011588763445615768, "learning_rate": 1.998262380538662e-06, "loss": 0.0001, "step": 35370 }, { "epoch": 7.684622067767159, "grad_norm": 0.0008375580073334277, "learning_rate": 1.9846872284969594e-06, "loss": 0.0001, "step": 35380 }, { "epoch": 7.686794092093831, "grad_norm": 0.0009266881970688701, "learning_rate": 1.9711120764552563e-06, "loss": 0.0001, "step": 35390 }, { "epoch": 7.688966116420504, "grad_norm": 0.0004148316220380366, "learning_rate": 1.9575369244135533e-06, "loss": 0.0001, "step": 35400 }, { "epoch": 7.691138140747176, "grad_norm": 0.00043054655543528497, "learning_rate": 1.9439617723718506e-06, "loss": 0.0, "step": 35410 }, { "epoch": 7.693310165073849, "grad_norm": 0.0004217229434289038, "learning_rate": 1.9303866203301475e-06, "loss": 0.0001, "step": 35420 }, { "epoch": 7.695482189400521, "grad_norm": 0.00040780851850286126, "learning_rate": 1.916811468288445e-06, "loss": 0.0001, "step": 35430 }, { "epoch": 7.697654213727194, "grad_norm": 0.0004132968606427312, "learning_rate": 1.9032363162467418e-06, "loss": 0.0001, "step": 35440 }, { "epoch": 7.699826238053866, "grad_norm": 0.0004265105235390365, "learning_rate": 1.8896611642050394e-06, "loss": 0.0058, "step": 35450 }, { "epoch": 7.701998262380538, "grad_norm": 0.0004204078286420554, "learning_rate": 1.8760860121633365e-06, "loss": 0.0001, "step": 35460 }, { "epoch": 7.704170286707211, "grad_norm": 0.0004155028727836907, "learning_rate": 1.8625108601216337e-06, "loss": 0.0, "step": 35470 }, { "epoch": 7.706342311033883, "grad_norm": 0.00041444695671088994, "learning_rate": 1.8489357080799306e-06, "loss": 0.0054, "step": 35480 }, { "epoch": 7.708514335360556, "grad_norm": 0.0004336989077273756, "learning_rate": 1.8353605560382278e-06, "loss": 0.0, "step": 35490 }, { "epoch": 7.710686359687228, "grad_norm": 0.0004059735219925642, "learning_rate": 1.821785403996525e-06, "loss": 0.0001, "step": 35500 }, { "epoch": 7.712858384013901, "grad_norm": 0.0004083770327270031, "learning_rate": 1.808210251954822e-06, "loss": 0.0, "step": 35510 }, { "epoch": 7.715030408340573, "grad_norm": 0.0004174058558419347, "learning_rate": 1.7946350999131192e-06, "loss": 0.0001, "step": 35520 }, { "epoch": 7.717202432667246, "grad_norm": 0.00041737474384717643, "learning_rate": 1.7810599478714163e-06, "loss": 0.0045, "step": 35530 }, { "epoch": 7.719374456993918, "grad_norm": 0.00040731337503530085, "learning_rate": 1.7674847958297135e-06, "loss": 0.0, "step": 35540 }, { "epoch": 7.7215464813205905, "grad_norm": 0.0004167399602010846, "learning_rate": 1.7539096437880104e-06, "loss": 0.0, "step": 35550 }, { "epoch": 7.7237185056472635, "grad_norm": 0.0004124731640331447, "learning_rate": 1.7403344917463076e-06, "loss": 0.0045, "step": 35560 }, { "epoch": 7.7258905299739355, "grad_norm": 0.0004200803814455867, "learning_rate": 1.7267593397046047e-06, "loss": 0.0038, "step": 35570 }, { "epoch": 7.7280625543006085, "grad_norm": 0.0004072072042617947, "learning_rate": 1.7131841876629018e-06, "loss": 0.0001, "step": 35580 }, { "epoch": 7.730234578627281, "grad_norm": 0.0004259563866071403, "learning_rate": 1.699609035621199e-06, "loss": 0.0001, "step": 35590 }, { "epoch": 7.732406602953953, "grad_norm": 0.00041089204023592174, "learning_rate": 1.6860338835794961e-06, "loss": 0.0001, "step": 35600 }, { "epoch": 7.734578627280626, "grad_norm": 0.0007605640566907823, "learning_rate": 1.6724587315377933e-06, "loss": 0.0, "step": 35610 }, { "epoch": 7.736750651607298, "grad_norm": 0.0004129525041207671, "learning_rate": 1.6588835794960904e-06, "loss": 0.0001, "step": 35620 }, { "epoch": 7.738922675933971, "grad_norm": 0.00040990023990161717, "learning_rate": 1.6453084274543873e-06, "loss": 0.0, "step": 35630 }, { "epoch": 7.741094700260643, "grad_norm": 0.0004054093733429909, "learning_rate": 1.6317332754126845e-06, "loss": 0.0, "step": 35640 }, { "epoch": 7.743266724587316, "grad_norm": 0.00047282635932788253, "learning_rate": 1.6181581233709816e-06, "loss": 0.0, "step": 35650 }, { "epoch": 7.745438748913988, "grad_norm": 0.0004282636509742588, "learning_rate": 1.6045829713292792e-06, "loss": 0.0001, "step": 35660 }, { "epoch": 7.747610773240661, "grad_norm": 0.0004196454829070717, "learning_rate": 1.5910078192875761e-06, "loss": 0.0, "step": 35670 }, { "epoch": 7.749782797567333, "grad_norm": 0.00041647738544270396, "learning_rate": 1.5774326672458733e-06, "loss": 0.0052, "step": 35680 }, { "epoch": 7.751954821894005, "grad_norm": 0.00042042401037178934, "learning_rate": 1.5638575152041704e-06, "loss": 0.0, "step": 35690 }, { "epoch": 7.754126846220678, "grad_norm": 0.00040815555257722735, "learning_rate": 1.5502823631624674e-06, "loss": 0.0109, "step": 35700 }, { "epoch": 7.75629887054735, "grad_norm": 0.0008912209304980934, "learning_rate": 1.5367072111207647e-06, "loss": 0.0001, "step": 35710 }, { "epoch": 7.758470894874023, "grad_norm": 0.0004114516486879438, "learning_rate": 1.5231320590790619e-06, "loss": 0.0001, "step": 35720 }, { "epoch": 7.760642919200695, "grad_norm": 0.0004173968336544931, "learning_rate": 1.509556907037359e-06, "loss": 0.0, "step": 35730 }, { "epoch": 7.762814943527368, "grad_norm": 0.0004173132765572518, "learning_rate": 1.4959817549956561e-06, "loss": 0.0001, "step": 35740 }, { "epoch": 7.76498696785404, "grad_norm": 0.0004356455756351352, "learning_rate": 1.482406602953953e-06, "loss": 0.0001, "step": 35750 }, { "epoch": 7.767158992180712, "grad_norm": 0.00044494314352050424, "learning_rate": 1.4688314509122502e-06, "loss": 0.0001, "step": 35760 }, { "epoch": 7.769331016507385, "grad_norm": 0.0004122602695133537, "learning_rate": 1.4552562988705474e-06, "loss": 0.0, "step": 35770 }, { "epoch": 7.771503040834057, "grad_norm": 0.00040964328218251467, "learning_rate": 1.4416811468288445e-06, "loss": 0.0001, "step": 35780 }, { "epoch": 7.77367506516073, "grad_norm": 0.0005291461129672825, "learning_rate": 1.4281059947871417e-06, "loss": 0.0, "step": 35790 }, { "epoch": 7.775847089487402, "grad_norm": 0.0004087795678060502, "learning_rate": 1.4145308427454388e-06, "loss": 0.0, "step": 35800 }, { "epoch": 7.778019113814075, "grad_norm": 0.0006858239066787064, "learning_rate": 1.400955690703736e-06, "loss": 0.0049, "step": 35810 }, { "epoch": 7.780191138140747, "grad_norm": 0.00041144140413962305, "learning_rate": 1.387380538662033e-06, "loss": 0.0001, "step": 35820 }, { "epoch": 7.782363162467419, "grad_norm": 0.00048641389003023505, "learning_rate": 1.3738053866203302e-06, "loss": 0.0001, "step": 35830 }, { "epoch": 7.784535186794092, "grad_norm": 0.0004075106990057975, "learning_rate": 1.3602302345786274e-06, "loss": 0.0034, "step": 35840 }, { "epoch": 7.786707211120764, "grad_norm": 0.0004153190820943564, "learning_rate": 1.3466550825369245e-06, "loss": 0.0001, "step": 35850 }, { "epoch": 7.788879235447437, "grad_norm": 0.0008529599872417748, "learning_rate": 1.3330799304952217e-06, "loss": 0.0001, "step": 35860 }, { "epoch": 7.791051259774109, "grad_norm": 0.00041363760828971863, "learning_rate": 1.3195047784535188e-06, "loss": 0.0, "step": 35870 }, { "epoch": 7.793223284100782, "grad_norm": 0.00042468035826459527, "learning_rate": 1.3059296264118157e-06, "loss": 0.0001, "step": 35880 }, { "epoch": 7.795395308427454, "grad_norm": 0.0013285571476444602, "learning_rate": 1.2923544743701129e-06, "loss": 0.0001, "step": 35890 }, { "epoch": 7.797567332754127, "grad_norm": 0.00040576845640316606, "learning_rate": 1.27877932232841e-06, "loss": 0.0, "step": 35900 }, { "epoch": 7.799739357080799, "grad_norm": 0.0004062264342792332, "learning_rate": 1.2652041702867074e-06, "loss": 0.0, "step": 35910 }, { "epoch": 7.801911381407471, "grad_norm": 0.0004351633251644671, "learning_rate": 1.2516290182450045e-06, "loss": 0.0, "step": 35920 }, { "epoch": 7.804083405734144, "grad_norm": 0.00041670544305816293, "learning_rate": 1.2380538662033017e-06, "loss": 0.0, "step": 35930 }, { "epoch": 7.8062554300608165, "grad_norm": 0.0004084085812792182, "learning_rate": 1.2244787141615986e-06, "loss": 0.0, "step": 35940 }, { "epoch": 7.808427454387489, "grad_norm": 0.0011126359459012747, "learning_rate": 1.2109035621198957e-06, "loss": 0.0049, "step": 35950 }, { "epoch": 7.8105994787141615, "grad_norm": 0.00042124345782212913, "learning_rate": 1.1973284100781929e-06, "loss": 0.0, "step": 35960 }, { "epoch": 7.812771503040834, "grad_norm": 0.00041270238580182195, "learning_rate": 1.18375325803649e-06, "loss": 0.0001, "step": 35970 }, { "epoch": 7.8149435273675065, "grad_norm": 0.00040624625398777425, "learning_rate": 1.1701781059947872e-06, "loss": 0.0001, "step": 35980 }, { "epoch": 7.817115551694179, "grad_norm": 0.0004104378167539835, "learning_rate": 1.1566029539530843e-06, "loss": 0.0001, "step": 35990 }, { "epoch": 7.819287576020852, "grad_norm": 0.0007016230374574661, "learning_rate": 1.1430278019113815e-06, "loss": 0.0047, "step": 36000 }, { "epoch": 7.821459600347524, "grad_norm": 0.0005074520013295114, "learning_rate": 1.1294526498696786e-06, "loss": 0.0001, "step": 36010 }, { "epoch": 7.823631624674197, "grad_norm": 0.0009288796572946012, "learning_rate": 1.1158774978279757e-06, "loss": 0.0001, "step": 36020 }, { "epoch": 7.825803649000869, "grad_norm": 0.0008313364232890308, "learning_rate": 1.1023023457862729e-06, "loss": 0.0, "step": 36030 }, { "epoch": 7.827975673327542, "grad_norm": 0.00041966530261561275, "learning_rate": 1.08872719374457e-06, "loss": 0.0001, "step": 36040 }, { "epoch": 7.830147697654214, "grad_norm": 0.0004062599618919194, "learning_rate": 1.0751520417028672e-06, "loss": 0.0, "step": 36050 }, { "epoch": 7.832319721980886, "grad_norm": 0.0004279286367818713, "learning_rate": 1.0615768896611643e-06, "loss": 0.0001, "step": 36060 }, { "epoch": 7.834491746307559, "grad_norm": 0.00041747227078303695, "learning_rate": 1.0480017376194613e-06, "loss": 0.0, "step": 36070 }, { "epoch": 7.836663770634231, "grad_norm": 0.0004111778107471764, "learning_rate": 1.0344265855777584e-06, "loss": 0.0, "step": 36080 }, { "epoch": 7.838835794960904, "grad_norm": 0.18399830162525177, "learning_rate": 1.0208514335360555e-06, "loss": 0.009, "step": 36090 }, { "epoch": 7.841007819287576, "grad_norm": 0.0004036433238070458, "learning_rate": 1.0072762814943527e-06, "loss": 0.0, "step": 36100 }, { "epoch": 7.843179843614249, "grad_norm": 0.0004178290255367756, "learning_rate": 9.9370112945265e-07, "loss": 0.0001, "step": 36110 }, { "epoch": 7.845351867940921, "grad_norm": 0.00040722807170823216, "learning_rate": 9.801259774109472e-07, "loss": 0.0, "step": 36120 }, { "epoch": 7.847523892267594, "grad_norm": 0.00040777475805953145, "learning_rate": 9.665508253692443e-07, "loss": 0.006, "step": 36130 }, { "epoch": 7.849695916594266, "grad_norm": 0.00040998373879119754, "learning_rate": 9.529756733275414e-07, "loss": 0.0001, "step": 36140 }, { "epoch": 7.851867940920938, "grad_norm": 0.00065061473287642, "learning_rate": 9.394005212858384e-07, "loss": 0.0001, "step": 36150 }, { "epoch": 7.854039965247611, "grad_norm": 0.0006581631605513394, "learning_rate": 9.258253692441356e-07, "loss": 0.0001, "step": 36160 }, { "epoch": 7.856211989574283, "grad_norm": 0.00040802801959216595, "learning_rate": 9.122502172024327e-07, "loss": 0.0, "step": 36170 }, { "epoch": 7.858384013900956, "grad_norm": 0.0004146042338106781, "learning_rate": 8.986750651607298e-07, "loss": 0.0051, "step": 36180 }, { "epoch": 7.860556038227628, "grad_norm": 0.0004056603938806802, "learning_rate": 8.850999131190269e-07, "loss": 0.0067, "step": 36190 }, { "epoch": 7.8627280625543, "grad_norm": 0.0006893807440064847, "learning_rate": 8.71524761077324e-07, "loss": 0.0001, "step": 36200 }, { "epoch": 7.864900086880973, "grad_norm": 0.00041687351767905056, "learning_rate": 8.579496090356213e-07, "loss": 0.0, "step": 36210 }, { "epoch": 7.867072111207645, "grad_norm": 0.0004174001805949956, "learning_rate": 8.443744569939184e-07, "loss": 0.0001, "step": 36220 }, { "epoch": 7.869244135534318, "grad_norm": 0.00040999799966812134, "learning_rate": 8.307993049522156e-07, "loss": 0.0, "step": 36230 }, { "epoch": 7.87141615986099, "grad_norm": 0.0004175525682512671, "learning_rate": 8.172241529105127e-07, "loss": 0.0, "step": 36240 }, { "epoch": 7.873588184187663, "grad_norm": 0.0010128975845873356, "learning_rate": 8.036490008688097e-07, "loss": 0.0001, "step": 36250 }, { "epoch": 7.875760208514335, "grad_norm": 0.0007056336617097259, "learning_rate": 7.900738488271069e-07, "loss": 0.0051, "step": 36260 }, { "epoch": 7.877932232841008, "grad_norm": 0.0004121058445889503, "learning_rate": 7.76498696785404e-07, "loss": 0.0001, "step": 36270 }, { "epoch": 7.88010425716768, "grad_norm": 0.0004146612773183733, "learning_rate": 7.629235447437012e-07, "loss": 0.0, "step": 36280 }, { "epoch": 7.882276281494352, "grad_norm": 0.00040536164306104183, "learning_rate": 7.493483927019983e-07, "loss": 0.0001, "step": 36290 }, { "epoch": 7.884448305821025, "grad_norm": 0.0005756246391683817, "learning_rate": 7.357732406602955e-07, "loss": 0.0001, "step": 36300 }, { "epoch": 7.886620330147697, "grad_norm": 0.0009296848438680172, "learning_rate": 7.221980886185926e-07, "loss": 0.0001, "step": 36310 }, { "epoch": 7.88879235447437, "grad_norm": 0.000410063483286649, "learning_rate": 7.086229365768896e-07, "loss": 0.0001, "step": 36320 }, { "epoch": 7.890964378801042, "grad_norm": 0.0007830615504644811, "learning_rate": 6.950477845351868e-07, "loss": 0.0, "step": 36330 }, { "epoch": 7.893136403127715, "grad_norm": 0.00046702235704287887, "learning_rate": 6.81472632493484e-07, "loss": 0.0, "step": 36340 }, { "epoch": 7.895308427454387, "grad_norm": 0.00041825513471849263, "learning_rate": 6.678974804517811e-07, "loss": 0.0, "step": 36350 }, { "epoch": 7.89748045178106, "grad_norm": 0.0004378632584121078, "learning_rate": 6.543223284100782e-07, "loss": 0.0, "step": 36360 }, { "epoch": 7.8996524761077325, "grad_norm": 0.00043384116725064814, "learning_rate": 6.407471763683754e-07, "loss": 0.0001, "step": 36370 }, { "epoch": 7.9018245004344045, "grad_norm": 0.00041194152436219156, "learning_rate": 6.271720243266724e-07, "loss": 0.0, "step": 36380 }, { "epoch": 7.9039965247610775, "grad_norm": 0.00041512143798172474, "learning_rate": 6.135968722849696e-07, "loss": 0.0001, "step": 36390 }, { "epoch": 7.90616854908775, "grad_norm": 0.0007027858518995345, "learning_rate": 6.000217202432668e-07, "loss": 0.0001, "step": 36400 }, { "epoch": 7.9083405734144225, "grad_norm": 0.0007233781507238746, "learning_rate": 5.864465682015638e-07, "loss": 0.0001, "step": 36410 }, { "epoch": 7.910512597741095, "grad_norm": 0.00040531178819946945, "learning_rate": 5.72871416159861e-07, "loss": 0.0063, "step": 36420 }, { "epoch": 7.912684622067767, "grad_norm": 0.0006673138123005629, "learning_rate": 5.592962641181581e-07, "loss": 0.0054, "step": 36430 }, { "epoch": 7.91485664639444, "grad_norm": 0.0004134076298214495, "learning_rate": 5.457211120764553e-07, "loss": 0.0, "step": 36440 }, { "epoch": 7.917028670721112, "grad_norm": 0.0006809644983150065, "learning_rate": 5.321459600347524e-07, "loss": 0.0001, "step": 36450 }, { "epoch": 7.919200695047785, "grad_norm": 0.0010297917760908604, "learning_rate": 5.185708079930495e-07, "loss": 0.0001, "step": 36460 }, { "epoch": 7.921372719374457, "grad_norm": 0.00040686517604626715, "learning_rate": 5.049956559513467e-07, "loss": 0.0, "step": 36470 }, { "epoch": 7.92354474370113, "grad_norm": 0.00040707376319915056, "learning_rate": 4.914205039096437e-07, "loss": 0.0, "step": 36480 }, { "epoch": 7.925716768027802, "grad_norm": 0.00041068848804570735, "learning_rate": 4.77845351867941e-07, "loss": 0.0, "step": 36490 }, { "epoch": 7.927888792354475, "grad_norm": 0.00040782021824270487, "learning_rate": 4.6427019982623807e-07, "loss": 0.0001, "step": 36500 }, { "epoch": 7.930060816681147, "grad_norm": 0.000491233600769192, "learning_rate": 4.506950477845352e-07, "loss": 0.0001, "step": 36510 }, { "epoch": 7.932232841007819, "grad_norm": 0.0004565462877508253, "learning_rate": 4.371198957428323e-07, "loss": 0.0042, "step": 36520 }, { "epoch": 7.934404865334492, "grad_norm": 0.00040316057857126, "learning_rate": 4.2354474370112945e-07, "loss": 0.0001, "step": 36530 }, { "epoch": 7.936576889661164, "grad_norm": 0.0011551212519407272, "learning_rate": 4.0996959165942665e-07, "loss": 0.0088, "step": 36540 }, { "epoch": 7.938748913987837, "grad_norm": 0.00041238832636736333, "learning_rate": 3.9639443961772374e-07, "loss": 0.0, "step": 36550 }, { "epoch": 7.940920938314509, "grad_norm": 0.0004119895747862756, "learning_rate": 3.828192875760209e-07, "loss": 0.0, "step": 36560 }, { "epoch": 7.943092962641182, "grad_norm": 0.00041164946742355824, "learning_rate": 3.6924413553431797e-07, "loss": 0.0001, "step": 36570 }, { "epoch": 7.945264986967854, "grad_norm": 0.00064078503055498, "learning_rate": 3.556689834926151e-07, "loss": 0.0001, "step": 36580 }, { "epoch": 7.947437011294527, "grad_norm": 0.00040421413723379374, "learning_rate": 3.4209383145091226e-07, "loss": 0.0001, "step": 36590 }, { "epoch": 7.949609035621199, "grad_norm": 0.0010148753644898534, "learning_rate": 3.285186794092094e-07, "loss": 0.0001, "step": 36600 }, { "epoch": 7.951781059947871, "grad_norm": 0.0004108991415705532, "learning_rate": 3.1494352736750655e-07, "loss": 0.0, "step": 36610 }, { "epoch": 7.953953084274544, "grad_norm": 0.0004116395430173725, "learning_rate": 3.0136837532580364e-07, "loss": 0.0, "step": 36620 }, { "epoch": 7.956125108601216, "grad_norm": 0.00040760665433481336, "learning_rate": 2.877932232841008e-07, "loss": 0.0, "step": 36630 }, { "epoch": 7.958297132927889, "grad_norm": 0.000540865701623261, "learning_rate": 2.7421807124239793e-07, "loss": 0.0, "step": 36640 }, { "epoch": 7.960469157254561, "grad_norm": 0.0004090243310201913, "learning_rate": 2.6064291920069507e-07, "loss": 0.0, "step": 36650 }, { "epoch": 7.962641181581233, "grad_norm": 0.0004173625202383846, "learning_rate": 2.470677671589922e-07, "loss": 0.0001, "step": 36660 }, { "epoch": 7.964813205907906, "grad_norm": 0.00040882351459003985, "learning_rate": 2.334926151172893e-07, "loss": 0.0, "step": 36670 }, { "epoch": 7.966985230234578, "grad_norm": 0.0013148378347977996, "learning_rate": 2.1991746307558648e-07, "loss": 0.0001, "step": 36680 }, { "epoch": 7.969157254561251, "grad_norm": 0.00040579578489996493, "learning_rate": 2.063423110338836e-07, "loss": 0.0049, "step": 36690 }, { "epoch": 7.971329278887923, "grad_norm": 0.0004032440483570099, "learning_rate": 1.927671589921807e-07, "loss": 0.0, "step": 36700 }, { "epoch": 7.973501303214596, "grad_norm": 0.00041121779941022396, "learning_rate": 1.7919200695047785e-07, "loss": 0.0001, "step": 36710 }, { "epoch": 7.975673327541268, "grad_norm": 0.000417822360759601, "learning_rate": 1.65616854908775e-07, "loss": 0.004, "step": 36720 }, { "epoch": 7.977845351867941, "grad_norm": 0.0004095873446203768, "learning_rate": 1.5204170286707212e-07, "loss": 0.0, "step": 36730 }, { "epoch": 7.980017376194613, "grad_norm": 0.0004114782204851508, "learning_rate": 1.3846655082536923e-07, "loss": 0.0101, "step": 36740 }, { "epoch": 7.9821894005212854, "grad_norm": 0.0004039192572236061, "learning_rate": 1.2489139878366638e-07, "loss": 0.0001, "step": 36750 }, { "epoch": 7.984361424847958, "grad_norm": 0.0007135707419365644, "learning_rate": 1.1131624674196352e-07, "loss": 0.0001, "step": 36760 }, { "epoch": 7.9865334491746305, "grad_norm": 0.0004933361196890473, "learning_rate": 9.774109470026065e-08, "loss": 0.0, "step": 36770 }, { "epoch": 7.9887054735013034, "grad_norm": 0.23448993265628815, "learning_rate": 8.416594265855778e-08, "loss": 0.0055, "step": 36780 }, { "epoch": 7.9908774978279755, "grad_norm": 0.0004083155654370785, "learning_rate": 7.059079061685491e-08, "loss": 0.0001, "step": 36790 }, { "epoch": 7.9930495221546485, "grad_norm": 0.000955652620177716, "learning_rate": 5.7015638575152043e-08, "loss": 0.0001, "step": 36800 }, { "epoch": 7.9952215464813206, "grad_norm": 0.0004135241615585983, "learning_rate": 4.3440486533449174e-08, "loss": 0.0001, "step": 36810 }, { "epoch": 7.9973935708079935, "grad_norm": 0.00041545607382431626, "learning_rate": 2.986533449174631e-08, "loss": 0.0001, "step": 36820 }, { "epoch": 7.999565595134666, "grad_norm": 0.00040375188109464943, "learning_rate": 1.6290182450043442e-08, "loss": 0.0, "step": 36830 }, { "epoch": 8.0, "eval_f1": 0.5962264150943396, "eval_loss": 0.08886083960533142, "eval_runtime": 84.061, "eval_samples_per_second": 118.664, "eval_steps_per_second": 7.423, "step": 36832 }, { "epoch": 8.001737619461338, "grad_norm": 0.0004186915175523609, "learning_rate": 1.6668476686938895e-05, "loss": 0.0, "step": 36840 }, { "epoch": 8.00390964378801, "grad_norm": 0.0004101640952285379, "learning_rate": 1.665942658557776e-05, "loss": 0.0, "step": 36850 }, { "epoch": 8.006081668114684, "grad_norm": 0.00040353488293476403, "learning_rate": 1.6650376484216623e-05, "loss": 0.004, "step": 36860 }, { "epoch": 8.008253692441356, "grad_norm": 0.0004050274728797376, "learning_rate": 1.6641326382855488e-05, "loss": 0.0, "step": 36870 }, { "epoch": 8.010425716768028, "grad_norm": 0.000409485975978896, "learning_rate": 1.6632276281494352e-05, "loss": 0.0, "step": 36880 }, { "epoch": 8.0125977410947, "grad_norm": 0.0004029431729577482, "learning_rate": 1.662322618013322e-05, "loss": 0.0, "step": 36890 }, { "epoch": 8.014769765421372, "grad_norm": 0.0004054335586261004, "learning_rate": 1.6614176078772084e-05, "loss": 0.0001, "step": 36900 }, { "epoch": 8.016941789748046, "grad_norm": 0.001848601852543652, "learning_rate": 1.660512597741095e-05, "loss": 0.0, "step": 36910 }, { "epoch": 8.019113814074718, "grad_norm": 0.00040781169082038105, "learning_rate": 1.6596075876049813e-05, "loss": 0.0001, "step": 36920 }, { "epoch": 8.02128583840139, "grad_norm": 0.0006863618618808687, "learning_rate": 1.6587025774688677e-05, "loss": 0.0, "step": 36930 }, { "epoch": 8.023457862728062, "grad_norm": 0.0003956135187763721, "learning_rate": 1.657797567332754e-05, "loss": 0.0001, "step": 36940 }, { "epoch": 8.025629887054736, "grad_norm": 0.0007481368957087398, "learning_rate": 1.6568925571966406e-05, "loss": 0.0, "step": 36950 }, { "epoch": 8.027801911381408, "grad_norm": 0.0006480406154878438, "learning_rate": 1.655987547060527e-05, "loss": 0.0001, "step": 36960 }, { "epoch": 8.02997393570808, "grad_norm": 0.00040580949280411005, "learning_rate": 1.6550825369244134e-05, "loss": 0.0, "step": 36970 }, { "epoch": 8.032145960034752, "grad_norm": 0.0004226068267598748, "learning_rate": 1.6541775267883e-05, "loss": 0.0, "step": 36980 }, { "epoch": 8.034317984361424, "grad_norm": 0.0007375205168500543, "learning_rate": 1.6532725166521863e-05, "loss": 0.0034, "step": 36990 }, { "epoch": 8.036490008688098, "grad_norm": 0.0003932391991838813, "learning_rate": 1.652367506516073e-05, "loss": 0.0, "step": 37000 }, { "epoch": 8.03866203301477, "grad_norm": 0.00039350654697045684, "learning_rate": 1.6514624963799595e-05, "loss": 0.0, "step": 37010 }, { "epoch": 8.040834057341442, "grad_norm": 0.0003966047952417284, "learning_rate": 1.650557486243846e-05, "loss": 0.0, "step": 37020 }, { "epoch": 8.043006081668114, "grad_norm": 0.00040773957152850926, "learning_rate": 1.6496524761077324e-05, "loss": 0.0001, "step": 37030 }, { "epoch": 8.045178105994788, "grad_norm": 0.0004274372477084398, "learning_rate": 1.648747465971619e-05, "loss": 0.0, "step": 37040 }, { "epoch": 8.04735013032146, "grad_norm": 0.0003937635920010507, "learning_rate": 1.6478424558355056e-05, "loss": 0.0001, "step": 37050 }, { "epoch": 8.049522154648132, "grad_norm": 0.0006448153289966285, "learning_rate": 1.646937445699392e-05, "loss": 0.0001, "step": 37060 }, { "epoch": 8.051694178974804, "grad_norm": 0.3185959756374359, "learning_rate": 1.6460324355632785e-05, "loss": 0.005, "step": 37070 }, { "epoch": 8.053866203301476, "grad_norm": 0.00040226714918389916, "learning_rate": 1.645127425427165e-05, "loss": 0.0064, "step": 37080 }, { "epoch": 8.05603822762815, "grad_norm": 0.00040559517219662666, "learning_rate": 1.6442224152910513e-05, "loss": 0.0028, "step": 37090 }, { "epoch": 8.058210251954822, "grad_norm": 0.0003909075167030096, "learning_rate": 1.6433174051549378e-05, "loss": 0.0, "step": 37100 }, { "epoch": 8.060382276281494, "grad_norm": 0.0004615616344381124, "learning_rate": 1.6424123950188242e-05, "loss": 0.0001, "step": 37110 }, { "epoch": 8.062554300608166, "grad_norm": 0.0003968609671574086, "learning_rate": 1.641507384882711e-05, "loss": 0.0049, "step": 37120 }, { "epoch": 8.064726324934838, "grad_norm": 0.00038003415102139115, "learning_rate": 1.6406023747465974e-05, "loss": 0.0, "step": 37130 }, { "epoch": 8.066898349261512, "grad_norm": 0.00037840873119421303, "learning_rate": 1.639697364610484e-05, "loss": 0.0, "step": 37140 }, { "epoch": 8.069070373588184, "grad_norm": 0.0003778162645176053, "learning_rate": 1.6387923544743703e-05, "loss": 0.0025, "step": 37150 }, { "epoch": 8.071242397914856, "grad_norm": 0.00038293536636047065, "learning_rate": 1.6378873443382567e-05, "loss": 0.0, "step": 37160 }, { "epoch": 8.073414422241528, "grad_norm": 0.0005740802153013647, "learning_rate": 1.636982334202143e-05, "loss": 0.0002, "step": 37170 }, { "epoch": 8.075586446568202, "grad_norm": 0.00036886456655338407, "learning_rate": 1.6360773240660296e-05, "loss": 0.0, "step": 37180 }, { "epoch": 8.077758470894874, "grad_norm": 0.0003802602586802095, "learning_rate": 1.635172313929916e-05, "loss": 0.0, "step": 37190 }, { "epoch": 8.079930495221546, "grad_norm": 0.0006068776128813624, "learning_rate": 1.6342673037938024e-05, "loss": 0.0001, "step": 37200 }, { "epoch": 8.082102519548219, "grad_norm": 0.0003752399352379143, "learning_rate": 1.633362293657689e-05, "loss": 0.0001, "step": 37210 }, { "epoch": 8.08427454387489, "grad_norm": 0.00037461266038008034, "learning_rate": 1.6324572835215753e-05, "loss": 0.0107, "step": 37220 }, { "epoch": 8.086446568201564, "grad_norm": 0.000390580331441015, "learning_rate": 1.631552273385462e-05, "loss": 0.0, "step": 37230 }, { "epoch": 8.088618592528237, "grad_norm": 0.0003792881325352937, "learning_rate": 1.6306472632493485e-05, "loss": 0.0, "step": 37240 }, { "epoch": 8.090790616854909, "grad_norm": 0.00038958745426498353, "learning_rate": 1.629742253113235e-05, "loss": 0.0065, "step": 37250 }, { "epoch": 8.09296264118158, "grad_norm": 0.0004001133784186095, "learning_rate": 1.6288372429771214e-05, "loss": 0.0, "step": 37260 }, { "epoch": 8.095134665508255, "grad_norm": 0.00038350385148078203, "learning_rate": 1.6279322328410078e-05, "loss": 0.0065, "step": 37270 }, { "epoch": 8.097306689834927, "grad_norm": 0.0003897896967828274, "learning_rate": 1.6270272227048942e-05, "loss": 0.0, "step": 37280 }, { "epoch": 8.099478714161599, "grad_norm": 0.0007525041582994163, "learning_rate": 1.6261222125687807e-05, "loss": 0.0, "step": 37290 }, { "epoch": 8.10165073848827, "grad_norm": 0.00039500088314525783, "learning_rate": 1.6252172024326675e-05, "loss": 0.0, "step": 37300 }, { "epoch": 8.103822762814943, "grad_norm": 0.000395832525100559, "learning_rate": 1.624312192296554e-05, "loss": 0.0, "step": 37310 }, { "epoch": 8.105994787141617, "grad_norm": 0.0004039072955492884, "learning_rate": 1.6234071821604403e-05, "loss": 0.0, "step": 37320 }, { "epoch": 8.108166811468289, "grad_norm": 0.00039355579065158963, "learning_rate": 1.6225021720243268e-05, "loss": 0.0044, "step": 37330 }, { "epoch": 8.11033883579496, "grad_norm": 0.0004178201488684863, "learning_rate": 1.6215971618882132e-05, "loss": 0.0016, "step": 37340 }, { "epoch": 8.112510860121633, "grad_norm": 0.00037369929486885667, "learning_rate": 1.6206921517521e-05, "loss": 0.0, "step": 37350 }, { "epoch": 8.114682884448305, "grad_norm": 0.0003739323001354933, "learning_rate": 1.6197871416159864e-05, "loss": 0.0001, "step": 37360 }, { "epoch": 8.116854908774979, "grad_norm": 0.00037694178172387183, "learning_rate": 1.6188821314798728e-05, "loss": 0.0, "step": 37370 }, { "epoch": 8.119026933101651, "grad_norm": 0.0003731514443643391, "learning_rate": 1.6179771213437593e-05, "loss": 0.0, "step": 37380 }, { "epoch": 8.121198957428323, "grad_norm": 0.00037904002238065004, "learning_rate": 1.6170721112076457e-05, "loss": 0.0, "step": 37390 }, { "epoch": 8.123370981754995, "grad_norm": 0.00039728908450342715, "learning_rate": 1.616167101071532e-05, "loss": 0.0086, "step": 37400 }, { "epoch": 8.125543006081669, "grad_norm": 0.00038232721271924675, "learning_rate": 1.6152620909354186e-05, "loss": 0.0033, "step": 37410 }, { "epoch": 8.127715030408341, "grad_norm": 1.541982650756836, "learning_rate": 1.614357080799305e-05, "loss": 0.0008, "step": 37420 }, { "epoch": 8.129887054735013, "grad_norm": 0.00039564137114211917, "learning_rate": 1.6134520706631914e-05, "loss": 0.035, "step": 37430 }, { "epoch": 8.132059079061685, "grad_norm": 0.0007069563725963235, "learning_rate": 1.612547060527078e-05, "loss": 0.1284, "step": 37440 }, { "epoch": 8.134231103388357, "grad_norm": 1.2225791215896606, "learning_rate": 1.6116420503909643e-05, "loss": 0.0416, "step": 37450 }, { "epoch": 8.136403127715031, "grad_norm": 0.23964758217334747, "learning_rate": 1.610737040254851e-05, "loss": 0.0642, "step": 37460 }, { "epoch": 8.138575152041703, "grad_norm": 0.3129146099090576, "learning_rate": 1.6098320301187375e-05, "loss": 0.0416, "step": 37470 }, { "epoch": 8.140747176368375, "grad_norm": 0.00330977700650692, "learning_rate": 1.608927019982624e-05, "loss": 0.0241, "step": 37480 }, { "epoch": 8.142919200695047, "grad_norm": 0.0025991355068981647, "learning_rate": 1.6080220098465104e-05, "loss": 0.0003, "step": 37490 }, { "epoch": 8.145091225021721, "grad_norm": 0.00236403476446867, "learning_rate": 1.6071169997103968e-05, "loss": 0.0165, "step": 37500 }, { "epoch": 8.147263249348393, "grad_norm": 0.6998372077941895, "learning_rate": 1.6062119895742832e-05, "loss": 0.0019, "step": 37510 }, { "epoch": 8.149435273675065, "grad_norm": 2.8501572608947754, "learning_rate": 1.6053069794381697e-05, "loss": 0.0155, "step": 37520 }, { "epoch": 8.151607298001737, "grad_norm": 0.19623400270938873, "learning_rate": 1.604401969302056e-05, "loss": 0.0515, "step": 37530 }, { "epoch": 8.15377932232841, "grad_norm": 0.010287540033459663, "learning_rate": 1.6034969591659425e-05, "loss": 0.0021, "step": 37540 }, { "epoch": 8.155951346655083, "grad_norm": 0.07258111238479614, "learning_rate": 1.602591949029829e-05, "loss": 0.0462, "step": 37550 }, { "epoch": 8.158123370981755, "grad_norm": 0.013868864625692368, "learning_rate": 1.6016869388937154e-05, "loss": 0.0072, "step": 37560 }, { "epoch": 8.160295395308427, "grad_norm": 0.027811523526906967, "learning_rate": 1.6007819287576022e-05, "loss": 0.0009, "step": 37570 }, { "epoch": 8.1624674196351, "grad_norm": 0.003218551864847541, "learning_rate": 1.5998769186214886e-05, "loss": 0.0004, "step": 37580 }, { "epoch": 8.164639443961772, "grad_norm": 0.0022526709362864494, "learning_rate": 1.598971908485375e-05, "loss": 0.0002, "step": 37590 }, { "epoch": 8.166811468288445, "grad_norm": 0.002147641032934189, "learning_rate": 1.5980668983492615e-05, "loss": 0.0003, "step": 37600 }, { "epoch": 8.168983492615117, "grad_norm": 0.0033935250248759985, "learning_rate": 1.5971618882131483e-05, "loss": 0.0004, "step": 37610 }, { "epoch": 8.17115551694179, "grad_norm": 0.001787520362995565, "learning_rate": 1.5962568780770347e-05, "loss": 0.0002, "step": 37620 }, { "epoch": 8.173327541268462, "grad_norm": 0.0018216808093711734, "learning_rate": 1.595351867940921e-05, "loss": 0.0012, "step": 37630 }, { "epoch": 8.175499565595135, "grad_norm": 0.0020013265311717987, "learning_rate": 1.5944468578048076e-05, "loss": 0.0003, "step": 37640 }, { "epoch": 8.177671589921808, "grad_norm": 0.0020018210634589195, "learning_rate": 1.593541847668694e-05, "loss": 0.0002, "step": 37650 }, { "epoch": 8.17984361424848, "grad_norm": 0.0015087234787642956, "learning_rate": 1.5926368375325804e-05, "loss": 0.0006, "step": 37660 }, { "epoch": 8.182015638575152, "grad_norm": 0.005730865523219109, "learning_rate": 1.591731827396467e-05, "loss": 0.0013, "step": 37670 }, { "epoch": 8.184187662901824, "grad_norm": 0.0019866772927343845, "learning_rate": 1.5908268172603533e-05, "loss": 0.0031, "step": 37680 }, { "epoch": 8.186359687228498, "grad_norm": 0.001469621085561812, "learning_rate": 1.58992180712424e-05, "loss": 0.0216, "step": 37690 }, { "epoch": 8.18853171155517, "grad_norm": 0.0011489508906379342, "learning_rate": 1.5890167969881265e-05, "loss": 0.0001, "step": 37700 }, { "epoch": 8.190703735881842, "grad_norm": 0.0011148941703140736, "learning_rate": 1.588111786852013e-05, "loss": 0.0045, "step": 37710 }, { "epoch": 8.192875760208514, "grad_norm": 0.001104211900383234, "learning_rate": 1.5872067767158994e-05, "loss": 0.0006, "step": 37720 }, { "epoch": 8.195047784535188, "grad_norm": 0.001130530028603971, "learning_rate": 1.5863017665797858e-05, "loss": 0.0002, "step": 37730 }, { "epoch": 8.19721980886186, "grad_norm": 0.0012663186062127352, "learning_rate": 1.5853967564436722e-05, "loss": 0.0001, "step": 37740 }, { "epoch": 8.199391833188532, "grad_norm": 0.0013724026503041387, "learning_rate": 1.5844917463075587e-05, "loss": 0.0001, "step": 37750 }, { "epoch": 8.201563857515204, "grad_norm": 0.0024533451069146395, "learning_rate": 1.583586736171445e-05, "loss": 0.0001, "step": 37760 }, { "epoch": 8.203735881841876, "grad_norm": 0.001503446139395237, "learning_rate": 1.5826817260353315e-05, "loss": 0.0211, "step": 37770 }, { "epoch": 8.20590790616855, "grad_norm": 0.000942540296819061, "learning_rate": 1.581776715899218e-05, "loss": 0.0001, "step": 37780 }, { "epoch": 8.208079930495222, "grad_norm": 0.001057905494235456, "learning_rate": 1.5808717057631044e-05, "loss": 0.039, "step": 37790 }, { "epoch": 8.210251954821894, "grad_norm": 0.009021256119012833, "learning_rate": 1.5799666956269912e-05, "loss": 0.0001, "step": 37800 }, { "epoch": 8.212423979148566, "grad_norm": 0.0017717586597427726, "learning_rate": 1.5790616854908776e-05, "loss": 0.0002, "step": 37810 }, { "epoch": 8.214596003475238, "grad_norm": 1.0658934116363525, "learning_rate": 1.578156675354764e-05, "loss": 0.0011, "step": 37820 }, { "epoch": 8.216768027801912, "grad_norm": 0.0011891911271959543, "learning_rate": 1.5772516652186505e-05, "loss": 0.0001, "step": 37830 }, { "epoch": 8.218940052128584, "grad_norm": 0.001057581976056099, "learning_rate": 1.576346655082537e-05, "loss": 0.0001, "step": 37840 }, { "epoch": 8.221112076455256, "grad_norm": 0.0015104110352694988, "learning_rate": 1.5754416449464234e-05, "loss": 0.0001, "step": 37850 }, { "epoch": 8.223284100781928, "grad_norm": 0.001172617427073419, "learning_rate": 1.5745366348103098e-05, "loss": 0.0001, "step": 37860 }, { "epoch": 8.225456125108602, "grad_norm": 0.0010320774745196104, "learning_rate": 1.5736316246741966e-05, "loss": 0.0002, "step": 37870 }, { "epoch": 8.227628149435274, "grad_norm": 0.0011193450773134828, "learning_rate": 1.572726614538083e-05, "loss": 0.0001, "step": 37880 }, { "epoch": 8.229800173761946, "grad_norm": 0.0009711516322568059, "learning_rate": 1.5718216044019694e-05, "loss": 0.0001, "step": 37890 }, { "epoch": 8.231972198088618, "grad_norm": 0.0009323352132923901, "learning_rate": 1.570916594265856e-05, "loss": 0.0001, "step": 37900 }, { "epoch": 8.23414422241529, "grad_norm": 0.0009416076354682446, "learning_rate": 1.5700115841297423e-05, "loss": 0.0037, "step": 37910 }, { "epoch": 8.236316246741964, "grad_norm": 0.0008424947736784816, "learning_rate": 1.569106573993629e-05, "loss": 0.0001, "step": 37920 }, { "epoch": 8.238488271068636, "grad_norm": 0.36671534180641174, "learning_rate": 1.5682015638575155e-05, "loss": 0.0035, "step": 37930 }, { "epoch": 8.240660295395308, "grad_norm": 0.0008022770052775741, "learning_rate": 1.567296553721402e-05, "loss": 0.0001, "step": 37940 }, { "epoch": 8.24283231972198, "grad_norm": 0.0008299489854834974, "learning_rate": 1.5663915435852884e-05, "loss": 0.0001, "step": 37950 }, { "epoch": 8.245004344048652, "grad_norm": 0.0008080134284682572, "learning_rate": 1.5654865334491748e-05, "loss": 0.0001, "step": 37960 }, { "epoch": 8.247176368375326, "grad_norm": 0.0008203451288864017, "learning_rate": 1.5645815233130612e-05, "loss": 0.0001, "step": 37970 }, { "epoch": 8.249348392701998, "grad_norm": 0.0008341401698999107, "learning_rate": 1.5636765131769477e-05, "loss": 0.0001, "step": 37980 }, { "epoch": 8.25152041702867, "grad_norm": 0.0007914734305813909, "learning_rate": 1.562771503040834e-05, "loss": 0.0001, "step": 37990 }, { "epoch": 8.253692441355343, "grad_norm": 0.0007827221415936947, "learning_rate": 1.5618664929047205e-05, "loss": 0.0001, "step": 38000 }, { "epoch": 8.255864465682016, "grad_norm": 0.000880706706084311, "learning_rate": 1.560961482768607e-05, "loss": 0.0001, "step": 38010 }, { "epoch": 8.258036490008688, "grad_norm": 0.0007812479743734002, "learning_rate": 1.5600564726324934e-05, "loss": 0.0001, "step": 38020 }, { "epoch": 8.26020851433536, "grad_norm": 0.0007665276643820107, "learning_rate": 1.5591514624963802e-05, "loss": 0.0001, "step": 38030 }, { "epoch": 8.262380538662033, "grad_norm": 0.0007834586431272328, "learning_rate": 1.5582464523602666e-05, "loss": 0.0001, "step": 38040 }, { "epoch": 8.264552562988705, "grad_norm": 0.000701862561982125, "learning_rate": 1.557341442224153e-05, "loss": 0.0052, "step": 38050 }, { "epoch": 8.266724587315379, "grad_norm": 0.0008326490060426295, "learning_rate": 1.5564364320880395e-05, "loss": 0.0001, "step": 38060 }, { "epoch": 8.26889661164205, "grad_norm": 0.0007477857870981097, "learning_rate": 1.555531421951926e-05, "loss": 0.0001, "step": 38070 }, { "epoch": 8.271068635968723, "grad_norm": 0.0017798726912587881, "learning_rate": 1.5546264118158123e-05, "loss": 0.001, "step": 38080 }, { "epoch": 8.273240660295395, "grad_norm": 0.0012174885487183928, "learning_rate": 1.5537214016796988e-05, "loss": 0.0001, "step": 38090 }, { "epoch": 8.275412684622069, "grad_norm": 0.0007682919967919588, "learning_rate": 1.5528163915435852e-05, "loss": 0.001, "step": 38100 }, { "epoch": 8.27758470894874, "grad_norm": 0.0007978286594152451, "learning_rate": 1.5519113814074716e-05, "loss": 0.0001, "step": 38110 }, { "epoch": 8.279756733275413, "grad_norm": 0.0008347800467163324, "learning_rate": 1.551006371271358e-05, "loss": 0.0001, "step": 38120 }, { "epoch": 8.281928757602085, "grad_norm": 0.0007353154360316694, "learning_rate": 1.5501013611352445e-05, "loss": 0.0001, "step": 38130 }, { "epoch": 8.284100781928757, "grad_norm": 0.0007409664103761315, "learning_rate": 1.5491963509991313e-05, "loss": 0.0002, "step": 38140 }, { "epoch": 8.28627280625543, "grad_norm": 0.000824456918053329, "learning_rate": 1.5482913408630177e-05, "loss": 0.0001, "step": 38150 }, { "epoch": 8.288444830582103, "grad_norm": 0.0006615397287532687, "learning_rate": 1.547386330726904e-05, "loss": 0.0001, "step": 38160 }, { "epoch": 8.290616854908775, "grad_norm": 0.000863892724737525, "learning_rate": 1.546481320590791e-05, "loss": 0.0025, "step": 38170 }, { "epoch": 8.292788879235447, "grad_norm": 0.002789223100990057, "learning_rate": 1.5455763104546774e-05, "loss": 0.0001, "step": 38180 }, { "epoch": 8.29496090356212, "grad_norm": 0.0010462104110047221, "learning_rate": 1.5446713003185638e-05, "loss": 0.0001, "step": 38190 }, { "epoch": 8.297132927888793, "grad_norm": 0.0007007081294432282, "learning_rate": 1.5437662901824502e-05, "loss": 0.0001, "step": 38200 }, { "epoch": 8.299304952215465, "grad_norm": 0.0006954580312594771, "learning_rate": 1.5428612800463367e-05, "loss": 0.0001, "step": 38210 }, { "epoch": 8.301476976542137, "grad_norm": 0.0007824657950550318, "learning_rate": 1.541956269910223e-05, "loss": 0.0001, "step": 38220 }, { "epoch": 8.303649000868809, "grad_norm": 0.0009597129537723958, "learning_rate": 1.5410512597741095e-05, "loss": 0.0001, "step": 38230 }, { "epoch": 8.305821025195483, "grad_norm": 0.0038668003398925066, "learning_rate": 1.540146249637996e-05, "loss": 0.0001, "step": 38240 }, { "epoch": 8.307993049522155, "grad_norm": 0.0006443361635319889, "learning_rate": 1.5392412395018824e-05, "loss": 0.0001, "step": 38250 }, { "epoch": 8.310165073848827, "grad_norm": 0.0010253982618451118, "learning_rate": 1.5383362293657692e-05, "loss": 0.0008, "step": 38260 }, { "epoch": 8.3123370981755, "grad_norm": 0.0017492685001343489, "learning_rate": 1.5374312192296556e-05, "loss": 0.0001, "step": 38270 }, { "epoch": 8.314509122502171, "grad_norm": 0.1790028214454651, "learning_rate": 1.536526209093542e-05, "loss": 0.0002, "step": 38280 }, { "epoch": 8.316681146828845, "grad_norm": 0.002354201627895236, "learning_rate": 1.5356211989574285e-05, "loss": 0.0002, "step": 38290 }, { "epoch": 8.318853171155517, "grad_norm": 0.004823198076337576, "learning_rate": 1.534716188821315e-05, "loss": 0.0001, "step": 38300 }, { "epoch": 8.32102519548219, "grad_norm": 0.0006782126147300005, "learning_rate": 1.5338111786852013e-05, "loss": 0.0073, "step": 38310 }, { "epoch": 8.323197219808861, "grad_norm": 0.0016151006566360593, "learning_rate": 1.5329061685490878e-05, "loss": 0.0001, "step": 38320 }, { "epoch": 8.325369244135535, "grad_norm": 0.0009404784650541842, "learning_rate": 1.5320011584129742e-05, "loss": 0.0001, "step": 38330 }, { "epoch": 8.327541268462207, "grad_norm": 0.002452719956636429, "learning_rate": 1.5310961482768606e-05, "loss": 0.0121, "step": 38340 }, { "epoch": 8.32971329278888, "grad_norm": 0.0007282031583599746, "learning_rate": 1.530191138140747e-05, "loss": 0.0259, "step": 38350 }, { "epoch": 8.331885317115551, "grad_norm": 0.0006568600074388087, "learning_rate": 1.5292861280046335e-05, "loss": 0.0081, "step": 38360 }, { "epoch": 8.334057341442223, "grad_norm": 0.0006093371193856001, "learning_rate": 1.52838111786852e-05, "loss": 0.0391, "step": 38370 }, { "epoch": 8.336229365768897, "grad_norm": 0.0006100427708588541, "learning_rate": 1.5274761077324067e-05, "loss": 0.0001, "step": 38380 }, { "epoch": 8.33840139009557, "grad_norm": 0.0006650561117567122, "learning_rate": 1.526571097596293e-05, "loss": 0.0001, "step": 38390 }, { "epoch": 8.340573414422241, "grad_norm": 0.0009484770707786083, "learning_rate": 1.5256660874601798e-05, "loss": 0.0001, "step": 38400 }, { "epoch": 8.342745438748914, "grad_norm": 0.0006599361076951027, "learning_rate": 1.5247610773240662e-05, "loss": 0.0001, "step": 38410 }, { "epoch": 8.344917463075586, "grad_norm": 0.000644295010715723, "learning_rate": 1.5238560671879526e-05, "loss": 0.0021, "step": 38420 }, { "epoch": 8.34708948740226, "grad_norm": 0.0008500678814016283, "learning_rate": 1.522951057051839e-05, "loss": 0.0001, "step": 38430 }, { "epoch": 8.349261511728931, "grad_norm": 0.0006609293050132692, "learning_rate": 1.5220460469157255e-05, "loss": 0.0001, "step": 38440 }, { "epoch": 8.351433536055604, "grad_norm": 0.0006641658837907016, "learning_rate": 1.521141036779612e-05, "loss": 0.0001, "step": 38450 }, { "epoch": 8.353605560382276, "grad_norm": 0.000987962819635868, "learning_rate": 1.5202360266434984e-05, "loss": 0.0001, "step": 38460 }, { "epoch": 8.35577758470895, "grad_norm": 0.003739980747923255, "learning_rate": 1.5193310165073848e-05, "loss": 0.0001, "step": 38470 }, { "epoch": 8.357949609035622, "grad_norm": 0.0006407461478374898, "learning_rate": 1.5184260063712712e-05, "loss": 0.0001, "step": 38480 }, { "epoch": 8.360121633362294, "grad_norm": 0.0005970309721305966, "learning_rate": 1.517520996235158e-05, "loss": 0.0054, "step": 38490 }, { "epoch": 8.362293657688966, "grad_norm": 0.0006966181681491435, "learning_rate": 1.5166159860990444e-05, "loss": 0.0001, "step": 38500 }, { "epoch": 8.364465682015638, "grad_norm": 0.0006113156559877098, "learning_rate": 1.5157109759629309e-05, "loss": 0.0001, "step": 38510 }, { "epoch": 8.366637706342312, "grad_norm": 0.0006402576109394431, "learning_rate": 1.5148059658268173e-05, "loss": 0.0001, "step": 38520 }, { "epoch": 8.368809730668984, "grad_norm": 0.0006629484705626965, "learning_rate": 1.5139009556907039e-05, "loss": 0.0001, "step": 38530 }, { "epoch": 8.370981754995656, "grad_norm": 0.0009653762681409717, "learning_rate": 1.5129959455545903e-05, "loss": 0.0001, "step": 38540 }, { "epoch": 8.373153779322328, "grad_norm": 0.000561116321478039, "learning_rate": 1.5120909354184768e-05, "loss": 0.0001, "step": 38550 }, { "epoch": 8.375325803649002, "grad_norm": 0.0005761196371167898, "learning_rate": 1.5111859252823632e-05, "loss": 0.0197, "step": 38560 }, { "epoch": 8.377497827975674, "grad_norm": 0.0005579580320045352, "learning_rate": 1.5102809151462496e-05, "loss": 0.0001, "step": 38570 }, { "epoch": 8.379669852302346, "grad_norm": 0.0008516975794918835, "learning_rate": 1.509375905010136e-05, "loss": 0.0315, "step": 38580 }, { "epoch": 8.381841876629018, "grad_norm": 0.002303760964423418, "learning_rate": 1.5084708948740225e-05, "loss": 0.0001, "step": 38590 }, { "epoch": 8.38401390095569, "grad_norm": 0.000604830333031714, "learning_rate": 1.5075658847379093e-05, "loss": 0.0001, "step": 38600 }, { "epoch": 8.386185925282364, "grad_norm": 0.0026536278892308474, "learning_rate": 1.5066608746017957e-05, "loss": 0.0068, "step": 38610 }, { "epoch": 8.388357949609036, "grad_norm": 0.0028301740530878305, "learning_rate": 1.5057558644656821e-05, "loss": 0.0001, "step": 38620 }, { "epoch": 8.390529973935708, "grad_norm": 0.0008189657819457352, "learning_rate": 1.5048508543295686e-05, "loss": 0.0002, "step": 38630 }, { "epoch": 8.39270199826238, "grad_norm": 0.0007581166573800147, "learning_rate": 1.503945844193455e-05, "loss": 0.0088, "step": 38640 }, { "epoch": 8.394874022589054, "grad_norm": 0.000769811449572444, "learning_rate": 1.5030408340573415e-05, "loss": 0.0062, "step": 38650 }, { "epoch": 8.397046046915726, "grad_norm": 0.0007086714031174779, "learning_rate": 1.502135823921228e-05, "loss": 0.0001, "step": 38660 }, { "epoch": 8.399218071242398, "grad_norm": 0.0005801831721328199, "learning_rate": 1.5012308137851145e-05, "loss": 0.0001, "step": 38670 }, { "epoch": 8.40139009556907, "grad_norm": 0.0008001170353963971, "learning_rate": 1.500325803649001e-05, "loss": 0.0001, "step": 38680 }, { "epoch": 8.403562119895742, "grad_norm": 0.0005618112627416849, "learning_rate": 1.4994207935128874e-05, "loss": 0.0461, "step": 38690 }, { "epoch": 8.405734144222416, "grad_norm": 0.0006344440625980496, "learning_rate": 1.4985157833767738e-05, "loss": 0.0001, "step": 38700 }, { "epoch": 8.407906168549088, "grad_norm": 0.0007707632030360401, "learning_rate": 1.4976107732406602e-05, "loss": 0.0001, "step": 38710 }, { "epoch": 8.41007819287576, "grad_norm": 0.001076050684787333, "learning_rate": 1.496705763104547e-05, "loss": 0.0003, "step": 38720 }, { "epoch": 8.412250217202432, "grad_norm": 0.0006171344430185854, "learning_rate": 1.4958007529684334e-05, "loss": 0.0092, "step": 38730 }, { "epoch": 8.414422241529104, "grad_norm": 0.0005999759305268526, "learning_rate": 1.4948957428323199e-05, "loss": 0.0009, "step": 38740 }, { "epoch": 8.416594265855778, "grad_norm": 0.005478974897414446, "learning_rate": 1.4939907326962063e-05, "loss": 0.0002, "step": 38750 }, { "epoch": 8.41876629018245, "grad_norm": 0.0006091848481446505, "learning_rate": 1.4930857225600927e-05, "loss": 0.0001, "step": 38760 }, { "epoch": 8.420938314509122, "grad_norm": 0.0005783461383543909, "learning_rate": 1.4921807124239792e-05, "loss": 0.0001, "step": 38770 }, { "epoch": 8.423110338835794, "grad_norm": 0.0005965415039099753, "learning_rate": 1.4912757022878656e-05, "loss": 0.0002, "step": 38780 }, { "epoch": 8.425282363162468, "grad_norm": 0.0006767417071387172, "learning_rate": 1.4903706921517522e-05, "loss": 0.0075, "step": 38790 }, { "epoch": 8.42745438748914, "grad_norm": 0.0005308814579620957, "learning_rate": 1.4894656820156386e-05, "loss": 0.0001, "step": 38800 }, { "epoch": 8.429626411815812, "grad_norm": 0.0010998549405485392, "learning_rate": 1.488560671879525e-05, "loss": 0.0001, "step": 38810 }, { "epoch": 8.431798436142484, "grad_norm": 0.000694752496201545, "learning_rate": 1.4876556617434115e-05, "loss": 0.0002, "step": 38820 }, { "epoch": 8.433970460469157, "grad_norm": 0.0007519248174503446, "learning_rate": 1.4867506516072983e-05, "loss": 0.0001, "step": 38830 }, { "epoch": 8.43614248479583, "grad_norm": 0.0005111406790092587, "learning_rate": 1.4858456414711847e-05, "loss": 0.0001, "step": 38840 }, { "epoch": 8.438314509122502, "grad_norm": 0.0005285285878926516, "learning_rate": 1.4849406313350711e-05, "loss": 0.0001, "step": 38850 }, { "epoch": 8.440486533449175, "grad_norm": 0.0014295554719865322, "learning_rate": 1.4840356211989576e-05, "loss": 0.0325, "step": 38860 }, { "epoch": 8.442658557775847, "grad_norm": 0.04170700162649155, "learning_rate": 1.483130611062844e-05, "loss": 0.0001, "step": 38870 }, { "epoch": 8.444830582102519, "grad_norm": 0.0023596552200615406, "learning_rate": 1.4822256009267304e-05, "loss": 0.0001, "step": 38880 }, { "epoch": 8.447002606429193, "grad_norm": 0.0032503660768270493, "learning_rate": 1.4813205907906169e-05, "loss": 0.0001, "step": 38890 }, { "epoch": 8.449174630755865, "grad_norm": 0.0007728934870101511, "learning_rate": 1.4804155806545033e-05, "loss": 0.0001, "step": 38900 }, { "epoch": 8.451346655082537, "grad_norm": 0.3552645146846771, "learning_rate": 1.4795105705183897e-05, "loss": 0.0047, "step": 38910 }, { "epoch": 8.453518679409209, "grad_norm": 0.0006022463203407824, "learning_rate": 1.4786055603822762e-05, "loss": 0.0001, "step": 38920 }, { "epoch": 8.455690703735883, "grad_norm": 0.000813568476587534, "learning_rate": 1.4777005502461628e-05, "loss": 0.0001, "step": 38930 }, { "epoch": 8.457862728062555, "grad_norm": 0.0005289530963636935, "learning_rate": 1.4767955401100492e-05, "loss": 0.0001, "step": 38940 }, { "epoch": 8.460034752389227, "grad_norm": 0.0014709793031215668, "learning_rate": 1.4758905299739358e-05, "loss": 0.0054, "step": 38950 }, { "epoch": 8.462206776715899, "grad_norm": 0.0005715846200473607, "learning_rate": 1.4749855198378224e-05, "loss": 0.0045, "step": 38960 }, { "epoch": 8.464378801042571, "grad_norm": 0.0005603663739748299, "learning_rate": 1.4740805097017089e-05, "loss": 0.0001, "step": 38970 }, { "epoch": 8.466550825369245, "grad_norm": 0.0005445944261737168, "learning_rate": 1.4731754995655953e-05, "loss": 0.0001, "step": 38980 }, { "epoch": 8.468722849695917, "grad_norm": 0.1283123642206192, "learning_rate": 1.4722704894294817e-05, "loss": 0.0002, "step": 38990 }, { "epoch": 8.470894874022589, "grad_norm": 0.0005378610803745687, "learning_rate": 1.4713654792933682e-05, "loss": 0.0001, "step": 39000 }, { "epoch": 8.473066898349261, "grad_norm": 0.0007877520401962101, "learning_rate": 1.4704604691572546e-05, "loss": 0.0001, "step": 39010 }, { "epoch": 8.475238922675935, "grad_norm": 0.0006150746485218406, "learning_rate": 1.469555459021141e-05, "loss": 0.0001, "step": 39020 }, { "epoch": 8.477410947002607, "grad_norm": 0.0017679219599813223, "learning_rate": 1.4686504488850275e-05, "loss": 0.0059, "step": 39030 }, { "epoch": 8.479582971329279, "grad_norm": 0.018884725868701935, "learning_rate": 1.4677454387489139e-05, "loss": 0.0001, "step": 39040 }, { "epoch": 8.481754995655951, "grad_norm": 0.0010196593357250094, "learning_rate": 1.4668404286128003e-05, "loss": 0.0017, "step": 39050 }, { "epoch": 8.483927019982623, "grad_norm": 0.0005284012877382338, "learning_rate": 1.4659354184766871e-05, "loss": 0.0001, "step": 39060 }, { "epoch": 8.486099044309297, "grad_norm": 0.008003398776054382, "learning_rate": 1.4650304083405735e-05, "loss": 0.0001, "step": 39070 }, { "epoch": 8.488271068635969, "grad_norm": 0.00324247102253139, "learning_rate": 1.46412539820446e-05, "loss": 0.0001, "step": 39080 }, { "epoch": 8.490443092962641, "grad_norm": 0.0009854629170149565, "learning_rate": 1.4633108890819578e-05, "loss": 0.0232, "step": 39090 }, { "epoch": 8.492615117289313, "grad_norm": 0.0005126107134856284, "learning_rate": 1.4624058789458442e-05, "loss": 0.0001, "step": 39100 }, { "epoch": 8.494787141615985, "grad_norm": 0.0005185718182474375, "learning_rate": 1.4615008688097306e-05, "loss": 0.0001, "step": 39110 }, { "epoch": 8.496959165942659, "grad_norm": 0.0007868999964557588, "learning_rate": 1.460595858673617e-05, "loss": 0.0001, "step": 39120 }, { "epoch": 8.499131190269331, "grad_norm": 0.0005237551522441208, "learning_rate": 1.4596908485375038e-05, "loss": 0.0001, "step": 39130 }, { "epoch": 8.501303214596003, "grad_norm": 0.001091836835257709, "learning_rate": 1.4587858384013903e-05, "loss": 0.0001, "step": 39140 }, { "epoch": 8.503475238922675, "grad_norm": 0.000753571861423552, "learning_rate": 1.4578808282652767e-05, "loss": 0.0001, "step": 39150 }, { "epoch": 8.50564726324935, "grad_norm": 0.0010601403191685677, "learning_rate": 1.4569758181291631e-05, "loss": 0.0001, "step": 39160 }, { "epoch": 8.507819287576021, "grad_norm": 0.0004902264918200672, "learning_rate": 1.4560708079930496e-05, "loss": 0.0001, "step": 39170 }, { "epoch": 8.509991311902693, "grad_norm": 0.0004899317282252014, "learning_rate": 1.455165797856936e-05, "loss": 0.0001, "step": 39180 }, { "epoch": 8.512163336229365, "grad_norm": 0.00047692423686385155, "learning_rate": 1.4542607877208226e-05, "loss": 0.0053, "step": 39190 }, { "epoch": 8.514335360556037, "grad_norm": 0.006296246778219938, "learning_rate": 1.453355777584709e-05, "loss": 0.0072, "step": 39200 }, { "epoch": 8.516507384882711, "grad_norm": 0.0026722548063844442, "learning_rate": 1.4524507674485955e-05, "loss": 0.0001, "step": 39210 }, { "epoch": 8.518679409209383, "grad_norm": 0.0005234789568930864, "learning_rate": 1.4515457573124819e-05, "loss": 0.0001, "step": 39220 }, { "epoch": 8.520851433536055, "grad_norm": 0.0007127950084395707, "learning_rate": 1.4506407471763683e-05, "loss": 0.0018, "step": 39230 }, { "epoch": 8.523023457862728, "grad_norm": 0.0004754096153192222, "learning_rate": 1.4497357370402548e-05, "loss": 0.0001, "step": 39240 }, { "epoch": 8.5251954821894, "grad_norm": 0.0004704821913037449, "learning_rate": 1.4488307269041416e-05, "loss": 0.0001, "step": 39250 }, { "epoch": 8.527367506516073, "grad_norm": 0.0004992211470380425, "learning_rate": 1.447925716768028e-05, "loss": 0.0062, "step": 39260 }, { "epoch": 8.529539530842746, "grad_norm": 0.0004700243007391691, "learning_rate": 1.4470207066319144e-05, "loss": 0.006, "step": 39270 }, { "epoch": 8.531711555169418, "grad_norm": 0.0009023443562909961, "learning_rate": 1.4461156964958009e-05, "loss": 0.0001, "step": 39280 }, { "epoch": 8.53388357949609, "grad_norm": 0.00045638513984158635, "learning_rate": 1.4452106863596873e-05, "loss": 0.0088, "step": 39290 }, { "epoch": 8.536055603822764, "grad_norm": 0.0004613750206772238, "learning_rate": 1.4443056762235737e-05, "loss": 0.0, "step": 39300 }, { "epoch": 8.538227628149436, "grad_norm": 0.0005079872789792717, "learning_rate": 1.4434006660874602e-05, "loss": 0.0001, "step": 39310 }, { "epoch": 8.540399652476108, "grad_norm": 0.00045794787001796067, "learning_rate": 1.4424956559513466e-05, "loss": 0.0, "step": 39320 }, { "epoch": 8.54257167680278, "grad_norm": 0.0005387124256230891, "learning_rate": 1.4415906458152332e-05, "loss": 0.0001, "step": 39330 }, { "epoch": 8.544743701129452, "grad_norm": 0.0005204430781304836, "learning_rate": 1.4406856356791196e-05, "loss": 0.0001, "step": 39340 }, { "epoch": 8.546915725456126, "grad_norm": 0.0004614073259290308, "learning_rate": 1.439780625543006e-05, "loss": 0.0, "step": 39350 }, { "epoch": 8.549087749782798, "grad_norm": 0.00045747487456537783, "learning_rate": 1.4388756154068928e-05, "loss": 0.0001, "step": 39360 }, { "epoch": 8.55125977410947, "grad_norm": 0.00136624276638031, "learning_rate": 1.4379706052707793e-05, "loss": 0.0, "step": 39370 }, { "epoch": 8.553431798436142, "grad_norm": 0.00045062758726999164, "learning_rate": 1.4370655951346657e-05, "loss": 0.004, "step": 39380 }, { "epoch": 8.555603822762816, "grad_norm": 0.00045702551142312586, "learning_rate": 1.4361605849985521e-05, "loss": 0.0001, "step": 39390 }, { "epoch": 8.557775847089488, "grad_norm": 0.0004522506205830723, "learning_rate": 1.4352555748624386e-05, "loss": 0.0, "step": 39400 }, { "epoch": 8.55994787141616, "grad_norm": 0.0004662805295083672, "learning_rate": 1.434350564726325e-05, "loss": 0.0001, "step": 39410 }, { "epoch": 8.562119895742832, "grad_norm": 0.0004481975338421762, "learning_rate": 1.4334455545902114e-05, "loss": 0.0001, "step": 39420 }, { "epoch": 8.564291920069504, "grad_norm": 0.0004583086702041328, "learning_rate": 1.4325405444540979e-05, "loss": 0.0, "step": 39430 }, { "epoch": 8.566463944396178, "grad_norm": 0.0004621725529432297, "learning_rate": 1.4316355343179843e-05, "loss": 0.0, "step": 39440 }, { "epoch": 8.56863596872285, "grad_norm": 0.00045265990775078535, "learning_rate": 1.4307305241818707e-05, "loss": 0.0, "step": 39450 }, { "epoch": 8.570807993049522, "grad_norm": 0.0004928920534439385, "learning_rate": 1.4298255140457573e-05, "loss": 0.0001, "step": 39460 }, { "epoch": 8.572980017376194, "grad_norm": 0.15685199201107025, "learning_rate": 1.428920503909644e-05, "loss": 0.0036, "step": 39470 }, { "epoch": 8.575152041702868, "grad_norm": 0.0004423794453032315, "learning_rate": 1.4280154937735304e-05, "loss": 0.0001, "step": 39480 }, { "epoch": 8.57732406602954, "grad_norm": 0.00043715888750739396, "learning_rate": 1.4271104836374168e-05, "loss": 0.0, "step": 39490 }, { "epoch": 8.579496090356212, "grad_norm": 0.00045279006008058786, "learning_rate": 1.4262054735013034e-05, "loss": 0.0, "step": 39500 }, { "epoch": 8.581668114682884, "grad_norm": 0.00045085386955179274, "learning_rate": 1.4253004633651899e-05, "loss": 0.0001, "step": 39510 }, { "epoch": 8.583840139009556, "grad_norm": 0.0004693444352596998, "learning_rate": 1.4243954532290763e-05, "loss": 0.0001, "step": 39520 }, { "epoch": 8.58601216333623, "grad_norm": 0.0004297494888305664, "learning_rate": 1.4234904430929627e-05, "loss": 0.0, "step": 39530 }, { "epoch": 8.588184187662902, "grad_norm": 0.0005316234892234206, "learning_rate": 1.4225854329568492e-05, "loss": 0.0, "step": 39540 }, { "epoch": 8.590356211989574, "grad_norm": 0.0004413987626321614, "learning_rate": 1.4216804228207356e-05, "loss": 0.0001, "step": 39550 }, { "epoch": 8.592528236316246, "grad_norm": 0.0004459246410988271, "learning_rate": 1.420775412684622e-05, "loss": 0.0058, "step": 39560 }, { "epoch": 8.59470026064292, "grad_norm": 0.0004368575755506754, "learning_rate": 1.4198704025485085e-05, "loss": 0.0001, "step": 39570 }, { "epoch": 8.596872284969592, "grad_norm": 0.0004350426606833935, "learning_rate": 1.4189653924123949e-05, "loss": 0.0001, "step": 39580 }, { "epoch": 8.599044309296264, "grad_norm": 0.0005037335213273764, "learning_rate": 1.4180603822762817e-05, "loss": 0.0001, "step": 39590 }, { "epoch": 8.601216333622936, "grad_norm": 0.04642053693532944, "learning_rate": 1.4171553721401681e-05, "loss": 0.0001, "step": 39600 }, { "epoch": 8.603388357949608, "grad_norm": 0.00044416176388040185, "learning_rate": 1.4162503620040545e-05, "loss": 0.0, "step": 39610 }, { "epoch": 8.605560382276282, "grad_norm": 0.0013276516692712903, "learning_rate": 1.415345351867941e-05, "loss": 0.0001, "step": 39620 }, { "epoch": 8.607732406602954, "grad_norm": 0.0006277307402342558, "learning_rate": 1.4144403417318276e-05, "loss": 0.0, "step": 39630 }, { "epoch": 8.609904430929626, "grad_norm": 0.0004260788264218718, "learning_rate": 1.413535331595714e-05, "loss": 0.0, "step": 39640 }, { "epoch": 8.612076455256299, "grad_norm": 0.0004270559875294566, "learning_rate": 1.4126303214596004e-05, "loss": 0.0, "step": 39650 }, { "epoch": 8.61424847958297, "grad_norm": 0.0004411809495650232, "learning_rate": 1.4117253113234869e-05, "loss": 0.0, "step": 39660 }, { "epoch": 8.616420503909644, "grad_norm": 0.00043373851804062724, "learning_rate": 1.4108203011873733e-05, "loss": 0.0, "step": 39670 }, { "epoch": 8.618592528236316, "grad_norm": 0.0004543520917650312, "learning_rate": 1.4099152910512597e-05, "loss": 0.0001, "step": 39680 }, { "epoch": 8.620764552562989, "grad_norm": 0.00047740931040607393, "learning_rate": 1.4090102809151462e-05, "loss": 0.0035, "step": 39690 }, { "epoch": 8.62293657688966, "grad_norm": 0.0004208452010061592, "learning_rate": 1.408105270779033e-05, "loss": 0.0, "step": 39700 }, { "epoch": 8.625108601216333, "grad_norm": 0.0012495043920353055, "learning_rate": 1.4072002606429194e-05, "loss": 0.0001, "step": 39710 }, { "epoch": 8.627280625543007, "grad_norm": 0.00042732080328278244, "learning_rate": 1.4062952505068058e-05, "loss": 0.0, "step": 39720 }, { "epoch": 8.629452649869679, "grad_norm": 0.00045733177103102207, "learning_rate": 1.4053902403706922e-05, "loss": 0.0001, "step": 39730 }, { "epoch": 8.63162467419635, "grad_norm": 0.00040695929783396423, "learning_rate": 1.4044852302345787e-05, "loss": 0.001, "step": 39740 }, { "epoch": 8.633796698523023, "grad_norm": 0.0004219406400807202, "learning_rate": 1.4035802200984651e-05, "loss": 0.0002, "step": 39750 }, { "epoch": 8.635968722849697, "grad_norm": 0.0004689696943387389, "learning_rate": 1.4026752099623517e-05, "loss": 0.0001, "step": 39760 }, { "epoch": 8.638140747176369, "grad_norm": 0.0004632935451809317, "learning_rate": 1.4017701998262381e-05, "loss": 0.0001, "step": 39770 }, { "epoch": 8.64031277150304, "grad_norm": 0.01873939484357834, "learning_rate": 1.4008651896901246e-05, "loss": 0.0001, "step": 39780 }, { "epoch": 8.642484795829713, "grad_norm": 0.00048569723730906844, "learning_rate": 1.399960179554011e-05, "loss": 0.0057, "step": 39790 }, { "epoch": 8.644656820156385, "grad_norm": 0.0009552659466862679, "learning_rate": 1.3990551694178974e-05, "loss": 0.0011, "step": 39800 }, { "epoch": 8.646828844483059, "grad_norm": 34.08624267578125, "learning_rate": 1.3981501592817839e-05, "loss": 0.0244, "step": 39810 }, { "epoch": 8.64900086880973, "grad_norm": 0.000415660731960088, "learning_rate": 1.3972451491456707e-05, "loss": 0.0, "step": 39820 }, { "epoch": 8.651172893136403, "grad_norm": 0.0006440936122089624, "learning_rate": 1.3963401390095571e-05, "loss": 0.0001, "step": 39830 }, { "epoch": 8.653344917463075, "grad_norm": 0.0013137052301317453, "learning_rate": 1.3954351288734435e-05, "loss": 0.0064, "step": 39840 }, { "epoch": 8.655516941789749, "grad_norm": 0.0004171818436589092, "learning_rate": 1.39453011873733e-05, "loss": 0.0001, "step": 39850 }, { "epoch": 8.657688966116421, "grad_norm": 0.00041515124030411243, "learning_rate": 1.3936251086012164e-05, "loss": 0.0, "step": 39860 }, { "epoch": 8.659860990443093, "grad_norm": 0.00483085447922349, "learning_rate": 1.3927200984651028e-05, "loss": 0.0001, "step": 39870 }, { "epoch": 8.662033014769765, "grad_norm": 0.00041427038377150893, "learning_rate": 1.3918150883289893e-05, "loss": 0.0, "step": 39880 }, { "epoch": 8.664205039096437, "grad_norm": 0.0004123119288124144, "learning_rate": 1.3909100781928757e-05, "loss": 0.0, "step": 39890 }, { "epoch": 8.666377063423111, "grad_norm": 0.0004082611412741244, "learning_rate": 1.3900050680567623e-05, "loss": 0.0042, "step": 39900 }, { "epoch": 8.668549087749783, "grad_norm": 0.00040656846249476075, "learning_rate": 1.3891000579206487e-05, "loss": 0.0, "step": 39910 }, { "epoch": 8.670721112076455, "grad_norm": 0.00041427111136727035, "learning_rate": 1.3881950477845352e-05, "loss": 0.0, "step": 39920 }, { "epoch": 8.672893136403127, "grad_norm": 0.0007840920588932931, "learning_rate": 1.387290037648422e-05, "loss": 0.0611, "step": 39930 }, { "epoch": 8.675065160729801, "grad_norm": 0.0017191257793456316, "learning_rate": 1.3863850275123084e-05, "loss": 0.0057, "step": 39940 }, { "epoch": 8.677237185056473, "grad_norm": 0.0021400810219347477, "learning_rate": 1.3854800173761948e-05, "loss": 0.0064, "step": 39950 }, { "epoch": 8.679409209383145, "grad_norm": 0.002642604988068342, "learning_rate": 1.3845750072400812e-05, "loss": 0.0002, "step": 39960 }, { "epoch": 8.681581233709817, "grad_norm": 0.0015774896601215005, "learning_rate": 1.3836699971039677e-05, "loss": 0.0002, "step": 39970 }, { "epoch": 8.68375325803649, "grad_norm": 0.0011439747177064419, "learning_rate": 1.3827649869678541e-05, "loss": 0.0001, "step": 39980 }, { "epoch": 8.685925282363163, "grad_norm": 0.0010642379056662321, "learning_rate": 1.3818599768317405e-05, "loss": 0.0053, "step": 39990 }, { "epoch": 8.688097306689835, "grad_norm": 0.0014020922826603055, "learning_rate": 1.380954966695627e-05, "loss": 0.0043, "step": 40000 }, { "epoch": 8.690269331016507, "grad_norm": 0.001087503507733345, "learning_rate": 1.3800499565595134e-05, "loss": 0.0001, "step": 40010 }, { "epoch": 8.69244135534318, "grad_norm": 0.0009494571713730693, "learning_rate": 1.3791449464233998e-05, "loss": 0.0001, "step": 40020 }, { "epoch": 8.694613379669851, "grad_norm": 0.0008690246613696218, "learning_rate": 1.3782399362872864e-05, "loss": 0.0001, "step": 40030 }, { "epoch": 8.696785403996525, "grad_norm": 0.0007888951804488897, "learning_rate": 1.3773349261511729e-05, "loss": 0.0001, "step": 40040 }, { "epoch": 8.698957428323197, "grad_norm": 0.0010448385728523135, "learning_rate": 1.3764299160150595e-05, "loss": 0.0001, "step": 40050 }, { "epoch": 8.70112945264987, "grad_norm": 0.0006909267976880074, "learning_rate": 1.375524905878946e-05, "loss": 0.0001, "step": 40060 }, { "epoch": 8.703301476976542, "grad_norm": 0.000706880702637136, "learning_rate": 1.3746198957428325e-05, "loss": 0.0052, "step": 40070 }, { "epoch": 8.705473501303215, "grad_norm": 0.0007796403951942921, "learning_rate": 1.373714885606719e-05, "loss": 0.0001, "step": 40080 }, { "epoch": 8.707645525629887, "grad_norm": 0.0005805023247376084, "learning_rate": 1.3728098754706054e-05, "loss": 0.0054, "step": 40090 }, { "epoch": 8.70981754995656, "grad_norm": 0.0006314498605206609, "learning_rate": 1.3719048653344918e-05, "loss": 0.0001, "step": 40100 }, { "epoch": 8.711989574283232, "grad_norm": 0.0006966400542296469, "learning_rate": 1.3709998551983783e-05, "loss": 0.0001, "step": 40110 }, { "epoch": 8.714161598609904, "grad_norm": 0.0006841762224212289, "learning_rate": 1.3700948450622647e-05, "loss": 0.0054, "step": 40120 }, { "epoch": 8.716333622936578, "grad_norm": 0.0008951672934927046, "learning_rate": 1.3691898349261511e-05, "loss": 0.0001, "step": 40130 }, { "epoch": 8.71850564726325, "grad_norm": 0.0005166791379451752, "learning_rate": 1.3682848247900376e-05, "loss": 0.0001, "step": 40140 }, { "epoch": 8.720677671589922, "grad_norm": 0.0013270946219563484, "learning_rate": 1.367379814653924e-05, "loss": 0.0001, "step": 40150 }, { "epoch": 8.722849695916594, "grad_norm": 0.0004784400516655296, "learning_rate": 1.3664748045178108e-05, "loss": 0.0001, "step": 40160 }, { "epoch": 8.725021720243266, "grad_norm": 0.0004924449021928012, "learning_rate": 1.3655697943816972e-05, "loss": 0.0001, "step": 40170 }, { "epoch": 8.72719374456994, "grad_norm": 0.0005478410166688263, "learning_rate": 1.3646647842455836e-05, "loss": 0.0001, "step": 40180 }, { "epoch": 8.729365768896612, "grad_norm": 0.00045741075882688165, "learning_rate": 1.3638502751230814e-05, "loss": 0.008, "step": 40190 }, { "epoch": 8.731537793223284, "grad_norm": 0.00053097412455827, "learning_rate": 1.3629452649869679e-05, "loss": 0.0001, "step": 40200 }, { "epoch": 8.733709817549956, "grad_norm": 0.00047605347936041653, "learning_rate": 1.3620402548508543e-05, "loss": 0.0049, "step": 40210 }, { "epoch": 8.73588184187663, "grad_norm": 0.00046191230649128556, "learning_rate": 1.3611352447147407e-05, "loss": 0.0001, "step": 40220 }, { "epoch": 8.738053866203302, "grad_norm": 0.0006621324573643506, "learning_rate": 1.3602302345786275e-05, "loss": 0.0042, "step": 40230 }, { "epoch": 8.740225890529974, "grad_norm": 0.0006459571304731071, "learning_rate": 1.359325224442514e-05, "loss": 0.0001, "step": 40240 }, { "epoch": 8.742397914856646, "grad_norm": 0.0004514079191721976, "learning_rate": 1.3584202143064004e-05, "loss": 0.0001, "step": 40250 }, { "epoch": 8.744569939183318, "grad_norm": 0.0008339316700585186, "learning_rate": 1.3575152041702868e-05, "loss": 0.0001, "step": 40260 }, { "epoch": 8.746741963509992, "grad_norm": 0.0004699197015725076, "learning_rate": 1.3567006950477846e-05, "loss": 0.0176, "step": 40270 }, { "epoch": 8.748913987836664, "grad_norm": 0.0005045742727816105, "learning_rate": 1.355795684911671e-05, "loss": 0.0, "step": 40280 }, { "epoch": 8.751086012163336, "grad_norm": 0.0014669718220829964, "learning_rate": 1.3548906747755575e-05, "loss": 0.0001, "step": 40290 }, { "epoch": 8.753258036490008, "grad_norm": 0.0008972100913524628, "learning_rate": 1.3539856646394442e-05, "loss": 0.0346, "step": 40300 }, { "epoch": 8.755430060816682, "grad_norm": 0.1194867491722107, "learning_rate": 1.3530806545033307e-05, "loss": 0.0004, "step": 40310 }, { "epoch": 8.757602085143354, "grad_norm": 0.00045046405284665525, "learning_rate": 1.3521756443672171e-05, "loss": 0.0023, "step": 40320 }, { "epoch": 8.759774109470026, "grad_norm": 0.005810149013996124, "learning_rate": 1.3512706342311035e-05, "loss": 0.0001, "step": 40330 }, { "epoch": 8.761946133796698, "grad_norm": 0.0006038735737092793, "learning_rate": 1.35036562409499e-05, "loss": 0.0, "step": 40340 }, { "epoch": 8.76411815812337, "grad_norm": 0.001365828444249928, "learning_rate": 1.3494606139588764e-05, "loss": 0.0001, "step": 40350 }, { "epoch": 8.766290182450044, "grad_norm": 0.0006109884125180542, "learning_rate": 1.3485556038227628e-05, "loss": 0.0001, "step": 40360 }, { "epoch": 8.768462206776716, "grad_norm": 0.00043821826693601906, "learning_rate": 1.3476505936866493e-05, "loss": 0.0002, "step": 40370 }, { "epoch": 8.770634231103388, "grad_norm": 0.038904059678316116, "learning_rate": 1.3467455835505357e-05, "loss": 0.0002, "step": 40380 }, { "epoch": 8.77280625543006, "grad_norm": 0.00041763324406929314, "learning_rate": 1.3458405734144221e-05, "loss": 0.0049, "step": 40390 }, { "epoch": 8.774978279756734, "grad_norm": 0.0004849474353250116, "learning_rate": 1.3449355632783087e-05, "loss": 0.0001, "step": 40400 }, { "epoch": 8.777150304083406, "grad_norm": 0.0006203539669513702, "learning_rate": 1.3440305531421953e-05, "loss": 0.0001, "step": 40410 }, { "epoch": 8.779322328410078, "grad_norm": 0.0029206108301877975, "learning_rate": 1.3431255430060818e-05, "loss": 0.0001, "step": 40420 }, { "epoch": 8.78149435273675, "grad_norm": 0.00042043565190397203, "learning_rate": 1.3422205328699684e-05, "loss": 0.0042, "step": 40430 }, { "epoch": 8.783666377063422, "grad_norm": 0.00043287023436278105, "learning_rate": 1.3413155227338548e-05, "loss": 0.0001, "step": 40440 }, { "epoch": 8.785838401390096, "grad_norm": 0.00042107931221835315, "learning_rate": 1.3404105125977412e-05, "loss": 0.0001, "step": 40450 }, { "epoch": 8.788010425716768, "grad_norm": 0.00041855682502500713, "learning_rate": 1.3395055024616277e-05, "loss": 0.0001, "step": 40460 }, { "epoch": 8.79018245004344, "grad_norm": 0.0005362842348404229, "learning_rate": 1.3386004923255141e-05, "loss": 0.0, "step": 40470 }, { "epoch": 8.792354474370113, "grad_norm": 0.00041211023926734924, "learning_rate": 1.3376954821894005e-05, "loss": 0.0001, "step": 40480 }, { "epoch": 8.794526498696785, "grad_norm": 0.00043726610601879656, "learning_rate": 1.336790472053287e-05, "loss": 0.0, "step": 40490 }, { "epoch": 8.796698523023458, "grad_norm": 0.0005625045741908252, "learning_rate": 1.3358854619171734e-05, "loss": 0.0001, "step": 40500 }, { "epoch": 8.79887054735013, "grad_norm": 0.0004177498340141028, "learning_rate": 1.3349804517810599e-05, "loss": 0.0001, "step": 40510 }, { "epoch": 8.801042571676803, "grad_norm": 0.0007373658008873463, "learning_rate": 1.3340754416449463e-05, "loss": 0.0036, "step": 40520 }, { "epoch": 8.803214596003475, "grad_norm": 0.00042282009962946177, "learning_rate": 1.333170431508833e-05, "loss": 0.0001, "step": 40530 }, { "epoch": 8.805386620330149, "grad_norm": 0.00047931907465681434, "learning_rate": 1.3322654213727195e-05, "loss": 0.0, "step": 40540 }, { "epoch": 8.80755864465682, "grad_norm": 0.0020538109820336103, "learning_rate": 1.331360411236606e-05, "loss": 0.0001, "step": 40550 }, { "epoch": 8.809730668983493, "grad_norm": 0.00045984104508534074, "learning_rate": 1.3304554011004924e-05, "loss": 0.0001, "step": 40560 }, { "epoch": 8.811902693310165, "grad_norm": 0.00040731808985583484, "learning_rate": 1.329550390964379e-05, "loss": 0.0303, "step": 40570 }, { "epoch": 8.814074717636837, "grad_norm": 0.0009267780114896595, "learning_rate": 1.3286453808282654e-05, "loss": 0.0001, "step": 40580 }, { "epoch": 8.81624674196351, "grad_norm": 0.0004015139420516789, "learning_rate": 1.3277403706921518e-05, "loss": 0.0049, "step": 40590 }, { "epoch": 8.818418766290183, "grad_norm": 0.044160980731248856, "learning_rate": 1.3268353605560383e-05, "loss": 0.006, "step": 40600 }, { "epoch": 8.820590790616855, "grad_norm": 0.00041005350067280233, "learning_rate": 1.3259303504199247e-05, "loss": 0.0036, "step": 40610 }, { "epoch": 8.822762814943527, "grad_norm": 0.00040421911398880184, "learning_rate": 1.3250253402838111e-05, "loss": 0.0, "step": 40620 }, { "epoch": 8.824934839270199, "grad_norm": 0.00039571928209625185, "learning_rate": 1.3241203301476976e-05, "loss": 0.0, "step": 40630 }, { "epoch": 8.827106863596873, "grad_norm": 0.00040001931483857334, "learning_rate": 1.3232153200115843e-05, "loss": 0.0046, "step": 40640 }, { "epoch": 8.829278887923545, "grad_norm": 0.0003898143768310547, "learning_rate": 1.3223103098754708e-05, "loss": 0.0, "step": 40650 }, { "epoch": 8.831450912250217, "grad_norm": 0.00039375320193357766, "learning_rate": 1.3214052997393572e-05, "loss": 0.0001, "step": 40660 }, { "epoch": 8.833622936576889, "grad_norm": 0.0003916964342352003, "learning_rate": 1.3205002896032436e-05, "loss": 0.0, "step": 40670 }, { "epoch": 8.835794960903563, "grad_norm": 0.00039076615939848125, "learning_rate": 1.31959527946713e-05, "loss": 0.0, "step": 40680 }, { "epoch": 8.837966985230235, "grad_norm": 0.00039495810051448643, "learning_rate": 1.3186902693310165e-05, "loss": 0.0002, "step": 40690 }, { "epoch": 8.840139009556907, "grad_norm": 0.0003964413481298834, "learning_rate": 1.3177852591949031e-05, "loss": 0.0, "step": 40700 }, { "epoch": 8.842311033883579, "grad_norm": 0.00039823653060011566, "learning_rate": 1.3168802490587895e-05, "loss": 0.0, "step": 40710 }, { "epoch": 8.844483058210251, "grad_norm": 0.00038708062493242323, "learning_rate": 1.315975238922676e-05, "loss": 0.0062, "step": 40720 }, { "epoch": 8.846655082536925, "grad_norm": 0.00040895427810028195, "learning_rate": 1.3150702287865624e-05, "loss": 0.0, "step": 40730 }, { "epoch": 8.848827106863597, "grad_norm": 0.00038919615326449275, "learning_rate": 1.3141652186504488e-05, "loss": 0.0, "step": 40740 }, { "epoch": 8.85099913119027, "grad_norm": 0.0006516836583614349, "learning_rate": 1.3132602085143353e-05, "loss": 0.0, "step": 40750 }, { "epoch": 8.853171155516941, "grad_norm": 0.00040941167389974, "learning_rate": 1.312355198378222e-05, "loss": 0.0036, "step": 40760 }, { "epoch": 8.855343179843615, "grad_norm": 0.0003876253613270819, "learning_rate": 1.3114501882421085e-05, "loss": 0.0, "step": 40770 }, { "epoch": 8.857515204170287, "grad_norm": 0.0003910251543857157, "learning_rate": 1.310545178105995e-05, "loss": 0.0, "step": 40780 }, { "epoch": 8.85968722849696, "grad_norm": 0.0004716122057288885, "learning_rate": 1.3096401679698814e-05, "loss": 0.0, "step": 40790 }, { "epoch": 8.861859252823631, "grad_norm": 0.00038500205846503377, "learning_rate": 1.3087351578337678e-05, "loss": 0.0001, "step": 40800 }, { "epoch": 8.864031277150303, "grad_norm": 0.0003877005656249821, "learning_rate": 1.3078301476976542e-05, "loss": 0.0052, "step": 40810 }, { "epoch": 8.866203301476977, "grad_norm": 0.00038981385296210647, "learning_rate": 1.3069251375615407e-05, "loss": 0.0053, "step": 40820 }, { "epoch": 8.86837532580365, "grad_norm": 0.00039689007098786533, "learning_rate": 1.3060201274254273e-05, "loss": 0.0001, "step": 40830 }, { "epoch": 8.870547350130321, "grad_norm": 0.00038903128006495535, "learning_rate": 1.3051151172893137e-05, "loss": 0.0004, "step": 40840 }, { "epoch": 8.872719374456993, "grad_norm": 0.0003967289230786264, "learning_rate": 1.3042101071532001e-05, "loss": 0.0, "step": 40850 }, { "epoch": 8.874891398783667, "grad_norm": 0.00039026138256303966, "learning_rate": 1.3033050970170866e-05, "loss": 0.0, "step": 40860 }, { "epoch": 8.87706342311034, "grad_norm": 0.00043888072832487524, "learning_rate": 1.3024000868809733e-05, "loss": 0.0018, "step": 40870 }, { "epoch": 8.879235447437011, "grad_norm": 0.0013480924535542727, "learning_rate": 1.3014950767448598e-05, "loss": 0.0089, "step": 40880 }, { "epoch": 8.881407471763684, "grad_norm": 0.00042966246837750077, "learning_rate": 1.3005900666087462e-05, "loss": 0.0001, "step": 40890 }, { "epoch": 8.883579496090356, "grad_norm": 0.0003938440349884331, "learning_rate": 1.2996850564726326e-05, "loss": 0.0001, "step": 40900 }, { "epoch": 8.88575152041703, "grad_norm": 0.0004265749885234982, "learning_rate": 1.298780046336519e-05, "loss": 0.0024, "step": 40910 }, { "epoch": 8.887923544743701, "grad_norm": 0.00041098997462540865, "learning_rate": 1.2978750362004055e-05, "loss": 0.0001, "step": 40920 }, { "epoch": 8.890095569070374, "grad_norm": 0.0007312253001146019, "learning_rate": 1.296970026064292e-05, "loss": 0.0, "step": 40930 }, { "epoch": 8.892267593397046, "grad_norm": 0.0003898316062986851, "learning_rate": 1.2960650159281784e-05, "loss": 0.0, "step": 40940 }, { "epoch": 8.894439617723718, "grad_norm": 0.0003812254872173071, "learning_rate": 1.2951600057920648e-05, "loss": 0.0001, "step": 40950 }, { "epoch": 8.896611642050392, "grad_norm": 0.0003883030731230974, "learning_rate": 1.2942549956559512e-05, "loss": 0.0, "step": 40960 }, { "epoch": 8.898783666377064, "grad_norm": 0.000394894159398973, "learning_rate": 1.2933499855198378e-05, "loss": 0.0, "step": 40970 }, { "epoch": 8.900955690703736, "grad_norm": 0.0003820984566118568, "learning_rate": 1.2924449753837243e-05, "loss": 0.0355, "step": 40980 }, { "epoch": 8.903127715030408, "grad_norm": 0.0004881360218860209, "learning_rate": 1.2915399652476109e-05, "loss": 0.0001, "step": 40990 }, { "epoch": 8.90529973935708, "grad_norm": 0.00045840549864806235, "learning_rate": 1.2906349551114975e-05, "loss": 0.0002, "step": 41000 }, { "epoch": 8.907471763683754, "grad_norm": 0.0004596387152560055, "learning_rate": 1.289729944975384e-05, "loss": 0.0, "step": 41010 }, { "epoch": 8.909643788010426, "grad_norm": 0.00038116611540317535, "learning_rate": 1.2888249348392704e-05, "loss": 0.0047, "step": 41020 }, { "epoch": 8.911815812337098, "grad_norm": 0.0007415604195557535, "learning_rate": 1.2879199247031568e-05, "loss": 0.0057, "step": 41030 }, { "epoch": 8.91398783666377, "grad_norm": 0.0012384551810100675, "learning_rate": 1.2870149145670432e-05, "loss": 0.0, "step": 41040 }, { "epoch": 8.916159860990444, "grad_norm": 0.00042603438487276435, "learning_rate": 1.2861099044309297e-05, "loss": 0.0, "step": 41050 }, { "epoch": 8.918331885317116, "grad_norm": 0.00038078860961832106, "learning_rate": 1.2852048942948161e-05, "loss": 0.0001, "step": 41060 }, { "epoch": 8.920503909643788, "grad_norm": 0.0003853098605759442, "learning_rate": 1.2842998841587025e-05, "loss": 0.0, "step": 41070 }, { "epoch": 8.92267593397046, "grad_norm": 0.000514326267875731, "learning_rate": 1.283394874022589e-05, "loss": 0.0, "step": 41080 }, { "epoch": 8.924847958297132, "grad_norm": 0.00038332334952428937, "learning_rate": 1.2824898638864754e-05, "loss": 0.0046, "step": 41090 }, { "epoch": 8.927019982623806, "grad_norm": 0.0004047720576636493, "learning_rate": 1.2815848537503622e-05, "loss": 0.0, "step": 41100 }, { "epoch": 8.929192006950478, "grad_norm": 0.0003732458280865103, "learning_rate": 1.2806798436142486e-05, "loss": 0.0048, "step": 41110 }, { "epoch": 8.93136403127715, "grad_norm": 0.0007458085892722011, "learning_rate": 1.279774833478135e-05, "loss": 0.0, "step": 41120 }, { "epoch": 8.933536055603822, "grad_norm": 0.0003851301735267043, "learning_rate": 1.2788698233420215e-05, "loss": 0.0, "step": 41130 }, { "epoch": 8.935708079930496, "grad_norm": 0.0003756080404855311, "learning_rate": 1.277964813205908e-05, "loss": 0.0, "step": 41140 }, { "epoch": 8.937880104257168, "grad_norm": 0.0003725398564711213, "learning_rate": 1.2770598030697945e-05, "loss": 0.0, "step": 41150 }, { "epoch": 8.94005212858384, "grad_norm": 0.00038527336437255144, "learning_rate": 1.276154792933681e-05, "loss": 0.0001, "step": 41160 }, { "epoch": 8.942224152910512, "grad_norm": 0.006219358649104834, "learning_rate": 1.2752497827975674e-05, "loss": 0.0001, "step": 41170 }, { "epoch": 8.944396177237184, "grad_norm": 0.00037608726415783167, "learning_rate": 1.2743447726614538e-05, "loss": 0.0001, "step": 41180 }, { "epoch": 8.946568201563858, "grad_norm": 0.0003803640138357878, "learning_rate": 1.2734397625253402e-05, "loss": 0.0, "step": 41190 }, { "epoch": 8.94874022589053, "grad_norm": 0.0003750100440811366, "learning_rate": 1.2725347523892267e-05, "loss": 0.0, "step": 41200 }, { "epoch": 8.950912250217202, "grad_norm": 0.0003808810724876821, "learning_rate": 1.2716297422531134e-05, "loss": 0.0, "step": 41210 }, { "epoch": 8.953084274543874, "grad_norm": 0.0003753194469027221, "learning_rate": 1.2707247321169999e-05, "loss": 0.0, "step": 41220 }, { "epoch": 8.955256298870548, "grad_norm": 0.0005497315432876348, "learning_rate": 1.2698197219808863e-05, "loss": 0.0, "step": 41230 }, { "epoch": 8.95742832319722, "grad_norm": 0.0004073216987308115, "learning_rate": 1.2689147118447727e-05, "loss": 0.0042, "step": 41240 }, { "epoch": 8.959600347523892, "grad_norm": 0.005487049464136362, "learning_rate": 1.2680097017086592e-05, "loss": 0.0001, "step": 41250 }, { "epoch": 8.961772371850564, "grad_norm": 0.0003656859917100519, "learning_rate": 1.2671046915725456e-05, "loss": 0.0, "step": 41260 }, { "epoch": 8.963944396177236, "grad_norm": 0.00038666161708533764, "learning_rate": 1.2661996814364322e-05, "loss": 0.0, "step": 41270 }, { "epoch": 8.96611642050391, "grad_norm": 0.00037817287375219166, "learning_rate": 1.2652946713003186e-05, "loss": 0.027, "step": 41280 }, { "epoch": 8.968288444830582, "grad_norm": 0.0003763465501833707, "learning_rate": 1.264389661164205e-05, "loss": 0.0001, "step": 41290 }, { "epoch": 8.970460469157254, "grad_norm": 0.0003695207415148616, "learning_rate": 1.2634846510280915e-05, "loss": 0.0001, "step": 41300 }, { "epoch": 8.972632493483927, "grad_norm": 0.0009629792766645551, "learning_rate": 1.262579640891978e-05, "loss": 0.0, "step": 41310 }, { "epoch": 8.9748045178106, "grad_norm": 0.0003619254275690764, "learning_rate": 1.2616746307558644e-05, "loss": 0.0, "step": 41320 }, { "epoch": 8.976976542137272, "grad_norm": 0.00037999844062142074, "learning_rate": 1.2607696206197512e-05, "loss": 0.0365, "step": 41330 }, { "epoch": 8.979148566463945, "grad_norm": 0.0004289183998480439, "learning_rate": 1.2598646104836376e-05, "loss": 0.0001, "step": 41340 }, { "epoch": 8.981320590790617, "grad_norm": 0.004200364463031292, "learning_rate": 1.258959600347524e-05, "loss": 0.0001, "step": 41350 }, { "epoch": 8.983492615117289, "grad_norm": 0.0006380841950885952, "learning_rate": 1.2580545902114105e-05, "loss": 0.0002, "step": 41360 }, { "epoch": 8.985664639443963, "grad_norm": 3.8622334003448486, "learning_rate": 1.2571495800752969e-05, "loss": 0.0493, "step": 41370 }, { "epoch": 8.987836663770635, "grad_norm": 0.0026256830897182226, "learning_rate": 1.2562445699391833e-05, "loss": 0.0002, "step": 41380 }, { "epoch": 8.990008688097307, "grad_norm": 0.00362397450953722, "learning_rate": 1.2553395598030698e-05, "loss": 0.0006, "step": 41390 }, { "epoch": 8.992180712423979, "grad_norm": 0.0019957926124334335, "learning_rate": 1.2544345496669564e-05, "loss": 0.0002, "step": 41400 }, { "epoch": 8.99435273675065, "grad_norm": 0.007589931599795818, "learning_rate": 1.2535295395308428e-05, "loss": 0.0002, "step": 41410 }, { "epoch": 8.996524761077325, "grad_norm": 0.002436482347548008, "learning_rate": 1.2526245293947292e-05, "loss": 0.0002, "step": 41420 }, { "epoch": 8.998696785403997, "grad_norm": 0.0034616016782820225, "learning_rate": 1.2517195192586157e-05, "loss": 0.0006, "step": 41430 }, { "epoch": 9.0, "eval_f1": 0.5344129554655871, "eval_loss": 0.07725337892770767, "eval_runtime": 83.1311, "eval_samples_per_second": 119.991, "eval_steps_per_second": 7.506, "step": 41436 }, { "epoch": 9.000868809730669, "grad_norm": 0.0007821933249942958, "learning_rate": 1.2508145091225024e-05, "loss": 0.0001, "step": 41440 }, { "epoch": 9.003040834057341, "grad_norm": 0.0006520500173792243, "learning_rate": 1.2499094989863887e-05, "loss": 0.0009, "step": 41450 }, { "epoch": 9.005212858384015, "grad_norm": 0.17783880233764648, "learning_rate": 1.2490044888502751e-05, "loss": 0.0102, "step": 41460 }, { "epoch": 9.007384882710687, "grad_norm": 0.0009389424230903387, "learning_rate": 1.2480994787141617e-05, "loss": 0.0001, "step": 41470 }, { "epoch": 9.009556907037359, "grad_norm": 0.0015950956149026752, "learning_rate": 1.2471944685780482e-05, "loss": 0.0089, "step": 41480 }, { "epoch": 9.011728931364031, "grad_norm": 0.00043575678137131035, "learning_rate": 1.2462894584419346e-05, "loss": 0.0001, "step": 41490 }, { "epoch": 9.013900955690703, "grad_norm": 0.00048693272401578724, "learning_rate": 1.245384448305821e-05, "loss": 0.0001, "step": 41500 }, { "epoch": 9.016072980017377, "grad_norm": 0.0023798400070518255, "learning_rate": 1.2444794381697075e-05, "loss": 0.0214, "step": 41510 }, { "epoch": 9.018245004344049, "grad_norm": 0.0006145633524283767, "learning_rate": 1.2435744280335939e-05, "loss": 0.0029, "step": 41520 }, { "epoch": 9.020417028670721, "grad_norm": 0.00047548647853545845, "learning_rate": 1.2426694178974805e-05, "loss": 0.0003, "step": 41530 }, { "epoch": 9.022589052997393, "grad_norm": 0.0006101019098423421, "learning_rate": 1.241764407761367e-05, "loss": 0.0001, "step": 41540 }, { "epoch": 9.024761077324065, "grad_norm": 0.000736563524696976, "learning_rate": 1.2408593976252536e-05, "loss": 0.0001, "step": 41550 }, { "epoch": 9.026933101650739, "grad_norm": 0.0004573471669573337, "learning_rate": 1.23995438748914e-05, "loss": 0.0, "step": 41560 }, { "epoch": 9.029105125977411, "grad_norm": 0.00043995672604069114, "learning_rate": 1.2390493773530264e-05, "loss": 0.0007, "step": 41570 }, { "epoch": 9.031277150304083, "grad_norm": 0.0004224168078508228, "learning_rate": 1.2381443672169129e-05, "loss": 0.0, "step": 41580 }, { "epoch": 9.033449174630755, "grad_norm": 0.00042783026583492756, "learning_rate": 1.2372393570807995e-05, "loss": 0.0001, "step": 41590 }, { "epoch": 9.035621198957429, "grad_norm": 0.003369641024619341, "learning_rate": 1.2363343469446859e-05, "loss": 0.0003, "step": 41600 }, { "epoch": 9.037793223284101, "grad_norm": 0.0005451114848256111, "learning_rate": 1.2354293368085723e-05, "loss": 0.0001, "step": 41610 }, { "epoch": 9.039965247610773, "grad_norm": 0.0005068237660452724, "learning_rate": 1.2345243266724588e-05, "loss": 0.0001, "step": 41620 }, { "epoch": 9.042137271937445, "grad_norm": 0.0003632347797974944, "learning_rate": 1.2336193165363452e-05, "loss": 0.0001, "step": 41630 }, { "epoch": 9.044309296264117, "grad_norm": 0.0005446246359497309, "learning_rate": 1.2327143064002318e-05, "loss": 0.0001, "step": 41640 }, { "epoch": 9.046481320590791, "grad_norm": 0.0005631268722936511, "learning_rate": 1.2318092962641182e-05, "loss": 0.0, "step": 41650 }, { "epoch": 9.048653344917463, "grad_norm": 0.0007002344354987144, "learning_rate": 1.2309042861280047e-05, "loss": 0.0001, "step": 41660 }, { "epoch": 9.050825369244135, "grad_norm": 0.0005598576390184462, "learning_rate": 1.2299992759918911e-05, "loss": 0.0042, "step": 41670 }, { "epoch": 9.052997393570807, "grad_norm": 0.0006812529754824936, "learning_rate": 1.2290942658557775e-05, "loss": 0.0038, "step": 41680 }, { "epoch": 9.055169417897481, "grad_norm": 0.0003943377232644707, "learning_rate": 1.2281892557196641e-05, "loss": 0.0153, "step": 41690 }, { "epoch": 9.057341442224153, "grad_norm": 0.00037149019772186875, "learning_rate": 1.2272842455835506e-05, "loss": 0.0, "step": 41700 }, { "epoch": 9.059513466550825, "grad_norm": 0.0005129647324793041, "learning_rate": 1.2263792354474372e-05, "loss": 0.0, "step": 41710 }, { "epoch": 9.061685490877498, "grad_norm": 0.00036699813790619373, "learning_rate": 1.2254742253113236e-05, "loss": 0.0, "step": 41720 }, { "epoch": 9.06385751520417, "grad_norm": 0.00037562238867394626, "learning_rate": 1.22456921517521e-05, "loss": 0.0047, "step": 41730 }, { "epoch": 9.066029539530843, "grad_norm": 0.00037983700167387724, "learning_rate": 1.2236642050390965e-05, "loss": 0.0184, "step": 41740 }, { "epoch": 9.068201563857516, "grad_norm": 0.0007805681088939309, "learning_rate": 1.2227591949029829e-05, "loss": 0.0046, "step": 41750 }, { "epoch": 9.070373588184188, "grad_norm": 0.00037100425106473267, "learning_rate": 1.2218541847668695e-05, "loss": 0.0001, "step": 41760 }, { "epoch": 9.07254561251086, "grad_norm": 0.0004620937106665224, "learning_rate": 1.220949174630756e-05, "loss": 0.0001, "step": 41770 }, { "epoch": 9.074717636837532, "grad_norm": 0.00037084464565850794, "learning_rate": 1.2200441644946424e-05, "loss": 0.0, "step": 41780 }, { "epoch": 9.076889661164206, "grad_norm": 0.00036705503589473665, "learning_rate": 1.2191391543585288e-05, "loss": 0.0, "step": 41790 }, { "epoch": 9.079061685490878, "grad_norm": 0.022258194163441658, "learning_rate": 1.2182341442224152e-05, "loss": 0.0001, "step": 41800 }, { "epoch": 9.08123370981755, "grad_norm": 0.0010294326348230243, "learning_rate": 1.2173291340863018e-05, "loss": 0.0, "step": 41810 }, { "epoch": 9.083405734144222, "grad_norm": 0.00035624494194053113, "learning_rate": 1.2164241239501883e-05, "loss": 0.0101, "step": 41820 }, { "epoch": 9.085577758470896, "grad_norm": 0.00036939565325155854, "learning_rate": 1.2155191138140747e-05, "loss": 0.0, "step": 41830 }, { "epoch": 9.087749782797568, "grad_norm": 0.0003831423236988485, "learning_rate": 1.2146141036779613e-05, "loss": 0.0, "step": 41840 }, { "epoch": 9.08992180712424, "grad_norm": 0.0003595768066588789, "learning_rate": 1.2137090935418478e-05, "loss": 0.0, "step": 41850 }, { "epoch": 9.092093831450912, "grad_norm": 0.00042781129013746977, "learning_rate": 1.2128040834057342e-05, "loss": 0.0, "step": 41860 }, { "epoch": 9.094265855777584, "grad_norm": 0.00036690133856609464, "learning_rate": 1.2118990732696208e-05, "loss": 0.0, "step": 41870 }, { "epoch": 9.096437880104258, "grad_norm": 0.00036495571839623153, "learning_rate": 1.2109940631335072e-05, "loss": 0.0, "step": 41880 }, { "epoch": 9.09860990443093, "grad_norm": 0.00035877502523362637, "learning_rate": 1.2100890529973937e-05, "loss": 0.0, "step": 41890 }, { "epoch": 9.100781928757602, "grad_norm": 0.0006629744893871248, "learning_rate": 1.2091840428612801e-05, "loss": 0.0, "step": 41900 }, { "epoch": 9.102953953084274, "grad_norm": 0.0003537225420586765, "learning_rate": 1.2082790327251665e-05, "loss": 0.0034, "step": 41910 }, { "epoch": 9.105125977410948, "grad_norm": 0.0003657103516161442, "learning_rate": 1.207374022589053e-05, "loss": 0.0, "step": 41920 }, { "epoch": 9.10729800173762, "grad_norm": 0.0004622082051355392, "learning_rate": 1.2064690124529396e-05, "loss": 0.0, "step": 41930 }, { "epoch": 9.109470026064292, "grad_norm": 0.0004466444079298526, "learning_rate": 1.205564002316826e-05, "loss": 0.0011, "step": 41940 }, { "epoch": 9.111642050390964, "grad_norm": 0.0007034140289761126, "learning_rate": 1.2046589921807124e-05, "loss": 0.0489, "step": 41950 }, { "epoch": 9.113814074717636, "grad_norm": 0.0019044640939682722, "learning_rate": 1.2037539820445989e-05, "loss": 0.0001, "step": 41960 }, { "epoch": 9.11598609904431, "grad_norm": 0.0009093311382457614, "learning_rate": 1.2028489719084855e-05, "loss": 0.0292, "step": 41970 }, { "epoch": 9.118158123370982, "grad_norm": 0.0011715837754309177, "learning_rate": 1.2019439617723719e-05, "loss": 0.0001, "step": 41980 }, { "epoch": 9.120330147697654, "grad_norm": 0.0005745973321609199, "learning_rate": 1.2010389516362585e-05, "loss": 0.0001, "step": 41990 }, { "epoch": 9.122502172024326, "grad_norm": 0.000761601550038904, "learning_rate": 1.200133941500145e-05, "loss": 0.0001, "step": 42000 }, { "epoch": 9.124674196350998, "grad_norm": 0.000572199176531285, "learning_rate": 1.1992289313640314e-05, "loss": 0.0001, "step": 42010 }, { "epoch": 9.126846220677672, "grad_norm": 0.00113860541023314, "learning_rate": 1.1983239212279178e-05, "loss": 0.0001, "step": 42020 }, { "epoch": 9.129018245004344, "grad_norm": 0.000481676310300827, "learning_rate": 1.1974189110918042e-05, "loss": 0.0001, "step": 42030 }, { "epoch": 9.131190269331016, "grad_norm": 0.0013643910642713308, "learning_rate": 1.1965139009556908e-05, "loss": 0.0001, "step": 42040 }, { "epoch": 9.133362293657688, "grad_norm": 0.026149902492761612, "learning_rate": 1.1956088908195773e-05, "loss": 0.0086, "step": 42050 }, { "epoch": 9.135534317984362, "grad_norm": 0.0005695584695786238, "learning_rate": 1.1947038806834637e-05, "loss": 0.0061, "step": 42060 }, { "epoch": 9.137706342311034, "grad_norm": 0.0012668960262089968, "learning_rate": 1.1937988705473501e-05, "loss": 0.0001, "step": 42070 }, { "epoch": 9.139878366637706, "grad_norm": 0.0003972501726821065, "learning_rate": 1.1928938604112366e-05, "loss": 0.0001, "step": 42080 }, { "epoch": 9.142050390964378, "grad_norm": 0.0004158159426879138, "learning_rate": 1.191988850275123e-05, "loss": 0.0, "step": 42090 }, { "epoch": 9.14422241529105, "grad_norm": 0.0015036650002002716, "learning_rate": 1.1910838401390096e-05, "loss": 0.0001, "step": 42100 }, { "epoch": 9.146394439617724, "grad_norm": 0.011430252343416214, "learning_rate": 1.190178830002896e-05, "loss": 0.0001, "step": 42110 }, { "epoch": 9.148566463944396, "grad_norm": 0.0003950317041017115, "learning_rate": 1.1892738198667827e-05, "loss": 0.0001, "step": 42120 }, { "epoch": 9.150738488271069, "grad_norm": 0.0005114611121825874, "learning_rate": 1.1883688097306691e-05, "loss": 0.0033, "step": 42130 }, { "epoch": 9.15291051259774, "grad_norm": 0.00044346958748064935, "learning_rate": 1.1874637995945555e-05, "loss": 0.0001, "step": 42140 }, { "epoch": 9.155082536924414, "grad_norm": 0.00038853855221532285, "learning_rate": 1.186558789458442e-05, "loss": 0.0, "step": 42150 }, { "epoch": 9.157254561251086, "grad_norm": 0.0003854171955026686, "learning_rate": 1.1856537793223286e-05, "loss": 0.0, "step": 42160 }, { "epoch": 9.159426585577759, "grad_norm": 0.0006933953263796866, "learning_rate": 1.184748769186215e-05, "loss": 0.0, "step": 42170 }, { "epoch": 9.16159860990443, "grad_norm": 0.00041202042484655976, "learning_rate": 1.1838437590501014e-05, "loss": 0.0001, "step": 42180 }, { "epoch": 9.163770634231103, "grad_norm": 0.0004293081583455205, "learning_rate": 1.1829387489139879e-05, "loss": 0.0, "step": 42190 }, { "epoch": 9.165942658557777, "grad_norm": 0.0003802312712650746, "learning_rate": 1.1820337387778743e-05, "loss": 0.0, "step": 42200 }, { "epoch": 9.168114682884449, "grad_norm": 0.15740595757961273, "learning_rate": 1.1811287286417609e-05, "loss": 0.0001, "step": 42210 }, { "epoch": 9.17028670721112, "grad_norm": 0.00046438779099844396, "learning_rate": 1.1802237185056473e-05, "loss": 0.0034, "step": 42220 }, { "epoch": 9.172458731537793, "grad_norm": 0.06625476479530334, "learning_rate": 1.1793187083695338e-05, "loss": 0.0001, "step": 42230 }, { "epoch": 9.174630755864465, "grad_norm": 0.0003445383335929364, "learning_rate": 1.1784136982334202e-05, "loss": 0.0, "step": 42240 }, { "epoch": 9.176802780191139, "grad_norm": 0.00036485432065092027, "learning_rate": 1.1775086880973066e-05, "loss": 0.0016, "step": 42250 }, { "epoch": 9.17897480451781, "grad_norm": 0.0023723444901406765, "learning_rate": 1.1766036779611932e-05, "loss": 0.0001, "step": 42260 }, { "epoch": 9.181146828844483, "grad_norm": 0.0036938569974154234, "learning_rate": 1.1756986678250798e-05, "loss": 0.0001, "step": 42270 }, { "epoch": 9.183318853171155, "grad_norm": 0.0008719200850464404, "learning_rate": 1.1747936576889663e-05, "loss": 0.0001, "step": 42280 }, { "epoch": 9.185490877497829, "grad_norm": 0.0011817128397524357, "learning_rate": 1.1738886475528527e-05, "loss": 0.0047, "step": 42290 }, { "epoch": 9.1876629018245, "grad_norm": 0.0005411884048953652, "learning_rate": 1.1729836374167391e-05, "loss": 0.0056, "step": 42300 }, { "epoch": 9.189834926151173, "grad_norm": 0.0003679801884572953, "learning_rate": 1.1720786272806256e-05, "loss": 0.0, "step": 42310 }, { "epoch": 9.192006950477845, "grad_norm": 0.0003470522933639586, "learning_rate": 1.171173617144512e-05, "loss": 0.0, "step": 42320 }, { "epoch": 9.194178974804517, "grad_norm": 0.0004540376248769462, "learning_rate": 1.1702686070083986e-05, "loss": 0.0, "step": 42330 }, { "epoch": 9.196350999131191, "grad_norm": 0.0004474143497645855, "learning_rate": 1.169363596872285e-05, "loss": 0.0239, "step": 42340 }, { "epoch": 9.198523023457863, "grad_norm": 0.0003767700691241771, "learning_rate": 1.1684585867361715e-05, "loss": 0.0001, "step": 42350 }, { "epoch": 9.200695047784535, "grad_norm": 0.006673621945083141, "learning_rate": 1.167553576600058e-05, "loss": 0.0001, "step": 42360 }, { "epoch": 9.202867072111207, "grad_norm": 0.00039552341331727803, "learning_rate": 1.1666485664639443e-05, "loss": 0.0, "step": 42370 }, { "epoch": 9.20503909643788, "grad_norm": 0.004886478651314974, "learning_rate": 1.1657435563278308e-05, "loss": 0.0001, "step": 42380 }, { "epoch": 9.207211120764553, "grad_norm": 0.0011676917783915997, "learning_rate": 1.1648385461917174e-05, "loss": 0.0001, "step": 42390 }, { "epoch": 9.209383145091225, "grad_norm": 0.0006531733088195324, "learning_rate": 1.1639335360556038e-05, "loss": 0.0001, "step": 42400 }, { "epoch": 9.211555169417897, "grad_norm": 0.00045200990280136466, "learning_rate": 1.1630285259194904e-05, "loss": 0.0062, "step": 42410 }, { "epoch": 9.21372719374457, "grad_norm": 0.0005120881251059473, "learning_rate": 1.1621235157833769e-05, "loss": 0.0, "step": 42420 }, { "epoch": 9.215899218071243, "grad_norm": 0.001150411320850253, "learning_rate": 1.1612185056472633e-05, "loss": 0.0001, "step": 42430 }, { "epoch": 9.218071242397915, "grad_norm": 0.0007097712368704379, "learning_rate": 1.1603134955111499e-05, "loss": 0.0001, "step": 42440 }, { "epoch": 9.220243266724587, "grad_norm": 0.00039890228072181344, "learning_rate": 1.1594084853750363e-05, "loss": 0.0061, "step": 42450 }, { "epoch": 9.22241529105126, "grad_norm": 0.0003842746955342591, "learning_rate": 1.1585034752389228e-05, "loss": 0.0001, "step": 42460 }, { "epoch": 9.224587315377931, "grad_norm": 0.0003777845704462379, "learning_rate": 1.1575984651028092e-05, "loss": 0.0, "step": 42470 }, { "epoch": 9.226759339704605, "grad_norm": 0.0031357433181256056, "learning_rate": 1.1566934549666956e-05, "loss": 0.0036, "step": 42480 }, { "epoch": 9.228931364031277, "grad_norm": 0.0004119759250897914, "learning_rate": 1.155788444830582e-05, "loss": 0.0001, "step": 42490 }, { "epoch": 9.23110338835795, "grad_norm": 0.0005053351633250713, "learning_rate": 1.1548834346944687e-05, "loss": 0.0014, "step": 42500 }, { "epoch": 9.233275412684621, "grad_norm": 0.00035216548712924123, "learning_rate": 1.1539784245583551e-05, "loss": 0.0094, "step": 42510 }, { "epoch": 9.235447437011295, "grad_norm": 0.00039408242446370423, "learning_rate": 1.1530734144222415e-05, "loss": 0.0, "step": 42520 }, { "epoch": 9.237619461337967, "grad_norm": 0.25304147601127625, "learning_rate": 1.152168404286128e-05, "loss": 0.0001, "step": 42530 }, { "epoch": 9.23979148566464, "grad_norm": 0.0003541614569257945, "learning_rate": 1.1512633941500146e-05, "loss": 0.0044, "step": 42540 }, { "epoch": 9.241963509991312, "grad_norm": 0.0003510701353661716, "learning_rate": 1.150358384013901e-05, "loss": 0.0002, "step": 42550 }, { "epoch": 9.244135534317984, "grad_norm": 0.00037902756594121456, "learning_rate": 1.1494533738777876e-05, "loss": 0.0001, "step": 42560 }, { "epoch": 9.246307558644657, "grad_norm": 0.0019823191687464714, "learning_rate": 1.148548363741674e-05, "loss": 0.0001, "step": 42570 }, { "epoch": 9.24847958297133, "grad_norm": 0.0004513237508945167, "learning_rate": 1.1476433536055605e-05, "loss": 0.0, "step": 42580 }, { "epoch": 9.250651607298002, "grad_norm": 0.0013202824629843235, "learning_rate": 1.1467383434694469e-05, "loss": 0.0038, "step": 42590 }, { "epoch": 9.252823631624674, "grad_norm": 0.00035054978798143566, "learning_rate": 1.1458333333333333e-05, "loss": 0.0182, "step": 42600 }, { "epoch": 9.254995655951348, "grad_norm": 0.0003493396216072142, "learning_rate": 1.14492832319722e-05, "loss": 0.0001, "step": 42610 }, { "epoch": 9.25716768027802, "grad_norm": 0.0005488857277669013, "learning_rate": 1.1440233130611064e-05, "loss": 0.0, "step": 42620 }, { "epoch": 9.259339704604692, "grad_norm": 0.0003464900655671954, "learning_rate": 1.1431183029249928e-05, "loss": 0.0068, "step": 42630 }, { "epoch": 9.261511728931364, "grad_norm": 0.00038118616794236004, "learning_rate": 1.1422132927888793e-05, "loss": 0.0, "step": 42640 }, { "epoch": 9.263683753258036, "grad_norm": 0.0003550401597749442, "learning_rate": 1.1413082826527657e-05, "loss": 0.0005, "step": 42650 }, { "epoch": 9.26585577758471, "grad_norm": 0.0003692580503411591, "learning_rate": 1.1404032725166521e-05, "loss": 0.0001, "step": 42660 }, { "epoch": 9.268027801911382, "grad_norm": 0.00035148989991284907, "learning_rate": 1.1394982623805387e-05, "loss": 0.0, "step": 42670 }, { "epoch": 9.270199826238054, "grad_norm": 0.000443141907453537, "learning_rate": 1.1385932522444252e-05, "loss": 0.0, "step": 42680 }, { "epoch": 9.272371850564726, "grad_norm": 0.0008299656328745186, "learning_rate": 1.1376882421083118e-05, "loss": 0.0312, "step": 42690 }, { "epoch": 9.274543874891398, "grad_norm": 0.0032954856287688017, "learning_rate": 1.1367832319721982e-05, "loss": 0.0002, "step": 42700 }, { "epoch": 9.276715899218072, "grad_norm": 0.0026082557160407305, "learning_rate": 1.1358782218360846e-05, "loss": 0.0057, "step": 42710 }, { "epoch": 9.278887923544744, "grad_norm": 0.0017727608792483807, "learning_rate": 1.134973211699971e-05, "loss": 0.0001, "step": 42720 }, { "epoch": 9.281059947871416, "grad_norm": 0.2710913121700287, "learning_rate": 1.1340682015638577e-05, "loss": 0.0049, "step": 42730 }, { "epoch": 9.283231972198088, "grad_norm": 0.03456174209713936, "learning_rate": 1.1331631914277441e-05, "loss": 0.0001, "step": 42740 }, { "epoch": 9.285403996524762, "grad_norm": 0.0008368910639546812, "learning_rate": 1.1322581812916305e-05, "loss": 0.0001, "step": 42750 }, { "epoch": 9.287576020851434, "grad_norm": 0.00035930657759308815, "learning_rate": 1.131353171155517e-05, "loss": 0.0001, "step": 42760 }, { "epoch": 9.289748045178106, "grad_norm": 0.0006045507034286857, "learning_rate": 1.1304481610194034e-05, "loss": 0.0, "step": 42770 }, { "epoch": 9.291920069504778, "grad_norm": 0.0003401483118068427, "learning_rate": 1.1295431508832898e-05, "loss": 0.0001, "step": 42780 }, { "epoch": 9.29409209383145, "grad_norm": 0.0005668572848662734, "learning_rate": 1.1286381407471764e-05, "loss": 0.0, "step": 42790 }, { "epoch": 9.296264118158124, "grad_norm": 0.00042387290159240365, "learning_rate": 1.1277331306110629e-05, "loss": 0.0046, "step": 42800 }, { "epoch": 9.298436142484796, "grad_norm": 0.0019156066700816154, "learning_rate": 1.1268281204749493e-05, "loss": 0.0, "step": 42810 }, { "epoch": 9.300608166811468, "grad_norm": 0.0004135738417971879, "learning_rate": 1.1259231103388359e-05, "loss": 0.0006, "step": 42820 }, { "epoch": 9.30278019113814, "grad_norm": 0.0003466247289907187, "learning_rate": 1.1250181002027223e-05, "loss": 0.0, "step": 42830 }, { "epoch": 9.304952215464812, "grad_norm": 0.0003484385379124433, "learning_rate": 1.124113090066609e-05, "loss": 0.0001, "step": 42840 }, { "epoch": 9.307124239791486, "grad_norm": 0.0003456638951320201, "learning_rate": 1.1232080799304954e-05, "loss": 0.0039, "step": 42850 }, { "epoch": 9.309296264118158, "grad_norm": 0.00045184456394053996, "learning_rate": 1.1223030697943818e-05, "loss": 0.0, "step": 42860 }, { "epoch": 9.31146828844483, "grad_norm": 0.0007712736260145903, "learning_rate": 1.1213980596582682e-05, "loss": 0.0, "step": 42870 }, { "epoch": 9.313640312771502, "grad_norm": 0.00035233920789323747, "learning_rate": 1.1204930495221547e-05, "loss": 0.0, "step": 42880 }, { "epoch": 9.315812337098176, "grad_norm": 0.0008341504144482315, "learning_rate": 1.1195880393860411e-05, "loss": 0.0, "step": 42890 }, { "epoch": 9.317984361424848, "grad_norm": 0.0003776673402171582, "learning_rate": 1.1186830292499277e-05, "loss": 0.0, "step": 42900 }, { "epoch": 9.32015638575152, "grad_norm": 0.00034413248067721725, "learning_rate": 1.1177780191138142e-05, "loss": 0.0, "step": 42910 }, { "epoch": 9.322328410078192, "grad_norm": 0.0004014780279248953, "learning_rate": 1.1168730089777006e-05, "loss": 0.0, "step": 42920 }, { "epoch": 9.324500434404865, "grad_norm": 0.00033698673360049725, "learning_rate": 1.115967998841587e-05, "loss": 0.0001, "step": 42930 }, { "epoch": 9.326672458731538, "grad_norm": 0.0003441080334596336, "learning_rate": 1.1150629887054735e-05, "loss": 0.0, "step": 42940 }, { "epoch": 9.32884448305821, "grad_norm": 0.0003518610610626638, "learning_rate": 1.1141579785693599e-05, "loss": 0.0, "step": 42950 }, { "epoch": 9.331016507384883, "grad_norm": 0.0003449947398621589, "learning_rate": 1.1132529684332465e-05, "loss": 0.0, "step": 42960 }, { "epoch": 9.333188531711555, "grad_norm": 0.0003442805027589202, "learning_rate": 1.112347958297133e-05, "loss": 0.0034, "step": 42970 }, { "epoch": 9.335360556038228, "grad_norm": 0.0005408208235166967, "learning_rate": 1.1114429481610195e-05, "loss": 0.0, "step": 42980 }, { "epoch": 9.3375325803649, "grad_norm": 0.0003692790924105793, "learning_rate": 1.110537938024906e-05, "loss": 0.0, "step": 42990 }, { "epoch": 9.339704604691573, "grad_norm": 0.00034044485073536634, "learning_rate": 1.1096329278887924e-05, "loss": 0.0, "step": 43000 }, { "epoch": 9.341876629018245, "grad_norm": 0.0026759139727801085, "learning_rate": 1.108727917752679e-05, "loss": 0.0, "step": 43010 }, { "epoch": 9.344048653344917, "grad_norm": 0.0003308370942249894, "learning_rate": 1.1078229076165654e-05, "loss": 0.0056, "step": 43020 }, { "epoch": 9.34622067767159, "grad_norm": 0.00035013555316254497, "learning_rate": 1.1069178974804519e-05, "loss": 0.0, "step": 43030 }, { "epoch": 9.348392701998263, "grad_norm": 0.00034313698415644467, "learning_rate": 1.1060128873443383e-05, "loss": 0.0, "step": 43040 }, { "epoch": 9.350564726324935, "grad_norm": 0.00045629485975950956, "learning_rate": 1.1051078772082247e-05, "loss": 0.0001, "step": 43050 }, { "epoch": 9.352736750651607, "grad_norm": 0.00033453330979682505, "learning_rate": 1.1042028670721112e-05, "loss": 0.0, "step": 43060 }, { "epoch": 9.35490877497828, "grad_norm": 0.0017623642925173044, "learning_rate": 1.1032978569359978e-05, "loss": 0.0006, "step": 43070 }, { "epoch": 9.357080799304953, "grad_norm": 0.0007430663681589067, "learning_rate": 1.1023928467998842e-05, "loss": 0.0001, "step": 43080 }, { "epoch": 9.359252823631625, "grad_norm": 0.005379736889153719, "learning_rate": 1.1014878366637706e-05, "loss": 0.0002, "step": 43090 }, { "epoch": 9.361424847958297, "grad_norm": 1.7879000902175903, "learning_rate": 1.100582826527657e-05, "loss": 0.0005, "step": 43100 }, { "epoch": 9.363596872284969, "grad_norm": 0.000462701718788594, "learning_rate": 1.0996778163915437e-05, "loss": 0.0, "step": 43110 }, { "epoch": 9.365768896611643, "grad_norm": 0.00035364291397854686, "learning_rate": 1.0987728062554301e-05, "loss": 0.0047, "step": 43120 }, { "epoch": 9.367940920938315, "grad_norm": 0.00048209200031124055, "learning_rate": 1.0978677961193167e-05, "loss": 0.0, "step": 43130 }, { "epoch": 9.370112945264987, "grad_norm": 0.0003437796258367598, "learning_rate": 1.0969627859832031e-05, "loss": 0.0, "step": 43140 }, { "epoch": 9.372284969591659, "grad_norm": 0.00034089843393303454, "learning_rate": 1.0960577758470896e-05, "loss": 0.0, "step": 43150 }, { "epoch": 9.374456993918331, "grad_norm": 0.00034152084845118225, "learning_rate": 1.095152765710976e-05, "loss": 0.0, "step": 43160 }, { "epoch": 9.376629018245005, "grad_norm": 0.00033195436117239296, "learning_rate": 1.0942477555748625e-05, "loss": 0.0, "step": 43170 }, { "epoch": 9.378801042571677, "grad_norm": 0.00033621469628997147, "learning_rate": 1.0933427454387489e-05, "loss": 0.0, "step": 43180 }, { "epoch": 9.380973066898349, "grad_norm": 0.0003306234139017761, "learning_rate": 1.0924377353026355e-05, "loss": 0.0, "step": 43190 }, { "epoch": 9.383145091225021, "grad_norm": 0.0003234837204217911, "learning_rate": 1.091532725166522e-05, "loss": 0.0, "step": 43200 }, { "epoch": 9.385317115551695, "grad_norm": 0.0003395713574718684, "learning_rate": 1.0906277150304084e-05, "loss": 0.0, "step": 43210 }, { "epoch": 9.387489139878367, "grad_norm": 0.00036190488026477396, "learning_rate": 1.0897227048942948e-05, "loss": 0.0, "step": 43220 }, { "epoch": 9.38966116420504, "grad_norm": 0.00032648214255459607, "learning_rate": 1.0888176947581812e-05, "loss": 0.0, "step": 43230 }, { "epoch": 9.391833188531711, "grad_norm": 0.0004206506710033864, "learning_rate": 1.0879126846220678e-05, "loss": 0.0, "step": 43240 }, { "epoch": 9.394005212858383, "grad_norm": 0.0004962489474564791, "learning_rate": 1.0870076744859543e-05, "loss": 0.0, "step": 43250 }, { "epoch": 9.396177237185057, "grad_norm": 0.0003266745188739151, "learning_rate": 1.0861026643498409e-05, "loss": 0.0, "step": 43260 }, { "epoch": 9.39834926151173, "grad_norm": 0.0003320554969832301, "learning_rate": 1.0851976542137273e-05, "loss": 0.0, "step": 43270 }, { "epoch": 9.400521285838401, "grad_norm": 0.00033323868410661817, "learning_rate": 1.0842926440776137e-05, "loss": 0.0042, "step": 43280 }, { "epoch": 9.402693310165073, "grad_norm": 0.0003688148863147944, "learning_rate": 1.0833876339415002e-05, "loss": 0.0, "step": 43290 }, { "epoch": 9.404865334491745, "grad_norm": 0.0004509967693593353, "learning_rate": 1.0824826238053868e-05, "loss": 0.0001, "step": 43300 }, { "epoch": 9.40703735881842, "grad_norm": 0.0004598804807756096, "learning_rate": 1.0815776136692732e-05, "loss": 0.0, "step": 43310 }, { "epoch": 9.409209383145091, "grad_norm": 0.0003177253529429436, "learning_rate": 1.0806726035331596e-05, "loss": 0.0, "step": 43320 }, { "epoch": 9.411381407471763, "grad_norm": 0.00031996675534173846, "learning_rate": 1.079767593397046e-05, "loss": 0.0, "step": 43330 }, { "epoch": 9.413553431798436, "grad_norm": 0.0003233585739508271, "learning_rate": 1.0788625832609325e-05, "loss": 0.0, "step": 43340 }, { "epoch": 9.41572545612511, "grad_norm": 0.00032162151183001697, "learning_rate": 1.077957573124819e-05, "loss": 0.0, "step": 43350 }, { "epoch": 9.417897480451781, "grad_norm": 0.00042038553510792553, "learning_rate": 1.0770525629887055e-05, "loss": 0.0038, "step": 43360 }, { "epoch": 9.420069504778454, "grad_norm": 0.00036600345629267395, "learning_rate": 1.076147552852592e-05, "loss": 0.0, "step": 43370 }, { "epoch": 9.422241529105126, "grad_norm": 0.00032596764503978193, "learning_rate": 1.0752425427164784e-05, "loss": 0.0001, "step": 43380 }, { "epoch": 9.424413553431798, "grad_norm": 0.00031555883469991386, "learning_rate": 1.074337532580365e-05, "loss": 0.0, "step": 43390 }, { "epoch": 9.426585577758472, "grad_norm": 0.0003250258741900325, "learning_rate": 1.0734325224442514e-05, "loss": 0.0, "step": 43400 }, { "epoch": 9.428757602085144, "grad_norm": 0.0003132763667963445, "learning_rate": 1.072527512308138e-05, "loss": 0.0, "step": 43410 }, { "epoch": 9.430929626411816, "grad_norm": 0.0003153624420519918, "learning_rate": 1.0716225021720245e-05, "loss": 0.0039, "step": 43420 }, { "epoch": 9.433101650738488, "grad_norm": 0.00031512047280557454, "learning_rate": 1.070717492035911e-05, "loss": 0.0, "step": 43430 }, { "epoch": 9.435273675065162, "grad_norm": 0.00031897996086627245, "learning_rate": 1.0698124818997974e-05, "loss": 0.0036, "step": 43440 }, { "epoch": 9.437445699391834, "grad_norm": 0.0005444668349809945, "learning_rate": 1.0689074717636838e-05, "loss": 0.0, "step": 43450 }, { "epoch": 9.439617723718506, "grad_norm": 0.00032076716888695955, "learning_rate": 1.0680024616275702e-05, "loss": 0.0, "step": 43460 }, { "epoch": 9.441789748045178, "grad_norm": 0.0004752585955429822, "learning_rate": 1.0670974514914568e-05, "loss": 0.0, "step": 43470 }, { "epoch": 9.44396177237185, "grad_norm": 0.00048087709001265466, "learning_rate": 1.0661924413553433e-05, "loss": 0.0, "step": 43480 }, { "epoch": 9.446133796698524, "grad_norm": 0.00044686091132462025, "learning_rate": 1.0652874312192297e-05, "loss": 0.0, "step": 43490 }, { "epoch": 9.448305821025196, "grad_norm": 0.00031458461307920516, "learning_rate": 1.0643824210831161e-05, "loss": 0.0, "step": 43500 }, { "epoch": 9.450477845351868, "grad_norm": 0.0003263599646743387, "learning_rate": 1.0634774109470026e-05, "loss": 0.0001, "step": 43510 }, { "epoch": 9.45264986967854, "grad_norm": 0.00030845761648379266, "learning_rate": 1.062572400810889e-05, "loss": 0.0067, "step": 43520 }, { "epoch": 9.454821894005212, "grad_norm": 0.00030990008963271976, "learning_rate": 1.0616673906747756e-05, "loss": 0.0, "step": 43530 }, { "epoch": 9.456993918331886, "grad_norm": 0.000313706899760291, "learning_rate": 1.0607623805386622e-05, "loss": 0.0, "step": 43540 }, { "epoch": 9.459165942658558, "grad_norm": 0.0003094021521974355, "learning_rate": 1.0598573704025486e-05, "loss": 0.0, "step": 43550 }, { "epoch": 9.46133796698523, "grad_norm": 0.0004979989607818425, "learning_rate": 1.058952360266435e-05, "loss": 0.0058, "step": 43560 }, { "epoch": 9.463509991311902, "grad_norm": 0.0003103738999925554, "learning_rate": 1.0580473501303215e-05, "loss": 0.0, "step": 43570 }, { "epoch": 9.465682015638576, "grad_norm": 0.00030864804284647107, "learning_rate": 1.057142339994208e-05, "loss": 0.0, "step": 43580 }, { "epoch": 9.467854039965248, "grad_norm": 0.00030936236726120114, "learning_rate": 1.0562373298580945e-05, "loss": 0.0, "step": 43590 }, { "epoch": 9.47002606429192, "grad_norm": 0.0003084845084231347, "learning_rate": 1.055332319721981e-05, "loss": 0.0, "step": 43600 }, { "epoch": 9.472198088618592, "grad_norm": 0.00030784294358454645, "learning_rate": 1.0544273095858674e-05, "loss": 0.0, "step": 43610 }, { "epoch": 9.474370112945264, "grad_norm": 0.0003053463879041374, "learning_rate": 1.0535222994497538e-05, "loss": 0.0, "step": 43620 }, { "epoch": 9.476542137271938, "grad_norm": 0.00030371634056791663, "learning_rate": 1.0526172893136403e-05, "loss": 0.0, "step": 43630 }, { "epoch": 9.47871416159861, "grad_norm": 0.0034877255093306303, "learning_rate": 1.0517122791775269e-05, "loss": 0.0049, "step": 43640 }, { "epoch": 9.480886185925282, "grad_norm": 0.00030639933538623154, "learning_rate": 1.0508072690414133e-05, "loss": 0.0043, "step": 43650 }, { "epoch": 9.483058210251954, "grad_norm": 0.0003288072475697845, "learning_rate": 1.0499022589052997e-05, "loss": 0.0, "step": 43660 }, { "epoch": 9.485230234578628, "grad_norm": 0.0003033705288544297, "learning_rate": 1.0489972487691862e-05, "loss": 0.0, "step": 43670 }, { "epoch": 9.4874022589053, "grad_norm": 0.00031130280694924295, "learning_rate": 1.0480922386330728e-05, "loss": 0.0, "step": 43680 }, { "epoch": 9.489574283231972, "grad_norm": 0.003796252654865384, "learning_rate": 1.0471872284969592e-05, "loss": 0.0, "step": 43690 }, { "epoch": 9.491746307558644, "grad_norm": 0.0003003615129273385, "learning_rate": 1.0462822183608458e-05, "loss": 0.0058, "step": 43700 }, { "epoch": 9.493918331885316, "grad_norm": 0.00043470592936500907, "learning_rate": 1.0453772082247323e-05, "loss": 0.0, "step": 43710 }, { "epoch": 9.49609035621199, "grad_norm": 0.0003017736307810992, "learning_rate": 1.0444721980886187e-05, "loss": 0.0157, "step": 43720 }, { "epoch": 9.498262380538662, "grad_norm": 0.00031047649099491537, "learning_rate": 1.0435671879525051e-05, "loss": 0.0, "step": 43730 }, { "epoch": 9.500434404865334, "grad_norm": 0.27765092253685, "learning_rate": 1.0426621778163916e-05, "loss": 0.0052, "step": 43740 }, { "epoch": 9.502606429192006, "grad_norm": 0.00031226209830492735, "learning_rate": 1.041757167680278e-05, "loss": 0.0, "step": 43750 }, { "epoch": 9.504778453518679, "grad_norm": 0.00032370752887800336, "learning_rate": 1.0408521575441646e-05, "loss": 0.0001, "step": 43760 }, { "epoch": 9.506950477845352, "grad_norm": 0.0005092357750982046, "learning_rate": 1.039947147408051e-05, "loss": 0.0, "step": 43770 }, { "epoch": 9.509122502172024, "grad_norm": 0.0003147079551126808, "learning_rate": 1.0390421372719375e-05, "loss": 0.0047, "step": 43780 }, { "epoch": 9.511294526498697, "grad_norm": 0.00042028294410556555, "learning_rate": 1.0381371271358239e-05, "loss": 0.0, "step": 43790 }, { "epoch": 9.513466550825369, "grad_norm": 0.0003288674633949995, "learning_rate": 1.0372321169997103e-05, "loss": 0.0001, "step": 43800 }, { "epoch": 9.515638575152042, "grad_norm": 0.0003080323222093284, "learning_rate": 1.036327106863597e-05, "loss": 0.0, "step": 43810 }, { "epoch": 9.517810599478715, "grad_norm": 0.00030454035731963813, "learning_rate": 1.0354220967274834e-05, "loss": 0.0, "step": 43820 }, { "epoch": 9.519982623805387, "grad_norm": 0.00030711121507920325, "learning_rate": 1.03451708659137e-05, "loss": 0.0, "step": 43830 }, { "epoch": 9.522154648132059, "grad_norm": 0.00031087957904674113, "learning_rate": 1.0336120764552564e-05, "loss": 0.0068, "step": 43840 }, { "epoch": 9.52432667245873, "grad_norm": 0.0006094526033848524, "learning_rate": 1.0327070663191428e-05, "loss": 0.0, "step": 43850 }, { "epoch": 9.526498696785405, "grad_norm": 0.00036623451160266995, "learning_rate": 1.0318020561830293e-05, "loss": 0.0, "step": 43860 }, { "epoch": 9.528670721112077, "grad_norm": 0.0005215948331169784, "learning_rate": 1.0308970460469159e-05, "loss": 0.0, "step": 43870 }, { "epoch": 9.530842745438749, "grad_norm": 0.0004186656151432544, "learning_rate": 1.0299920359108023e-05, "loss": 0.0004, "step": 43880 }, { "epoch": 9.53301476976542, "grad_norm": 0.0003144047223031521, "learning_rate": 1.0290870257746887e-05, "loss": 0.0, "step": 43890 }, { "epoch": 9.535186794092095, "grad_norm": 0.00030454093939624727, "learning_rate": 1.0281820156385752e-05, "loss": 0.0, "step": 43900 }, { "epoch": 9.537358818418767, "grad_norm": 0.0003108079545199871, "learning_rate": 1.0272770055024616e-05, "loss": 0.0, "step": 43910 }, { "epoch": 9.539530842745439, "grad_norm": 0.0003084295312874019, "learning_rate": 1.026371995366348e-05, "loss": 0.0, "step": 43920 }, { "epoch": 9.541702867072111, "grad_norm": 0.00048682757187634706, "learning_rate": 1.0254669852302346e-05, "loss": 0.0, "step": 43930 }, { "epoch": 9.543874891398783, "grad_norm": 0.000300971616525203, "learning_rate": 1.024561975094121e-05, "loss": 0.0432, "step": 43940 }, { "epoch": 9.546046915725457, "grad_norm": 0.0003139590844511986, "learning_rate": 1.0236569649580075e-05, "loss": 0.0001, "step": 43950 }, { "epoch": 9.548218940052129, "grad_norm": 8.009101867675781, "learning_rate": 1.0227519548218941e-05, "loss": 0.0572, "step": 43960 }, { "epoch": 9.550390964378801, "grad_norm": 0.0003988199750892818, "learning_rate": 1.0218469446857806e-05, "loss": 0.0, "step": 43970 }, { "epoch": 9.552562988705473, "grad_norm": 0.0009871821384876966, "learning_rate": 1.020941934549667e-05, "loss": 0.0, "step": 43980 }, { "epoch": 9.554735013032147, "grad_norm": 0.0007358947768807411, "learning_rate": 1.0200369244135536e-05, "loss": 0.005, "step": 43990 }, { "epoch": 9.556907037358819, "grad_norm": 0.0010422732448205352, "learning_rate": 1.01913191427744e-05, "loss": 0.0001, "step": 44000 }, { "epoch": 9.559079061685491, "grad_norm": 0.0018725309055298567, "learning_rate": 1.0182269041413265e-05, "loss": 0.0002, "step": 44010 }, { "epoch": 9.561251086012163, "grad_norm": 0.0004772630927618593, "learning_rate": 1.0173218940052129e-05, "loss": 0.0041, "step": 44020 }, { "epoch": 9.563423110338835, "grad_norm": 0.0005594859248958528, "learning_rate": 1.0164168838690993e-05, "loss": 0.0086, "step": 44030 }, { "epoch": 9.565595134665509, "grad_norm": 0.00434377184137702, "learning_rate": 1.015511873732986e-05, "loss": 0.0001, "step": 44040 }, { "epoch": 9.567767158992181, "grad_norm": 0.0006473969551734626, "learning_rate": 1.0146068635968724e-05, "loss": 0.0036, "step": 44050 }, { "epoch": 9.569939183318853, "grad_norm": 0.0010716840624809265, "learning_rate": 1.0137018534607588e-05, "loss": 0.0, "step": 44060 }, { "epoch": 9.572111207645525, "grad_norm": 0.0012535667046904564, "learning_rate": 1.0127968433246452e-05, "loss": 0.0, "step": 44070 }, { "epoch": 9.574283231972197, "grad_norm": 0.0003759227111004293, "learning_rate": 1.0118918331885317e-05, "loss": 0.0057, "step": 44080 }, { "epoch": 9.576455256298871, "grad_norm": 0.000605809735134244, "learning_rate": 1.0109868230524181e-05, "loss": 0.0, "step": 44090 }, { "epoch": 9.578627280625543, "grad_norm": 0.0003496602294035256, "learning_rate": 1.0100818129163047e-05, "loss": 0.0, "step": 44100 }, { "epoch": 9.580799304952215, "grad_norm": 0.00036835906212218106, "learning_rate": 1.0091768027801913e-05, "loss": 0.0, "step": 44110 }, { "epoch": 9.582971329278887, "grad_norm": 0.0003797638928517699, "learning_rate": 1.0082717926440777e-05, "loss": 0.0, "step": 44120 }, { "epoch": 9.58514335360556, "grad_norm": 0.0003261095262132585, "learning_rate": 1.0073667825079642e-05, "loss": 0.0, "step": 44130 }, { "epoch": 9.587315377932233, "grad_norm": 0.00031275799847207963, "learning_rate": 1.0064617723718506e-05, "loss": 0.0, "step": 44140 }, { "epoch": 9.589487402258905, "grad_norm": 0.0005705951480194926, "learning_rate": 1.005556762235737e-05, "loss": 0.0001, "step": 44150 }, { "epoch": 9.591659426585577, "grad_norm": 0.0004992802278138697, "learning_rate": 1.0046517520996236e-05, "loss": 0.0, "step": 44160 }, { "epoch": 9.59383145091225, "grad_norm": 0.000599740247707814, "learning_rate": 1.00374674196351e-05, "loss": 0.0001, "step": 44170 }, { "epoch": 9.596003475238923, "grad_norm": 0.1466888040304184, "learning_rate": 1.0028417318273965e-05, "loss": 0.0036, "step": 44180 }, { "epoch": 9.598175499565595, "grad_norm": 0.0003275081980973482, "learning_rate": 1.001936721691283e-05, "loss": 0.0, "step": 44190 }, { "epoch": 9.600347523892268, "grad_norm": 0.00044672199874185026, "learning_rate": 1.0010317115551694e-05, "loss": 0.0, "step": 44200 }, { "epoch": 9.60251954821894, "grad_norm": 0.00030891658389009535, "learning_rate": 1.0001267014190558e-05, "loss": 0.0, "step": 44210 }, { "epoch": 9.604691572545612, "grad_norm": 0.0003040438750758767, "learning_rate": 9.992216912829424e-06, "loss": 0.0001, "step": 44220 }, { "epoch": 9.606863596872286, "grad_norm": 0.007986625656485558, "learning_rate": 9.983166811468288e-06, "loss": 0.0, "step": 44230 }, { "epoch": 9.609035621198958, "grad_norm": 0.0003891444648616016, "learning_rate": 9.974116710107153e-06, "loss": 0.0, "step": 44240 }, { "epoch": 9.61120764552563, "grad_norm": 0.0005247259396128356, "learning_rate": 9.965066608746019e-06, "loss": 0.0057, "step": 44250 }, { "epoch": 9.613379669852302, "grad_norm": 0.0003096143191214651, "learning_rate": 9.956016507384883e-06, "loss": 0.0, "step": 44260 }, { "epoch": 9.615551694178976, "grad_norm": 0.0003360881528351456, "learning_rate": 9.94696640602375e-06, "loss": 0.0, "step": 44270 }, { "epoch": 9.617723718505648, "grad_norm": 0.00029326791991479695, "learning_rate": 9.937916304662614e-06, "loss": 0.0, "step": 44280 }, { "epoch": 9.61989574283232, "grad_norm": 0.0003966445801779628, "learning_rate": 9.928866203301478e-06, "loss": 0.0, "step": 44290 }, { "epoch": 9.622067767158992, "grad_norm": 0.0006641658837907016, "learning_rate": 9.919816101940342e-06, "loss": 0.0001, "step": 44300 }, { "epoch": 9.624239791485664, "grad_norm": 0.0003245220868848264, "learning_rate": 9.910766000579207e-06, "loss": 0.0045, "step": 44310 }, { "epoch": 9.626411815812338, "grad_norm": 0.0003133430436719209, "learning_rate": 9.901715899218071e-06, "loss": 0.0, "step": 44320 }, { "epoch": 9.62858384013901, "grad_norm": 0.0003544339269865304, "learning_rate": 9.892665797856937e-06, "loss": 0.0409, "step": 44330 }, { "epoch": 9.630755864465682, "grad_norm": 0.0011449995217844844, "learning_rate": 9.883615696495801e-06, "loss": 0.0001, "step": 44340 }, { "epoch": 9.632927888792354, "grad_norm": 0.0006344979046843946, "learning_rate": 9.874565595134666e-06, "loss": 0.0001, "step": 44350 }, { "epoch": 9.635099913119028, "grad_norm": 0.01246078684926033, "learning_rate": 9.86551549377353e-06, "loss": 0.0054, "step": 44360 }, { "epoch": 9.6372719374457, "grad_norm": 0.0012406118912622333, "learning_rate": 9.856465392412394e-06, "loss": 0.0001, "step": 44370 }, { "epoch": 9.639443961772372, "grad_norm": 0.00043785336310975254, "learning_rate": 9.84741529105126e-06, "loss": 0.0, "step": 44380 }, { "epoch": 9.641615986099044, "grad_norm": 0.00039675500011071563, "learning_rate": 9.838365189690125e-06, "loss": 0.0001, "step": 44390 }, { "epoch": 9.643788010425716, "grad_norm": 0.0011241411557421088, "learning_rate": 9.82931508832899e-06, "loss": 0.0001, "step": 44400 }, { "epoch": 9.64596003475239, "grad_norm": 0.00045028634485788643, "learning_rate": 9.820264986967855e-06, "loss": 0.0001, "step": 44410 }, { "epoch": 9.648132059079062, "grad_norm": 0.0003788200847338885, "learning_rate": 9.81121488560672e-06, "loss": 0.0, "step": 44420 }, { "epoch": 9.650304083405734, "grad_norm": 0.00032590579940006137, "learning_rate": 9.802164784245584e-06, "loss": 0.0001, "step": 44430 }, { "epoch": 9.652476107732406, "grad_norm": 0.0004667961038649082, "learning_rate": 9.79311468288445e-06, "loss": 0.0001, "step": 44440 }, { "epoch": 9.654648132059078, "grad_norm": 0.0004204972938168794, "learning_rate": 9.784064581523314e-06, "loss": 0.0, "step": 44450 }, { "epoch": 9.656820156385752, "grad_norm": 0.22058351337909698, "learning_rate": 9.775014480162178e-06, "loss": 0.0052, "step": 44460 }, { "epoch": 9.658992180712424, "grad_norm": 0.0003753335040528327, "learning_rate": 9.765964378801043e-06, "loss": 0.0001, "step": 44470 }, { "epoch": 9.661164205039096, "grad_norm": 0.0003022757591679692, "learning_rate": 9.756914277439907e-06, "loss": 0.0, "step": 44480 }, { "epoch": 9.663336229365768, "grad_norm": 0.0008022760157473385, "learning_rate": 9.747864176078771e-06, "loss": 0.0, "step": 44490 }, { "epoch": 9.665508253692442, "grad_norm": 0.0003668908029794693, "learning_rate": 9.738814074717638e-06, "loss": 0.0001, "step": 44500 }, { "epoch": 9.667680278019114, "grad_norm": 0.00030293557210825384, "learning_rate": 9.729763973356502e-06, "loss": 0.0, "step": 44510 }, { "epoch": 9.669852302345786, "grad_norm": 0.00043993687722831964, "learning_rate": 9.720713871995366e-06, "loss": 0.0, "step": 44520 }, { "epoch": 9.672024326672458, "grad_norm": 0.0004386714135762304, "learning_rate": 9.711663770634232e-06, "loss": 0.0059, "step": 44530 }, { "epoch": 9.67419635099913, "grad_norm": 0.001005807425826788, "learning_rate": 9.702613669273097e-06, "loss": 0.0, "step": 44540 }, { "epoch": 9.676368375325804, "grad_norm": 0.0004584961279761046, "learning_rate": 9.693563567911961e-06, "loss": 0.0045, "step": 44550 }, { "epoch": 9.678540399652476, "grad_norm": 0.0004154407943133265, "learning_rate": 9.684513466550827e-06, "loss": 0.0, "step": 44560 }, { "epoch": 9.680712423979148, "grad_norm": 0.15402913093566895, "learning_rate": 9.675463365189691e-06, "loss": 0.0038, "step": 44570 }, { "epoch": 9.68288444830582, "grad_norm": 0.0003599036717787385, "learning_rate": 9.666413263828556e-06, "loss": 0.0045, "step": 44580 }, { "epoch": 9.685056472632493, "grad_norm": 0.0002962095313705504, "learning_rate": 9.65736316246742e-06, "loss": 0.0, "step": 44590 }, { "epoch": 9.687228496959166, "grad_norm": 0.0003011043299920857, "learning_rate": 9.648313061106284e-06, "loss": 0.0004, "step": 44600 }, { "epoch": 9.689400521285839, "grad_norm": 0.0003057145804632455, "learning_rate": 9.639262959745149e-06, "loss": 0.0, "step": 44610 }, { "epoch": 9.69157254561251, "grad_norm": 0.004641965497285128, "learning_rate": 9.630212858384015e-06, "loss": 0.0131, "step": 44620 }, { "epoch": 9.693744569939183, "grad_norm": 0.001000256510451436, "learning_rate": 9.621162757022879e-06, "loss": 0.0001, "step": 44630 }, { "epoch": 9.695916594265857, "grad_norm": 0.0003455234400462359, "learning_rate": 9.612112655661743e-06, "loss": 0.0038, "step": 44640 }, { "epoch": 9.698088618592529, "grad_norm": 0.0002961833088193089, "learning_rate": 9.603062554300608e-06, "loss": 0.0011, "step": 44650 }, { "epoch": 9.7002606429192, "grad_norm": 0.0002967560722026974, "learning_rate": 9.594012452939474e-06, "loss": 0.0, "step": 44660 }, { "epoch": 9.702432667245873, "grad_norm": 0.0003102968621533364, "learning_rate": 9.584962351578338e-06, "loss": 0.0, "step": 44670 }, { "epoch": 9.704604691572545, "grad_norm": 0.00029592696228064597, "learning_rate": 9.575912250217204e-06, "loss": 0.0, "step": 44680 }, { "epoch": 9.706776715899219, "grad_norm": 0.00031772974762134254, "learning_rate": 9.566862148856068e-06, "loss": 0.0, "step": 44690 }, { "epoch": 9.70894874022589, "grad_norm": 0.00029936572536826134, "learning_rate": 9.557812047494933e-06, "loss": 0.0, "step": 44700 }, { "epoch": 9.711120764552563, "grad_norm": 0.00072198745328933, "learning_rate": 9.548761946133797e-06, "loss": 0.0001, "step": 44710 }, { "epoch": 9.713292788879235, "grad_norm": 0.00028860653401352465, "learning_rate": 9.539711844772661e-06, "loss": 0.005, "step": 44720 }, { "epoch": 9.715464813205909, "grad_norm": 15.439363479614258, "learning_rate": 9.530661743411527e-06, "loss": 0.0426, "step": 44730 }, { "epoch": 9.71763683753258, "grad_norm": 0.0002893265336751938, "learning_rate": 9.521611642050392e-06, "loss": 0.0035, "step": 44740 }, { "epoch": 9.719808861859253, "grad_norm": 0.0003055331180803478, "learning_rate": 9.512561540689256e-06, "loss": 0.0, "step": 44750 }, { "epoch": 9.721980886185925, "grad_norm": 0.00029819607152603567, "learning_rate": 9.50351143932812e-06, "loss": 0.0, "step": 44760 }, { "epoch": 9.724152910512597, "grad_norm": 0.00038984580896794796, "learning_rate": 9.494461337966985e-06, "loss": 0.0, "step": 44770 }, { "epoch": 9.72632493483927, "grad_norm": 0.0006327672745101154, "learning_rate": 9.48541123660585e-06, "loss": 0.0, "step": 44780 }, { "epoch": 9.728496959165943, "grad_norm": 0.0003006533079314977, "learning_rate": 9.476361135244715e-06, "loss": 0.0, "step": 44790 }, { "epoch": 9.730668983492615, "grad_norm": 0.006074481178075075, "learning_rate": 9.46731103388358e-06, "loss": 0.0001, "step": 44800 }, { "epoch": 9.732841007819287, "grad_norm": 0.00029355884180404246, "learning_rate": 9.458260932522444e-06, "loss": 0.0052, "step": 44810 }, { "epoch": 9.735013032145961, "grad_norm": 0.00029236814589239657, "learning_rate": 9.44921083116131e-06, "loss": 0.0, "step": 44820 }, { "epoch": 9.737185056472633, "grad_norm": 0.00391758419573307, "learning_rate": 9.440160729800174e-06, "loss": 0.004, "step": 44830 }, { "epoch": 9.739357080799305, "grad_norm": 0.0002860849490389228, "learning_rate": 9.43111062843904e-06, "loss": 0.0, "step": 44840 }, { "epoch": 9.741529105125977, "grad_norm": 0.00028976661269553006, "learning_rate": 9.422060527077905e-06, "loss": 0.0, "step": 44850 }, { "epoch": 9.74370112945265, "grad_norm": 0.0003024868783541024, "learning_rate": 9.413010425716769e-06, "loss": 0.0, "step": 44860 }, { "epoch": 9.745873153779323, "grad_norm": 0.0003289765154477209, "learning_rate": 9.403960324355633e-06, "loss": 0.0, "step": 44870 }, { "epoch": 9.748045178105995, "grad_norm": 0.00030651132692582905, "learning_rate": 9.394910222994498e-06, "loss": 0.0001, "step": 44880 }, { "epoch": 9.750217202432667, "grad_norm": 0.0009024749742820859, "learning_rate": 9.385860121633362e-06, "loss": 0.0, "step": 44890 }, { "epoch": 9.75238922675934, "grad_norm": 0.000285855756374076, "learning_rate": 9.376810020272228e-06, "loss": 0.0, "step": 44900 }, { "epoch": 9.754561251086011, "grad_norm": 0.0003339897666592151, "learning_rate": 9.367759918911092e-06, "loss": 0.0, "step": 44910 }, { "epoch": 9.756733275412685, "grad_norm": 0.00028623559046536684, "learning_rate": 9.358709817549957e-06, "loss": 0.0068, "step": 44920 }, { "epoch": 9.758905299739357, "grad_norm": 0.00029339815955609083, "learning_rate": 9.349659716188821e-06, "loss": 0.0, "step": 44930 }, { "epoch": 9.76107732406603, "grad_norm": 0.00038780056638643146, "learning_rate": 9.340609614827685e-06, "loss": 0.0, "step": 44940 }, { "epoch": 9.763249348392701, "grad_norm": 0.0005002043908461928, "learning_rate": 9.331559513466551e-06, "loss": 0.0055, "step": 44950 }, { "epoch": 9.765421372719375, "grad_norm": 0.00046502629993483424, "learning_rate": 9.322509412105416e-06, "loss": 0.0, "step": 44960 }, { "epoch": 9.767593397046047, "grad_norm": 0.0007907067192718387, "learning_rate": 9.313459310744282e-06, "loss": 0.0, "step": 44970 }, { "epoch": 9.76976542137272, "grad_norm": 0.00028519314946606755, "learning_rate": 9.304409209383146e-06, "loss": 0.0, "step": 44980 }, { "epoch": 9.771937445699391, "grad_norm": 0.001314941211603582, "learning_rate": 9.29535910802201e-06, "loss": 0.0, "step": 44990 }, { "epoch": 9.774109470026064, "grad_norm": 0.0015476990956813097, "learning_rate": 9.286309006660875e-06, "loss": 0.0, "step": 45000 }, { "epoch": 9.776281494352737, "grad_norm": 0.00029333692509680986, "learning_rate": 9.277258905299739e-06, "loss": 0.0, "step": 45010 }, { "epoch": 9.77845351867941, "grad_norm": 0.0005370720755308867, "learning_rate": 9.268208803938605e-06, "loss": 0.0, "step": 45020 }, { "epoch": 9.780625543006082, "grad_norm": 0.0005170804797671735, "learning_rate": 9.25915870257747e-06, "loss": 0.0, "step": 45030 }, { "epoch": 9.782797567332754, "grad_norm": 0.00028901023324579, "learning_rate": 9.250108601216334e-06, "loss": 0.0, "step": 45040 }, { "epoch": 9.784969591659426, "grad_norm": 0.0002907784946728498, "learning_rate": 9.241058499855198e-06, "loss": 0.0001, "step": 45050 }, { "epoch": 9.7871416159861, "grad_norm": 0.0002850813325494528, "learning_rate": 9.232008398494063e-06, "loss": 0.0, "step": 45060 }, { "epoch": 9.789313640312772, "grad_norm": 0.0004792496911250055, "learning_rate": 9.222958297132929e-06, "loss": 0.0052, "step": 45070 }, { "epoch": 9.791485664639444, "grad_norm": 0.0003039147413801402, "learning_rate": 9.213908195771793e-06, "loss": 0.0055, "step": 45080 }, { "epoch": 9.793657688966116, "grad_norm": 0.0002853998448699713, "learning_rate": 9.204858094410657e-06, "loss": 0.0, "step": 45090 }, { "epoch": 9.79582971329279, "grad_norm": 0.00031133025186136365, "learning_rate": 9.195807993049523e-06, "loss": 0.0043, "step": 45100 }, { "epoch": 9.798001737619462, "grad_norm": 0.0002923411375377327, "learning_rate": 9.186757891688388e-06, "loss": 0.0, "step": 45110 }, { "epoch": 9.800173761946134, "grad_norm": 0.0005165540496818721, "learning_rate": 9.177707790327252e-06, "loss": 0.0, "step": 45120 }, { "epoch": 9.802345786272806, "grad_norm": 0.000464085751445964, "learning_rate": 9.168657688966118e-06, "loss": 0.0, "step": 45130 }, { "epoch": 9.804517810599478, "grad_norm": 0.0002842270187102258, "learning_rate": 9.159607587604982e-06, "loss": 0.0, "step": 45140 }, { "epoch": 9.806689834926152, "grad_norm": 0.00037417063140310347, "learning_rate": 9.150557486243847e-06, "loss": 0.0, "step": 45150 }, { "epoch": 9.808861859252824, "grad_norm": 0.0002841727982740849, "learning_rate": 9.141507384882711e-06, "loss": 0.0001, "step": 45160 }, { "epoch": 9.811033883579496, "grad_norm": 0.00028294374351389706, "learning_rate": 9.132457283521575e-06, "loss": 0.0001, "step": 45170 }, { "epoch": 9.813205907906168, "grad_norm": 0.0004307189374230802, "learning_rate": 9.12340718216044e-06, "loss": 0.0048, "step": 45180 }, { "epoch": 9.815377932232842, "grad_norm": 0.0002827845746651292, "learning_rate": 9.114357080799306e-06, "loss": 0.0, "step": 45190 }, { "epoch": 9.817549956559514, "grad_norm": 0.00047424028161913157, "learning_rate": 9.10530697943817e-06, "loss": 0.0, "step": 45200 }, { "epoch": 9.819721980886186, "grad_norm": 0.00027991452952846885, "learning_rate": 9.096256878077034e-06, "loss": 0.0085, "step": 45210 }, { "epoch": 9.821894005212858, "grad_norm": 0.00029596214881166816, "learning_rate": 9.087206776715899e-06, "loss": 0.0, "step": 45220 }, { "epoch": 9.82406602953953, "grad_norm": 0.00028860315796919167, "learning_rate": 9.078156675354765e-06, "loss": 0.0, "step": 45230 }, { "epoch": 9.826238053866204, "grad_norm": 0.000579252140596509, "learning_rate": 9.069106573993629e-06, "loss": 0.0, "step": 45240 }, { "epoch": 9.828410078192876, "grad_norm": 0.0004747594066429883, "learning_rate": 9.060056472632495e-06, "loss": 0.0, "step": 45250 }, { "epoch": 9.830582102519548, "grad_norm": 0.0003230497823096812, "learning_rate": 9.05100637127136e-06, "loss": 0.0, "step": 45260 }, { "epoch": 9.83275412684622, "grad_norm": 0.0002852856705430895, "learning_rate": 9.041956269910224e-06, "loss": 0.0, "step": 45270 }, { "epoch": 9.834926151172894, "grad_norm": 0.0002863154513761401, "learning_rate": 9.032906168549088e-06, "loss": 0.0045, "step": 45280 }, { "epoch": 9.837098175499566, "grad_norm": 0.0002846615097951144, "learning_rate": 9.023856067187952e-06, "loss": 0.0, "step": 45290 }, { "epoch": 9.839270199826238, "grad_norm": 0.00028884748462587595, "learning_rate": 9.014805965826819e-06, "loss": 0.0, "step": 45300 }, { "epoch": 9.84144222415291, "grad_norm": 0.0002860878885257989, "learning_rate": 9.005755864465683e-06, "loss": 0.0, "step": 45310 }, { "epoch": 9.843614248479582, "grad_norm": 0.0004514564643613994, "learning_rate": 8.996705763104547e-06, "loss": 0.0, "step": 45320 }, { "epoch": 9.845786272806256, "grad_norm": 0.0002968948392663151, "learning_rate": 8.987655661743412e-06, "loss": 0.0, "step": 45330 }, { "epoch": 9.847958297132928, "grad_norm": 0.0002735159359872341, "learning_rate": 8.978605560382276e-06, "loss": 0.0, "step": 45340 }, { "epoch": 9.8501303214596, "grad_norm": 0.0004131652240175754, "learning_rate": 8.96955545902114e-06, "loss": 0.0, "step": 45350 }, { "epoch": 9.852302345786272, "grad_norm": 0.0002853725745808333, "learning_rate": 8.960505357660006e-06, "loss": 0.0, "step": 45360 }, { "epoch": 9.854474370112944, "grad_norm": 0.001196343800984323, "learning_rate": 8.95145525629887e-06, "loss": 0.0, "step": 45370 }, { "epoch": 9.856646394439618, "grad_norm": 0.000468127807835117, "learning_rate": 8.942405154937737e-06, "loss": 0.0, "step": 45380 }, { "epoch": 9.85881841876629, "grad_norm": 0.0002808228600770235, "learning_rate": 8.933355053576601e-06, "loss": 0.0, "step": 45390 }, { "epoch": 9.860990443092962, "grad_norm": 0.00028429063968360424, "learning_rate": 8.924304952215465e-06, "loss": 0.0, "step": 45400 }, { "epoch": 9.863162467419635, "grad_norm": 0.0003117309242952615, "learning_rate": 8.91525485085433e-06, "loss": 0.0, "step": 45410 }, { "epoch": 9.865334491746308, "grad_norm": 0.00027799929375760257, "learning_rate": 8.906204749493196e-06, "loss": 0.0, "step": 45420 }, { "epoch": 9.86750651607298, "grad_norm": 0.0002811710874084383, "learning_rate": 8.89715464813206e-06, "loss": 0.0, "step": 45430 }, { "epoch": 9.869678540399653, "grad_norm": 0.0002788385027088225, "learning_rate": 8.888104546770924e-06, "loss": 0.0, "step": 45440 }, { "epoch": 9.871850564726325, "grad_norm": 0.0002747518883552402, "learning_rate": 8.879054445409789e-06, "loss": 0.0, "step": 45450 }, { "epoch": 9.874022589052997, "grad_norm": 0.0004052985750604421, "learning_rate": 8.870004344048653e-06, "loss": 0.0, "step": 45460 }, { "epoch": 9.87619461337967, "grad_norm": 0.00027624680660665035, "learning_rate": 8.860954242687519e-06, "loss": 0.0, "step": 45470 }, { "epoch": 9.878366637706343, "grad_norm": 0.00027512721135281026, "learning_rate": 8.851904141326383e-06, "loss": 0.0054, "step": 45480 }, { "epoch": 9.880538662033015, "grad_norm": 0.000608505099080503, "learning_rate": 8.842854039965248e-06, "loss": 0.0, "step": 45490 }, { "epoch": 9.882710686359687, "grad_norm": 0.00027142054750584066, "learning_rate": 8.833803938604112e-06, "loss": 0.0048, "step": 45500 }, { "epoch": 9.884882710686359, "grad_norm": 0.00027802452677860856, "learning_rate": 8.824753837242976e-06, "loss": 0.0, "step": 45510 }, { "epoch": 9.887054735013033, "grad_norm": 0.00029872122104279697, "learning_rate": 8.815703735881842e-06, "loss": 0.0, "step": 45520 }, { "epoch": 9.889226759339705, "grad_norm": 0.000274182966677472, "learning_rate": 8.806653634520708e-06, "loss": 0.0, "step": 45530 }, { "epoch": 9.891398783666377, "grad_norm": 0.00028276850935071707, "learning_rate": 8.797603533159573e-06, "loss": 0.0, "step": 45540 }, { "epoch": 9.893570807993049, "grad_norm": 0.00027952558593824506, "learning_rate": 8.788553431798437e-06, "loss": 0.0, "step": 45550 }, { "epoch": 9.895742832319723, "grad_norm": 0.0002717770403251052, "learning_rate": 8.779503330437301e-06, "loss": 0.0, "step": 45560 }, { "epoch": 9.897914856646395, "grad_norm": 0.00030153506668284535, "learning_rate": 8.770453229076166e-06, "loss": 0.0039, "step": 45570 }, { "epoch": 9.900086880973067, "grad_norm": 0.00028142359224148095, "learning_rate": 8.76140312771503e-06, "loss": 0.0, "step": 45580 }, { "epoch": 9.902258905299739, "grad_norm": 0.0002846510033123195, "learning_rate": 8.752353026353896e-06, "loss": 0.0, "step": 45590 }, { "epoch": 9.904430929626411, "grad_norm": 0.00043699092930182815, "learning_rate": 8.74330292499276e-06, "loss": 0.0, "step": 45600 }, { "epoch": 9.906602953953085, "grad_norm": 0.00026862454251386225, "learning_rate": 8.734252823631625e-06, "loss": 0.0, "step": 45610 }, { "epoch": 9.908774978279757, "grad_norm": 0.0002761590003501624, "learning_rate": 8.72520272227049e-06, "loss": 0.0, "step": 45620 }, { "epoch": 9.910947002606429, "grad_norm": 0.0004086109984200448, "learning_rate": 8.716152620909354e-06, "loss": 0.0, "step": 45630 }, { "epoch": 9.913119026933101, "grad_norm": 0.00026836665347218513, "learning_rate": 8.70710251954822e-06, "loss": 0.0, "step": 45640 }, { "epoch": 9.915291051259775, "grad_norm": 0.00026761431945487857, "learning_rate": 8.698052418187084e-06, "loss": 0.0, "step": 45650 }, { "epoch": 9.917463075586447, "grad_norm": 0.00026968028396368027, "learning_rate": 8.689002316825948e-06, "loss": 0.0, "step": 45660 }, { "epoch": 9.919635099913119, "grad_norm": 0.0003493023104965687, "learning_rate": 8.679952215464814e-06, "loss": 0.0041, "step": 45670 }, { "epoch": 9.921807124239791, "grad_norm": 0.0002837933134287596, "learning_rate": 8.670902114103679e-06, "loss": 0.0, "step": 45680 }, { "epoch": 9.923979148566463, "grad_norm": 0.00027246540412306786, "learning_rate": 8.661852012742543e-06, "loss": 0.0, "step": 45690 }, { "epoch": 9.926151172893137, "grad_norm": 0.00026831854484044015, "learning_rate": 8.652801911381409e-06, "loss": 0.0, "step": 45700 }, { "epoch": 9.92832319721981, "grad_norm": 0.00026500673266127706, "learning_rate": 8.643751810020273e-06, "loss": 0.0, "step": 45710 }, { "epoch": 9.930495221546481, "grad_norm": 0.0002658453886397183, "learning_rate": 8.634701708659138e-06, "loss": 0.0039, "step": 45720 }, { "epoch": 9.932667245873153, "grad_norm": 0.000474416243378073, "learning_rate": 8.625651607298002e-06, "loss": 0.0056, "step": 45730 }, { "epoch": 9.934839270199827, "grad_norm": 0.0003779761027544737, "learning_rate": 8.616601505936866e-06, "loss": 0.0, "step": 45740 }, { "epoch": 9.9370112945265, "grad_norm": 0.0002920727420132607, "learning_rate": 8.60755140457573e-06, "loss": 0.0, "step": 45750 }, { "epoch": 9.939183318853171, "grad_norm": 0.00026948191225528717, "learning_rate": 8.598501303214597e-06, "loss": 0.0, "step": 45760 }, { "epoch": 9.941355343179843, "grad_norm": 0.0003025501500815153, "learning_rate": 8.589451201853461e-06, "loss": 0.0, "step": 45770 }, { "epoch": 9.943527367506515, "grad_norm": 0.00027173449052497745, "learning_rate": 8.580401100492325e-06, "loss": 0.0, "step": 45780 }, { "epoch": 9.94569939183319, "grad_norm": 0.00026526564033702016, "learning_rate": 8.57135099913119e-06, "loss": 0.0, "step": 45790 }, { "epoch": 9.947871416159861, "grad_norm": 0.00026329734828323126, "learning_rate": 8.562300897770056e-06, "loss": 0.0, "step": 45800 }, { "epoch": 9.950043440486533, "grad_norm": 0.0003687392745632678, "learning_rate": 8.55325079640892e-06, "loss": 0.0, "step": 45810 }, { "epoch": 9.952215464813206, "grad_norm": 0.0006631419528275728, "learning_rate": 8.544200695047786e-06, "loss": 0.0, "step": 45820 }, { "epoch": 9.954387489139878, "grad_norm": 0.00026899162912741303, "learning_rate": 8.53515059368665e-06, "loss": 0.0, "step": 45830 }, { "epoch": 9.956559513466551, "grad_norm": 0.0002637408615555614, "learning_rate": 8.526100492325515e-06, "loss": 0.0, "step": 45840 }, { "epoch": 9.958731537793224, "grad_norm": 0.000278302701190114, "learning_rate": 8.51705039096438e-06, "loss": 0.0, "step": 45850 }, { "epoch": 9.960903562119896, "grad_norm": 0.00026116601657122374, "learning_rate": 8.508000289603244e-06, "loss": 0.0, "step": 45860 }, { "epoch": 9.963075586446568, "grad_norm": 0.0002752375148702413, "learning_rate": 8.49895018824211e-06, "loss": 0.0, "step": 45870 }, { "epoch": 9.96524761077324, "grad_norm": 0.00026467558927834034, "learning_rate": 8.489900086880974e-06, "loss": 0.0, "step": 45880 }, { "epoch": 9.967419635099914, "grad_norm": 0.0004597961960826069, "learning_rate": 8.480849985519838e-06, "loss": 0.0, "step": 45890 }, { "epoch": 9.969591659426586, "grad_norm": 0.00026340316981077194, "learning_rate": 8.471799884158703e-06, "loss": 0.0, "step": 45900 }, { "epoch": 9.971763683753258, "grad_norm": 0.0002597050915937871, "learning_rate": 8.462749782797567e-06, "loss": 0.0, "step": 45910 }, { "epoch": 9.97393570807993, "grad_norm": 0.00026734822313301265, "learning_rate": 8.453699681436431e-06, "loss": 0.0, "step": 45920 }, { "epoch": 9.976107732406604, "grad_norm": 0.0002649006200954318, "learning_rate": 8.444649580075297e-06, "loss": 0.0, "step": 45930 }, { "epoch": 9.978279756733276, "grad_norm": 0.00026825052918866277, "learning_rate": 8.435599478714162e-06, "loss": 0.0, "step": 45940 }, { "epoch": 9.980451781059948, "grad_norm": 0.0002630847448017448, "learning_rate": 8.426549377353028e-06, "loss": 0.0056, "step": 45950 }, { "epoch": 9.98262380538662, "grad_norm": 0.0011709785321727395, "learning_rate": 8.417499275991892e-06, "loss": 0.0, "step": 45960 }, { "epoch": 9.984795829713292, "grad_norm": 0.00026459337095730007, "learning_rate": 8.408449174630756e-06, "loss": 0.0, "step": 45970 }, { "epoch": 9.986967854039966, "grad_norm": 0.0002779015921987593, "learning_rate": 8.39939907326962e-06, "loss": 0.0, "step": 45980 }, { "epoch": 9.989139878366638, "grad_norm": 0.00027847522869706154, "learning_rate": 8.390348971908487e-06, "loss": 0.0, "step": 45990 }, { "epoch": 9.99131190269331, "grad_norm": 0.0005873033660463989, "learning_rate": 8.381298870547351e-06, "loss": 0.0039, "step": 46000 }, { "epoch": 9.993483927019982, "grad_norm": 0.00026374112349003553, "learning_rate": 8.372248769186215e-06, "loss": 0.0, "step": 46010 }, { "epoch": 9.995655951346656, "grad_norm": 0.00026576846721582115, "learning_rate": 8.36319866782508e-06, "loss": 0.0, "step": 46020 }, { "epoch": 9.997827975673328, "grad_norm": 0.0003927880898118019, "learning_rate": 8.354148566463944e-06, "loss": 0.0, "step": 46030 }, { "epoch": 10.0, "grad_norm": 0.00025913305580616, "learning_rate": 8.34509846510281e-06, "loss": 0.0, "step": 46040 }, { "epoch": 10.0, "eval_f1": 0.5868725868725868, "eval_loss": 0.08650576323270798, "eval_runtime": 83.6602, "eval_samples_per_second": 119.232, "eval_steps_per_second": 7.459, "step": 46040 }, { "epoch": 10.002172024326672, "grad_norm": 0.0003737666120287031, "learning_rate": 8.336048363741674e-06, "loss": 0.0, "step": 46050 }, { "epoch": 10.004344048653344, "grad_norm": 0.000261695240624249, "learning_rate": 8.326998262380539e-06, "loss": 0.0038, "step": 46060 }, { "epoch": 10.006516072980018, "grad_norm": 0.0002736767055466771, "learning_rate": 8.317948161019403e-06, "loss": 0.0, "step": 46070 }, { "epoch": 10.00868809730669, "grad_norm": 0.00025755647220648825, "learning_rate": 8.308898059658267e-06, "loss": 0.0, "step": 46080 }, { "epoch": 10.010860121633362, "grad_norm": 0.0005032480112276971, "learning_rate": 8.299847958297133e-06, "loss": 0.0, "step": 46090 }, { "epoch": 10.013032145960034, "grad_norm": 0.0003156476595904678, "learning_rate": 8.290797856936e-06, "loss": 0.0, "step": 46100 }, { "epoch": 10.015204170286708, "grad_norm": 0.0002580749278422445, "learning_rate": 8.281747755574864e-06, "loss": 0.0056, "step": 46110 }, { "epoch": 10.01737619461338, "grad_norm": 0.0004497791233006865, "learning_rate": 8.272697654213728e-06, "loss": 0.0, "step": 46120 }, { "epoch": 10.019548218940052, "grad_norm": 0.0002614956465549767, "learning_rate": 8.263647552852593e-06, "loss": 0.0, "step": 46130 }, { "epoch": 10.021720243266724, "grad_norm": 0.00025961664505302906, "learning_rate": 8.254597451491457e-06, "loss": 0.0, "step": 46140 }, { "epoch": 10.023892267593396, "grad_norm": 0.0003735600912477821, "learning_rate": 8.245547350130321e-06, "loss": 0.0, "step": 46150 }, { "epoch": 10.02606429192007, "grad_norm": 0.0002574862737674266, "learning_rate": 8.236497248769187e-06, "loss": 0.0, "step": 46160 }, { "epoch": 10.028236316246742, "grad_norm": 0.0002677075390238315, "learning_rate": 8.227447147408052e-06, "loss": 0.0, "step": 46170 }, { "epoch": 10.030408340573414, "grad_norm": 0.00025923867360688746, "learning_rate": 8.218397046046916e-06, "loss": 0.0, "step": 46180 }, { "epoch": 10.032580364900086, "grad_norm": 0.0003291449975222349, "learning_rate": 8.20934694468578e-06, "loss": 0.0, "step": 46190 }, { "epoch": 10.034752389226758, "grad_norm": 0.0002590891672298312, "learning_rate": 8.200296843324645e-06, "loss": 0.0, "step": 46200 }, { "epoch": 10.036924413553432, "grad_norm": 0.000344390602549538, "learning_rate": 8.191246741963509e-06, "loss": 0.0, "step": 46210 }, { "epoch": 10.039096437880104, "grad_norm": 0.0002652654657140374, "learning_rate": 8.182196640602375e-06, "loss": 0.0, "step": 46220 }, { "epoch": 10.041268462206776, "grad_norm": 0.0002611863019410521, "learning_rate": 8.17314653924124e-06, "loss": 0.0, "step": 46230 }, { "epoch": 10.043440486533449, "grad_norm": 0.00025722087593749166, "learning_rate": 8.164096437880105e-06, "loss": 0.0, "step": 46240 }, { "epoch": 10.045612510860122, "grad_norm": 0.00026090044411830604, "learning_rate": 8.15504633651897e-06, "loss": 0.0, "step": 46250 }, { "epoch": 10.047784535186794, "grad_norm": 0.0002696911687962711, "learning_rate": 8.145996235157834e-06, "loss": 0.0, "step": 46260 }, { "epoch": 10.049956559513467, "grad_norm": 0.0002560535504017025, "learning_rate": 8.1369461337967e-06, "loss": 0.0, "step": 46270 }, { "epoch": 10.052128583840139, "grad_norm": 0.0002532984653953463, "learning_rate": 8.127896032435564e-06, "loss": 0.0, "step": 46280 }, { "epoch": 10.05430060816681, "grad_norm": 0.0002560800057835877, "learning_rate": 8.118845931074429e-06, "loss": 0.0, "step": 46290 }, { "epoch": 10.056472632493485, "grad_norm": 0.00025648128939792514, "learning_rate": 8.109795829713293e-06, "loss": 0.0, "step": 46300 }, { "epoch": 10.058644656820157, "grad_norm": 0.00025430944515392184, "learning_rate": 8.100745728352157e-06, "loss": 0.0037, "step": 46310 }, { "epoch": 10.060816681146829, "grad_norm": 0.00025855813873931766, "learning_rate": 8.091695626991022e-06, "loss": 0.0, "step": 46320 }, { "epoch": 10.0629887054735, "grad_norm": 0.00025462303892709315, "learning_rate": 8.082645525629888e-06, "loss": 0.0039, "step": 46330 }, { "epoch": 10.065160729800175, "grad_norm": 0.000251170014962554, "learning_rate": 8.073595424268752e-06, "loss": 0.0, "step": 46340 }, { "epoch": 10.067332754126847, "grad_norm": 0.00026278331642970443, "learning_rate": 8.064545322907616e-06, "loss": 0.0034, "step": 46350 }, { "epoch": 10.069504778453519, "grad_norm": 0.000444703153334558, "learning_rate": 8.05549522154648e-06, "loss": 0.0062, "step": 46360 }, { "epoch": 10.07167680278019, "grad_norm": 0.0002562769514042884, "learning_rate": 8.046445120185347e-06, "loss": 0.0, "step": 46370 }, { "epoch": 10.073848827106863, "grad_norm": 0.0030441167764365673, "learning_rate": 8.037395018824211e-06, "loss": 0.0, "step": 46380 }, { "epoch": 10.076020851433537, "grad_norm": 0.00029628712218254805, "learning_rate": 8.028344917463077e-06, "loss": 0.0, "step": 46390 }, { "epoch": 10.078192875760209, "grad_norm": 0.000754713371861726, "learning_rate": 8.019294816101942e-06, "loss": 0.0, "step": 46400 }, { "epoch": 10.080364900086881, "grad_norm": 0.0002676001749932766, "learning_rate": 8.010244714740806e-06, "loss": 0.0001, "step": 46410 }, { "epoch": 10.082536924413553, "grad_norm": 0.000641426129732281, "learning_rate": 8.00119461337967e-06, "loss": 0.0, "step": 46420 }, { "epoch": 10.084708948740225, "grad_norm": 0.00025804596953094006, "learning_rate": 7.992144512018535e-06, "loss": 0.0, "step": 46430 }, { "epoch": 10.086880973066899, "grad_norm": 0.00027530855732038617, "learning_rate": 7.9830944106574e-06, "loss": 0.0, "step": 46440 }, { "epoch": 10.089052997393571, "grad_norm": 0.0004725789185613394, "learning_rate": 7.974044309296265e-06, "loss": 0.0001, "step": 46450 }, { "epoch": 10.091225021720243, "grad_norm": 0.000585050496738404, "learning_rate": 7.96499420793513e-06, "loss": 0.0, "step": 46460 }, { "epoch": 10.093397046046915, "grad_norm": 0.0002689628745429218, "learning_rate": 7.955944106573994e-06, "loss": 0.0, "step": 46470 }, { "epoch": 10.095569070373589, "grad_norm": 0.006118168588727713, "learning_rate": 7.946894005212858e-06, "loss": 0.0035, "step": 46480 }, { "epoch": 10.097741094700261, "grad_norm": 0.00026504171546548605, "learning_rate": 7.937843903851722e-06, "loss": 0.0, "step": 46490 }, { "epoch": 10.099913119026933, "grad_norm": 0.00029210373759269714, "learning_rate": 7.928793802490588e-06, "loss": 0.0, "step": 46500 }, { "epoch": 10.102085143353605, "grad_norm": 0.00025521591305732727, "learning_rate": 7.919743701129453e-06, "loss": 0.0, "step": 46510 }, { "epoch": 10.104257167680277, "grad_norm": 0.0004499217902775854, "learning_rate": 7.910693599768319e-06, "loss": 0.0, "step": 46520 }, { "epoch": 10.106429192006951, "grad_norm": 0.00027370412135496736, "learning_rate": 7.901643498407183e-06, "loss": 0.0, "step": 46530 }, { "epoch": 10.108601216333623, "grad_norm": 0.0002593390236143023, "learning_rate": 7.892593397046047e-06, "loss": 0.0, "step": 46540 }, { "epoch": 10.110773240660295, "grad_norm": 0.00035731252864934504, "learning_rate": 7.883543295684912e-06, "loss": 0.0, "step": 46550 }, { "epoch": 10.112945264986967, "grad_norm": 0.0002545152383390814, "learning_rate": 7.874493194323778e-06, "loss": 0.003, "step": 46560 }, { "epoch": 10.115117289313641, "grad_norm": 0.0002480170805938542, "learning_rate": 7.865443092962642e-06, "loss": 0.0, "step": 46570 }, { "epoch": 10.117289313640313, "grad_norm": 0.0002511973725631833, "learning_rate": 7.856392991601506e-06, "loss": 0.0, "step": 46580 }, { "epoch": 10.119461337966985, "grad_norm": 0.0002490723563823849, "learning_rate": 7.84734289024037e-06, "loss": 0.0006, "step": 46590 }, { "epoch": 10.121633362293657, "grad_norm": 0.000278045772574842, "learning_rate": 7.838292788879235e-06, "loss": 0.0, "step": 46600 }, { "epoch": 10.12380538662033, "grad_norm": 0.00024990536621771753, "learning_rate": 7.8292426875181e-06, "loss": 0.0, "step": 46610 }, { "epoch": 10.125977410947003, "grad_norm": 0.0017814398743212223, "learning_rate": 7.820192586156965e-06, "loss": 0.0, "step": 46620 }, { "epoch": 10.128149435273675, "grad_norm": 0.00024815337383188307, "learning_rate": 7.81114248479583e-06, "loss": 0.0, "step": 46630 }, { "epoch": 10.130321459600347, "grad_norm": 0.0002462256234139204, "learning_rate": 7.802092383434694e-06, "loss": 0.0, "step": 46640 }, { "epoch": 10.13249348392702, "grad_norm": 0.0002477782254572958, "learning_rate": 7.79304228207356e-06, "loss": 0.0, "step": 46650 }, { "epoch": 10.134665508253692, "grad_norm": 0.00024287530686706305, "learning_rate": 7.783992180712425e-06, "loss": 0.0, "step": 46660 }, { "epoch": 10.136837532580365, "grad_norm": 0.00024437796673737466, "learning_rate": 7.77494207935129e-06, "loss": 0.0, "step": 46670 }, { "epoch": 10.139009556907038, "grad_norm": 0.0002460273972246796, "learning_rate": 7.765891977990155e-06, "loss": 0.0035, "step": 46680 }, { "epoch": 10.14118158123371, "grad_norm": 0.00024691823637112975, "learning_rate": 7.75684187662902e-06, "loss": 0.0061, "step": 46690 }, { "epoch": 10.143353605560382, "grad_norm": 0.016955753788352013, "learning_rate": 7.747791775267884e-06, "loss": 0.0038, "step": 46700 }, { "epoch": 10.145525629887056, "grad_norm": 0.00024686206597834826, "learning_rate": 7.738741673906748e-06, "loss": 0.0, "step": 46710 }, { "epoch": 10.147697654213728, "grad_norm": 0.00024295347975566983, "learning_rate": 7.729691572545612e-06, "loss": 0.0, "step": 46720 }, { "epoch": 10.1498696785404, "grad_norm": 0.00026942809927277267, "learning_rate": 7.720641471184478e-06, "loss": 0.0, "step": 46730 }, { "epoch": 10.152041702867072, "grad_norm": 0.00024324421247001737, "learning_rate": 7.711591369823343e-06, "loss": 0.0, "step": 46740 }, { "epoch": 10.154213727193744, "grad_norm": 0.00023985575535334647, "learning_rate": 7.702541268462207e-06, "loss": 0.0033, "step": 46750 }, { "epoch": 10.156385751520418, "grad_norm": 0.0002428061852697283, "learning_rate": 7.693491167101071e-06, "loss": 0.0, "step": 46760 }, { "epoch": 10.15855777584709, "grad_norm": 0.00024216584279201925, "learning_rate": 7.684441065739936e-06, "loss": 0.0, "step": 46770 }, { "epoch": 10.160729800173762, "grad_norm": 0.00024820497492328286, "learning_rate": 7.6753909643788e-06, "loss": 0.0071, "step": 46780 }, { "epoch": 10.162901824500434, "grad_norm": 0.0002512444625608623, "learning_rate": 7.666340863017666e-06, "loss": 0.0, "step": 46790 }, { "epoch": 10.165073848827106, "grad_norm": 0.0002537766413297504, "learning_rate": 7.65729076165653e-06, "loss": 0.0, "step": 46800 }, { "epoch": 10.16724587315378, "grad_norm": 0.00023964142019394785, "learning_rate": 7.648240660295396e-06, "loss": 0.0, "step": 46810 }, { "epoch": 10.169417897480452, "grad_norm": 0.00023952442279551178, "learning_rate": 7.63919055893426e-06, "loss": 0.0031, "step": 46820 }, { "epoch": 10.171589921807124, "grad_norm": 0.00023879566288087517, "learning_rate": 7.630140457573125e-06, "loss": 0.0, "step": 46830 }, { "epoch": 10.173761946133796, "grad_norm": 0.000258896267041564, "learning_rate": 7.621090356211989e-06, "loss": 0.0, "step": 46840 }, { "epoch": 10.17593397046047, "grad_norm": 0.00023973002680577338, "learning_rate": 7.6120402548508554e-06, "loss": 0.0, "step": 46850 }, { "epoch": 10.178105994787142, "grad_norm": 0.0002393622271483764, "learning_rate": 7.60299015348972e-06, "loss": 0.0, "step": 46860 }, { "epoch": 10.180278019113814, "grad_norm": 0.0002395124320173636, "learning_rate": 7.593940052128584e-06, "loss": 0.0, "step": 46870 }, { "epoch": 10.182450043440486, "grad_norm": 0.00024033243244048208, "learning_rate": 7.5848899507674485e-06, "loss": 0.0, "step": 46880 }, { "epoch": 10.184622067767158, "grad_norm": 0.001191705116070807, "learning_rate": 7.575839849406313e-06, "loss": 0.0062, "step": 46890 }, { "epoch": 10.186794092093832, "grad_norm": 0.0002385633415542543, "learning_rate": 7.566789748045179e-06, "loss": 0.0, "step": 46900 }, { "epoch": 10.188966116420504, "grad_norm": 0.0002466611040290445, "learning_rate": 7.557739646684044e-06, "loss": 0.0, "step": 46910 }, { "epoch": 10.191138140747176, "grad_norm": 0.00024101352028083056, "learning_rate": 7.548689545322908e-06, "loss": 0.0, "step": 46920 }, { "epoch": 10.193310165073848, "grad_norm": 0.00024026913160923868, "learning_rate": 7.539639443961773e-06, "loss": 0.0, "step": 46930 }, { "epoch": 10.195482189400522, "grad_norm": 0.00024203627253882587, "learning_rate": 7.530589342600637e-06, "loss": 0.0, "step": 46940 }, { "epoch": 10.197654213727194, "grad_norm": 0.00024288229178637266, "learning_rate": 7.521539241239501e-06, "loss": 0.0, "step": 46950 }, { "epoch": 10.199826238053866, "grad_norm": 0.00024093518732115626, "learning_rate": 7.512489139878367e-06, "loss": 0.0, "step": 46960 }, { "epoch": 10.201998262380538, "grad_norm": 0.00023962014529388398, "learning_rate": 7.503439038517232e-06, "loss": 0.0035, "step": 46970 }, { "epoch": 10.20417028670721, "grad_norm": 0.0004495520843192935, "learning_rate": 7.494388937156097e-06, "loss": 0.006, "step": 46980 }, { "epoch": 10.206342311033884, "grad_norm": 0.00027092520031146705, "learning_rate": 7.485338835794961e-06, "loss": 0.0, "step": 46990 }, { "epoch": 10.208514335360556, "grad_norm": 0.00037834502290934324, "learning_rate": 7.476288734433826e-06, "loss": 0.0, "step": 47000 }, { "epoch": 10.210686359687228, "grad_norm": 0.00024022634897846729, "learning_rate": 7.46723863307269e-06, "loss": 0.0, "step": 47010 }, { "epoch": 10.2128583840139, "grad_norm": 0.00024379647220484912, "learning_rate": 7.458188531711556e-06, "loss": 0.0, "step": 47020 }, { "epoch": 10.215030408340574, "grad_norm": 0.00023640983272343874, "learning_rate": 7.44913843035042e-06, "loss": 0.0, "step": 47030 }, { "epoch": 10.217202432667246, "grad_norm": 0.00023816687462385744, "learning_rate": 7.440088328989285e-06, "loss": 0.0059, "step": 47040 }, { "epoch": 10.219374456993918, "grad_norm": 0.00024333465262316167, "learning_rate": 7.43103822762815e-06, "loss": 0.0, "step": 47050 }, { "epoch": 10.22154648132059, "grad_norm": 0.00023967133893165737, "learning_rate": 7.421988126267014e-06, "loss": 0.0, "step": 47060 }, { "epoch": 10.223718505647263, "grad_norm": 0.0003435276448726654, "learning_rate": 7.41293802490588e-06, "loss": 0.0, "step": 47070 }, { "epoch": 10.225890529973936, "grad_norm": 0.0002411666646366939, "learning_rate": 7.4038879235447446e-06, "loss": 0.0, "step": 47080 }, { "epoch": 10.228062554300609, "grad_norm": 0.0003091098624281585, "learning_rate": 7.394837822183609e-06, "loss": 0.0, "step": 47090 }, { "epoch": 10.23023457862728, "grad_norm": 0.00024104368640109897, "learning_rate": 7.385787720822473e-06, "loss": 0.0053, "step": 47100 }, { "epoch": 10.232406602953953, "grad_norm": 0.00024548693909309804, "learning_rate": 7.376737619461338e-06, "loss": 0.0, "step": 47110 }, { "epoch": 10.234578627280625, "grad_norm": 0.00024295470211654902, "learning_rate": 7.367687518100203e-06, "loss": 0.0, "step": 47120 }, { "epoch": 10.236750651607299, "grad_norm": 0.0002608944196254015, "learning_rate": 7.358637416739069e-06, "loss": 0.0, "step": 47130 }, { "epoch": 10.23892267593397, "grad_norm": 0.00024352218315470964, "learning_rate": 7.349587315377933e-06, "loss": 0.0, "step": 47140 }, { "epoch": 10.241094700260643, "grad_norm": 0.0002514914667699486, "learning_rate": 7.3405372140167975e-06, "loss": 0.0, "step": 47150 }, { "epoch": 10.243266724587315, "grad_norm": 0.00023742808843962848, "learning_rate": 7.331487112655662e-06, "loss": 0.0, "step": 47160 }, { "epoch": 10.245438748913989, "grad_norm": 0.0003619439958129078, "learning_rate": 7.322437011294526e-06, "loss": 0.0, "step": 47170 }, { "epoch": 10.24761077324066, "grad_norm": 0.00024090451188385487, "learning_rate": 7.313386909933391e-06, "loss": 0.0, "step": 47180 }, { "epoch": 10.249782797567333, "grad_norm": 0.0002406891289865598, "learning_rate": 7.3043368085722565e-06, "loss": 0.0, "step": 47190 }, { "epoch": 10.251954821894005, "grad_norm": 0.00024321397359017283, "learning_rate": 7.295286707211122e-06, "loss": 0.0, "step": 47200 }, { "epoch": 10.254126846220677, "grad_norm": 0.00024024557205848396, "learning_rate": 7.286236605849986e-06, "loss": 0.0, "step": 47210 }, { "epoch": 10.25629887054735, "grad_norm": 0.00023844727547839284, "learning_rate": 7.27718650448885e-06, "loss": 0.0, "step": 47220 }, { "epoch": 10.258470894874023, "grad_norm": 0.0002457252121530473, "learning_rate": 7.268136403127715e-06, "loss": 0.0, "step": 47230 }, { "epoch": 10.260642919200695, "grad_norm": 0.0002406234125373885, "learning_rate": 7.259086301766579e-06, "loss": 0.0037, "step": 47240 }, { "epoch": 10.262814943527367, "grad_norm": 0.0005037950468249619, "learning_rate": 7.250036200405445e-06, "loss": 0.0, "step": 47250 }, { "epoch": 10.264986967854039, "grad_norm": 0.00024237303296104074, "learning_rate": 7.24098609904431e-06, "loss": 0.0, "step": 47260 }, { "epoch": 10.267158992180713, "grad_norm": 0.00032777455635368824, "learning_rate": 7.231935997683175e-06, "loss": 0.0, "step": 47270 }, { "epoch": 10.269331016507385, "grad_norm": 0.00023873074678704143, "learning_rate": 7.222885896322039e-06, "loss": 0.0, "step": 47280 }, { "epoch": 10.271503040834057, "grad_norm": 0.00024848480825312436, "learning_rate": 7.213835794960903e-06, "loss": 0.0, "step": 47290 }, { "epoch": 10.27367506516073, "grad_norm": 0.0002387873100815341, "learning_rate": 7.204785693599769e-06, "loss": 0.0, "step": 47300 }, { "epoch": 10.275847089487403, "grad_norm": 0.0002637762518133968, "learning_rate": 7.195735592238634e-06, "loss": 0.0, "step": 47310 }, { "epoch": 10.278019113814075, "grad_norm": 0.0002451884211041033, "learning_rate": 7.186685490877498e-06, "loss": 0.0, "step": 47320 }, { "epoch": 10.280191138140747, "grad_norm": 0.00024033308727666736, "learning_rate": 7.177635389516363e-06, "loss": 0.0, "step": 47330 }, { "epoch": 10.28236316246742, "grad_norm": 0.0003520794562064111, "learning_rate": 7.1685852881552275e-06, "loss": 0.0, "step": 47340 }, { "epoch": 10.284535186794091, "grad_norm": 0.0002365164109505713, "learning_rate": 7.159535186794092e-06, "loss": 0.0, "step": 47350 }, { "epoch": 10.286707211120765, "grad_norm": 0.0002398495125817135, "learning_rate": 7.150485085432958e-06, "loss": 0.0052, "step": 47360 }, { "epoch": 10.288879235447437, "grad_norm": 0.0003276202769484371, "learning_rate": 7.141434984071822e-06, "loss": 0.0, "step": 47370 }, { "epoch": 10.29105125977411, "grad_norm": 0.0002395589544903487, "learning_rate": 7.132384882710687e-06, "loss": 0.0, "step": 47380 }, { "epoch": 10.293223284100781, "grad_norm": 0.00023600317945238203, "learning_rate": 7.123334781349551e-06, "loss": 0.0, "step": 47390 }, { "epoch": 10.295395308427455, "grad_norm": 0.00023694564879406244, "learning_rate": 7.114284679988416e-06, "loss": 0.0038, "step": 47400 }, { "epoch": 10.297567332754127, "grad_norm": 0.00024834126816131175, "learning_rate": 7.1052345786272804e-06, "loss": 0.005, "step": 47410 }, { "epoch": 10.2997393570808, "grad_norm": 0.00023720713215880096, "learning_rate": 7.0961844772661465e-06, "loss": 0.0, "step": 47420 }, { "epoch": 10.301911381407471, "grad_norm": 0.00023603474255651236, "learning_rate": 7.087134375905011e-06, "loss": 0.0, "step": 47430 }, { "epoch": 10.304083405734143, "grad_norm": 0.00023258681176230311, "learning_rate": 7.078084274543875e-06, "loss": 0.0, "step": 47440 }, { "epoch": 10.306255430060817, "grad_norm": 0.00023691673413850367, "learning_rate": 7.0690341731827395e-06, "loss": 0.0, "step": 47450 }, { "epoch": 10.30842745438749, "grad_norm": 0.0002366963162785396, "learning_rate": 7.059984071821605e-06, "loss": 0.0, "step": 47460 }, { "epoch": 10.310599478714161, "grad_norm": 0.0003267844149377197, "learning_rate": 7.05093397046047e-06, "loss": 0.0, "step": 47470 }, { "epoch": 10.312771503040834, "grad_norm": 0.00031106435926631093, "learning_rate": 7.041883869099335e-06, "loss": 0.0044, "step": 47480 }, { "epoch": 10.314943527367507, "grad_norm": 0.00023934834462124854, "learning_rate": 7.032833767738199e-06, "loss": 0.0054, "step": 47490 }, { "epoch": 10.31711555169418, "grad_norm": 0.0011841370724141598, "learning_rate": 7.023783666377064e-06, "loss": 0.0, "step": 47500 }, { "epoch": 10.319287576020852, "grad_norm": 0.00024405766453128308, "learning_rate": 7.014733565015928e-06, "loss": 0.0039, "step": 47510 }, { "epoch": 10.321459600347524, "grad_norm": 0.0005109702469781041, "learning_rate": 7.005683463654792e-06, "loss": 0.0, "step": 47520 }, { "epoch": 10.323631624674196, "grad_norm": 0.00024197706079576164, "learning_rate": 6.9966333622936584e-06, "loss": 0.0, "step": 47530 }, { "epoch": 10.32580364900087, "grad_norm": 0.00023690721718594432, "learning_rate": 6.987583260932523e-06, "loss": 0.0, "step": 47540 }, { "epoch": 10.327975673327542, "grad_norm": 0.0002361913357162848, "learning_rate": 6.978533159571388e-06, "loss": 0.0, "step": 47550 }, { "epoch": 10.330147697654214, "grad_norm": 0.00023627316113561392, "learning_rate": 6.969483058210252e-06, "loss": 0.0, "step": 47560 }, { "epoch": 10.332319721980886, "grad_norm": 0.00023409361892845482, "learning_rate": 6.960432956849117e-06, "loss": 0.0048, "step": 47570 }, { "epoch": 10.334491746307558, "grad_norm": 0.00023509345191996545, "learning_rate": 6.951382855487981e-06, "loss": 0.0043, "step": 47580 }, { "epoch": 10.336663770634232, "grad_norm": 0.0002373265306232497, "learning_rate": 6.942332754126847e-06, "loss": 0.0, "step": 47590 }, { "epoch": 10.338835794960904, "grad_norm": 0.00023308381787501276, "learning_rate": 6.933282652765711e-06, "loss": 0.0, "step": 47600 }, { "epoch": 10.341007819287576, "grad_norm": 0.00023303077614400536, "learning_rate": 6.924232551404576e-06, "loss": 0.0, "step": 47610 }, { "epoch": 10.343179843614248, "grad_norm": 0.0002339918282814324, "learning_rate": 6.915182450043441e-06, "loss": 0.0, "step": 47620 }, { "epoch": 10.345351867940922, "grad_norm": 0.00023397189215756953, "learning_rate": 6.906132348682305e-06, "loss": 0.0, "step": 47630 }, { "epoch": 10.347523892267594, "grad_norm": 0.00023484285338781774, "learning_rate": 6.8970822473211696e-06, "loss": 0.0, "step": 47640 }, { "epoch": 10.349695916594266, "grad_norm": 0.00023741343466099352, "learning_rate": 6.888032145960036e-06, "loss": 0.0, "step": 47650 }, { "epoch": 10.351867940920938, "grad_norm": 0.00023638234415557235, "learning_rate": 6.8789820445989e-06, "loss": 0.0, "step": 47660 }, { "epoch": 10.35403996524761, "grad_norm": 0.0002334350865567103, "learning_rate": 6.869931943237764e-06, "loss": 0.0, "step": 47670 }, { "epoch": 10.356211989574284, "grad_norm": 0.00023239596339408308, "learning_rate": 6.8608818418766295e-06, "loss": 0.0, "step": 47680 }, { "epoch": 10.358384013900956, "grad_norm": 0.0002384950203122571, "learning_rate": 6.851831740515494e-06, "loss": 0.0052, "step": 47690 }, { "epoch": 10.360556038227628, "grad_norm": 0.00023201614385470748, "learning_rate": 6.84278163915436e-06, "loss": 0.0, "step": 47700 }, { "epoch": 10.3627280625543, "grad_norm": 0.0002313854784006253, "learning_rate": 6.833731537793224e-06, "loss": 0.0, "step": 47710 }, { "epoch": 10.364900086880972, "grad_norm": 0.0002525383315514773, "learning_rate": 6.8246814364320885e-06, "loss": 0.0, "step": 47720 }, { "epoch": 10.367072111207646, "grad_norm": 0.00023291408433578908, "learning_rate": 6.815631335070953e-06, "loss": 0.0, "step": 47730 }, { "epoch": 10.369244135534318, "grad_norm": 0.0003160819469485432, "learning_rate": 6.806581233709817e-06, "loss": 0.0, "step": 47740 }, { "epoch": 10.37141615986099, "grad_norm": 0.00023806083481758833, "learning_rate": 6.797531132348682e-06, "loss": 0.0, "step": 47750 }, { "epoch": 10.373588184187662, "grad_norm": 0.00023504970886278898, "learning_rate": 6.7884810309875476e-06, "loss": 0.0, "step": 47760 }, { "epoch": 10.375760208514336, "grad_norm": 0.00023199041606858373, "learning_rate": 6.779430929626413e-06, "loss": 0.004, "step": 47770 }, { "epoch": 10.377932232841008, "grad_norm": 0.0002341802028240636, "learning_rate": 6.770380828265277e-06, "loss": 0.0, "step": 47780 }, { "epoch": 10.38010425716768, "grad_norm": 0.00031527108512818813, "learning_rate": 6.7613307269041414e-06, "loss": 0.0, "step": 47790 }, { "epoch": 10.382276281494352, "grad_norm": 0.00023442119709216058, "learning_rate": 6.752280625543006e-06, "loss": 0.0091, "step": 47800 }, { "epoch": 10.384448305821024, "grad_norm": 0.00039361134986393154, "learning_rate": 6.74323052418187e-06, "loss": 0.0051, "step": 47810 }, { "epoch": 10.386620330147698, "grad_norm": 0.00023753584537189454, "learning_rate": 6.734180422820736e-06, "loss": 0.0052, "step": 47820 }, { "epoch": 10.38879235447437, "grad_norm": 0.00023664938635192811, "learning_rate": 6.725130321459601e-06, "loss": 0.0001, "step": 47830 }, { "epoch": 10.390964378801042, "grad_norm": 0.00023270200472325087, "learning_rate": 6.716080220098466e-06, "loss": 0.0, "step": 47840 }, { "epoch": 10.393136403127714, "grad_norm": 0.00024005438899621367, "learning_rate": 6.70703011873733e-06, "loss": 0.0, "step": 47850 }, { "epoch": 10.395308427454388, "grad_norm": 0.00023178478295449167, "learning_rate": 6.697980017376194e-06, "loss": 0.0, "step": 47860 }, { "epoch": 10.39748045178106, "grad_norm": 0.00023401351063512266, "learning_rate": 6.68892991601506e-06, "loss": 0.0, "step": 47870 }, { "epoch": 10.399652476107732, "grad_norm": 0.0003052498504985124, "learning_rate": 6.679879814653925e-06, "loss": 0.0039, "step": 47880 }, { "epoch": 10.401824500434405, "grad_norm": 0.00023244529438670725, "learning_rate": 6.670829713292789e-06, "loss": 0.0044, "step": 47890 }, { "epoch": 10.403996524761077, "grad_norm": 0.0028232275508344173, "learning_rate": 6.661779611931654e-06, "loss": 0.0, "step": 47900 }, { "epoch": 10.40616854908775, "grad_norm": 0.00023363882792182267, "learning_rate": 6.652729510570519e-06, "loss": 0.0, "step": 47910 }, { "epoch": 10.408340573414423, "grad_norm": 0.00023200880968943238, "learning_rate": 6.643679409209383e-06, "loss": 0.0047, "step": 47920 }, { "epoch": 10.410512597741095, "grad_norm": 0.00023291223624255508, "learning_rate": 6.634629307848249e-06, "loss": 0.0, "step": 47930 }, { "epoch": 10.412684622067767, "grad_norm": 0.00023574443184770644, "learning_rate": 6.625579206487113e-06, "loss": 0.0047, "step": 47940 }, { "epoch": 10.41485664639444, "grad_norm": 0.00023099414829630405, "learning_rate": 6.616529105125978e-06, "loss": 0.0, "step": 47950 }, { "epoch": 10.417028670721113, "grad_norm": 0.0002286070812260732, "learning_rate": 6.607479003764842e-06, "loss": 0.0, "step": 47960 }, { "epoch": 10.419200695047785, "grad_norm": 0.0002335784665774554, "learning_rate": 6.598428902403707e-06, "loss": 0.0, "step": 47970 }, { "epoch": 10.421372719374457, "grad_norm": 0.00023560720728710294, "learning_rate": 6.5893788010425715e-06, "loss": 0.0, "step": 47980 }, { "epoch": 10.423544743701129, "grad_norm": 0.00025418924633413553, "learning_rate": 6.5803286996814375e-06, "loss": 0.0, "step": 47990 }, { "epoch": 10.425716768027803, "grad_norm": 0.00023169341147877276, "learning_rate": 6.571278598320302e-06, "loss": 0.0, "step": 48000 }, { "epoch": 10.427888792354475, "grad_norm": 0.0002545344177633524, "learning_rate": 6.562228496959166e-06, "loss": 0.0397, "step": 48010 }, { "epoch": 10.430060816681147, "grad_norm": 0.00028514183941297233, "learning_rate": 6.5531783955980305e-06, "loss": 0.0051, "step": 48020 }, { "epoch": 10.432232841007819, "grad_norm": 0.00029667047783732414, "learning_rate": 6.544128294236896e-06, "loss": 0.0003, "step": 48030 }, { "epoch": 10.434404865334491, "grad_norm": 0.00027842583949677646, "learning_rate": 6.53507819287576e-06, "loss": 0.0001, "step": 48040 }, { "epoch": 10.436576889661165, "grad_norm": 0.007493993733078241, "learning_rate": 6.526028091514626e-06, "loss": 0.0, "step": 48050 }, { "epoch": 10.438748913987837, "grad_norm": 0.0002761534124147147, "learning_rate": 6.5169779901534904e-06, "loss": 0.0042, "step": 48060 }, { "epoch": 10.440920938314509, "grad_norm": 0.00026150167104788125, "learning_rate": 6.507927888792355e-06, "loss": 0.0, "step": 48070 }, { "epoch": 10.443092962641181, "grad_norm": 0.0003275485069025308, "learning_rate": 6.498877787431219e-06, "loss": 0.0042, "step": 48080 }, { "epoch": 10.445264986967855, "grad_norm": 0.0002545382303651422, "learning_rate": 6.4898276860700835e-06, "loss": 0.0001, "step": 48090 }, { "epoch": 10.447437011294527, "grad_norm": 0.0002404392434982583, "learning_rate": 6.4807775847089495e-06, "loss": 0.0051, "step": 48100 }, { "epoch": 10.449609035621199, "grad_norm": 0.0002498602552805096, "learning_rate": 6.471727483347814e-06, "loss": 0.0, "step": 48110 }, { "epoch": 10.451781059947871, "grad_norm": 0.00023550092009827495, "learning_rate": 6.462677381986679e-06, "loss": 0.0, "step": 48120 }, { "epoch": 10.453953084274543, "grad_norm": 0.000236342559219338, "learning_rate": 6.453627280625543e-06, "loss": 0.0, "step": 48130 }, { "epoch": 10.456125108601217, "grad_norm": 0.00025661668041720986, "learning_rate": 6.444577179264408e-06, "loss": 0.0043, "step": 48140 }, { "epoch": 10.458297132927889, "grad_norm": 0.00042461883276700974, "learning_rate": 6.435527077903272e-06, "loss": 0.0, "step": 48150 }, { "epoch": 10.460469157254561, "grad_norm": 0.0003444579488132149, "learning_rate": 6.426476976542138e-06, "loss": 0.0, "step": 48160 }, { "epoch": 10.462641181581233, "grad_norm": 0.0003125490911770612, "learning_rate": 6.417426875181002e-06, "loss": 0.0, "step": 48170 }, { "epoch": 10.464813205907905, "grad_norm": 0.0002570390061009675, "learning_rate": 6.408376773819868e-06, "loss": 0.0058, "step": 48180 }, { "epoch": 10.46698523023458, "grad_norm": 0.0004991987370885909, "learning_rate": 6.399326672458732e-06, "loss": 0.0042, "step": 48190 }, { "epoch": 10.469157254561251, "grad_norm": 0.0003679233486764133, "learning_rate": 6.390276571097596e-06, "loss": 0.0, "step": 48200 }, { "epoch": 10.471329278887923, "grad_norm": 0.0002311110874870792, "learning_rate": 6.381226469736461e-06, "loss": 0.0, "step": 48210 }, { "epoch": 10.473501303214595, "grad_norm": 0.00023058304213918746, "learning_rate": 6.372176368375327e-06, "loss": 0.0, "step": 48220 }, { "epoch": 10.47567332754127, "grad_norm": 0.000238971013459377, "learning_rate": 6.363126267014191e-06, "loss": 0.0, "step": 48230 }, { "epoch": 10.477845351867941, "grad_norm": 0.00023313738347496837, "learning_rate": 6.354076165653055e-06, "loss": 0.0035, "step": 48240 }, { "epoch": 10.480017376194613, "grad_norm": 0.0002325878303963691, "learning_rate": 6.3450260642919205e-06, "loss": 0.0, "step": 48250 }, { "epoch": 10.482189400521285, "grad_norm": 0.00031619417131878436, "learning_rate": 6.335975962930785e-06, "loss": 0.0, "step": 48260 }, { "epoch": 10.484361424847958, "grad_norm": 0.0002313339791726321, "learning_rate": 6.326925861569651e-06, "loss": 0.0, "step": 48270 }, { "epoch": 10.486533449174631, "grad_norm": 0.00022938975598663092, "learning_rate": 6.317875760208515e-06, "loss": 0.0, "step": 48280 }, { "epoch": 10.488705473501303, "grad_norm": 0.00022925181838218123, "learning_rate": 6.3088256588473796e-06, "loss": 0.0, "step": 48290 }, { "epoch": 10.490877497827976, "grad_norm": 0.00023099995451048017, "learning_rate": 6.299775557486244e-06, "loss": 0.0, "step": 48300 }, { "epoch": 10.493049522154648, "grad_norm": 0.0003380636335350573, "learning_rate": 6.290725456125108e-06, "loss": 0.0, "step": 48310 }, { "epoch": 10.495221546481321, "grad_norm": 0.0002366377302678302, "learning_rate": 6.281675354763973e-06, "loss": 0.0, "step": 48320 }, { "epoch": 10.497393570807994, "grad_norm": 0.0002278648898936808, "learning_rate": 6.272625253402839e-06, "loss": 0.0035, "step": 48330 }, { "epoch": 10.499565595134666, "grad_norm": 0.0002272507263114676, "learning_rate": 6.263575152041704e-06, "loss": 0.0085, "step": 48340 }, { "epoch": 10.501737619461338, "grad_norm": 0.00023705446801614016, "learning_rate": 6.254525050680568e-06, "loss": 0.0, "step": 48350 }, { "epoch": 10.50390964378801, "grad_norm": 0.00023882483947090805, "learning_rate": 6.2454749493194325e-06, "loss": 0.0, "step": 48360 }, { "epoch": 10.506081668114684, "grad_norm": 0.00023301866895053536, "learning_rate": 6.236424847958298e-06, "loss": 0.0, "step": 48370 }, { "epoch": 10.508253692441356, "grad_norm": 0.0002331801224499941, "learning_rate": 6.227374746597162e-06, "loss": 0.0, "step": 48380 }, { "epoch": 10.510425716768028, "grad_norm": 0.00022870057728141546, "learning_rate": 6.218324645236027e-06, "loss": 0.0, "step": 48390 }, { "epoch": 10.5125977410947, "grad_norm": 0.00023882264213170856, "learning_rate": 6.209274543874892e-06, "loss": 0.0, "step": 48400 }, { "epoch": 10.514769765421374, "grad_norm": 0.0002304526569787413, "learning_rate": 6.200224442513757e-06, "loss": 0.0, "step": 48410 }, { "epoch": 10.516941789748046, "grad_norm": 0.0003574812435545027, "learning_rate": 6.191174341152621e-06, "loss": 0.0, "step": 48420 }, { "epoch": 10.519113814074718, "grad_norm": 0.00023324844369199127, "learning_rate": 6.182124239791486e-06, "loss": 0.0, "step": 48430 }, { "epoch": 10.52128583840139, "grad_norm": 0.00022773882665205747, "learning_rate": 6.1730741384303506e-06, "loss": 0.0, "step": 48440 }, { "epoch": 10.523457862728062, "grad_norm": 0.00023141004203353077, "learning_rate": 6.164024037069215e-06, "loss": 0.0, "step": 48450 }, { "epoch": 10.525629887054736, "grad_norm": 0.00022916783927939832, "learning_rate": 6.15497393570808e-06, "loss": 0.0, "step": 48460 }, { "epoch": 10.527801911381408, "grad_norm": 0.0003077143628615886, "learning_rate": 6.145923834346945e-06, "loss": 0.0, "step": 48470 }, { "epoch": 10.52997393570808, "grad_norm": 0.0004104797844775021, "learning_rate": 6.13687373298581e-06, "loss": 0.0, "step": 48480 }, { "epoch": 10.532145960034752, "grad_norm": 0.0003059869341086596, "learning_rate": 6.127823631624675e-06, "loss": 0.0, "step": 48490 }, { "epoch": 10.534317984361424, "grad_norm": 0.00022940864437259734, "learning_rate": 6.118773530263539e-06, "loss": 0.0, "step": 48500 }, { "epoch": 10.536490008688098, "grad_norm": 0.000472991174319759, "learning_rate": 6.1097234289024035e-06, "loss": 0.0, "step": 48510 }, { "epoch": 10.53866203301477, "grad_norm": 0.0002505776647012681, "learning_rate": 6.100673327541269e-06, "loss": 0.0, "step": 48520 }, { "epoch": 10.540834057341442, "grad_norm": 0.00022529246052727103, "learning_rate": 6.091623226180133e-06, "loss": 0.0, "step": 48530 }, { "epoch": 10.543006081668114, "grad_norm": 0.000230813238886185, "learning_rate": 6.082573124818998e-06, "loss": 0.0037, "step": 48540 }, { "epoch": 10.545178105994786, "grad_norm": 0.00022568507120013237, "learning_rate": 6.073523023457863e-06, "loss": 0.0, "step": 48550 }, { "epoch": 10.54735013032146, "grad_norm": 0.0002367023262195289, "learning_rate": 6.064472922096728e-06, "loss": 0.0, "step": 48560 }, { "epoch": 10.549522154648132, "grad_norm": 0.00022548387642018497, "learning_rate": 6.055422820735593e-06, "loss": 0.0, "step": 48570 }, { "epoch": 10.551694178974804, "grad_norm": 0.00022634779452346265, "learning_rate": 6.046372719374457e-06, "loss": 0.0053, "step": 48580 }, { "epoch": 10.553866203301476, "grad_norm": 0.00023146615421865135, "learning_rate": 6.037322618013322e-06, "loss": 0.0057, "step": 48590 }, { "epoch": 10.55603822762815, "grad_norm": 0.0002269007236463949, "learning_rate": 6.028272516652187e-06, "loss": 0.0, "step": 48600 }, { "epoch": 10.558210251954822, "grad_norm": 0.00022439331223722547, "learning_rate": 6.019222415291052e-06, "loss": 0.0, "step": 48610 }, { "epoch": 10.560382276281494, "grad_norm": 0.00022950119455344975, "learning_rate": 6.010172313929916e-06, "loss": 0.0, "step": 48620 }, { "epoch": 10.562554300608166, "grad_norm": 0.300792396068573, "learning_rate": 6.0011222125687815e-06, "loss": 0.0001, "step": 48630 }, { "epoch": 10.564726324934838, "grad_norm": 0.00022572164016310126, "learning_rate": 5.992072111207646e-06, "loss": 0.0, "step": 48640 }, { "epoch": 10.566898349261512, "grad_norm": 0.000312326563289389, "learning_rate": 5.98302200984651e-06, "loss": 0.0, "step": 48650 }, { "epoch": 10.569070373588184, "grad_norm": 0.00022468189126811922, "learning_rate": 5.973971908485375e-06, "loss": 0.0, "step": 48660 }, { "epoch": 10.571242397914856, "grad_norm": 0.00023983907885849476, "learning_rate": 5.96492180712424e-06, "loss": 0.0, "step": 48670 }, { "epoch": 10.573414422241528, "grad_norm": 0.0002266259107273072, "learning_rate": 5.955871705763105e-06, "loss": 0.0, "step": 48680 }, { "epoch": 10.575586446568202, "grad_norm": 0.00022839626763015985, "learning_rate": 5.94682160440197e-06, "loss": 0.0, "step": 48690 }, { "epoch": 10.577758470894874, "grad_norm": 0.000231469253776595, "learning_rate": 5.937771503040834e-06, "loss": 0.0, "step": 48700 }, { "epoch": 10.579930495221546, "grad_norm": 0.00023143812722992152, "learning_rate": 5.928721401679699e-06, "loss": 0.0, "step": 48710 }, { "epoch": 10.582102519548219, "grad_norm": 0.00031239417148754, "learning_rate": 5.919671300318564e-06, "loss": 0.0, "step": 48720 }, { "epoch": 10.58427454387489, "grad_norm": 0.00022409581288229674, "learning_rate": 5.910621198957428e-06, "loss": 0.0, "step": 48730 }, { "epoch": 10.586446568201564, "grad_norm": 0.0002723014331422746, "learning_rate": 5.9015710975962934e-06, "loss": 0.0043, "step": 48740 }, { "epoch": 10.588618592528237, "grad_norm": 0.00022293497750069946, "learning_rate": 5.892520996235159e-06, "loss": 0.0, "step": 48750 }, { "epoch": 10.590790616854909, "grad_norm": 0.0003747916198335588, "learning_rate": 5.883470894874023e-06, "loss": 0.0, "step": 48760 }, { "epoch": 10.59296264118158, "grad_norm": 0.00022305836318992078, "learning_rate": 5.874420793512888e-06, "loss": 0.0, "step": 48770 }, { "epoch": 10.595134665508255, "grad_norm": 0.00023418181808665395, "learning_rate": 5.8653706921517525e-06, "loss": 0.0, "step": 48780 }, { "epoch": 10.597306689834927, "grad_norm": 0.00022406678181141615, "learning_rate": 5.856320590790617e-06, "loss": 0.0, "step": 48790 }, { "epoch": 10.599478714161599, "grad_norm": 0.00023150903871282935, "learning_rate": 5.847270489429482e-06, "loss": 0.0, "step": 48800 }, { "epoch": 10.60165073848827, "grad_norm": 0.0003837795229628682, "learning_rate": 5.838220388068346e-06, "loss": 0.0056, "step": 48810 }, { "epoch": 10.603822762814943, "grad_norm": 0.00023224468168336898, "learning_rate": 5.8291702867072115e-06, "loss": 0.005, "step": 48820 }, { "epoch": 10.605994787141617, "grad_norm": 0.17190490663051605, "learning_rate": 5.820120185346077e-06, "loss": 0.0047, "step": 48830 }, { "epoch": 10.608166811468289, "grad_norm": 0.00022320100106298923, "learning_rate": 5.811070083984941e-06, "loss": 0.0, "step": 48840 }, { "epoch": 10.61033883579496, "grad_norm": 0.00032960029784590006, "learning_rate": 5.802019982623805e-06, "loss": 0.0, "step": 48850 }, { "epoch": 10.612510860121633, "grad_norm": 0.00023157663235906512, "learning_rate": 5.792969881262671e-06, "loss": 0.0, "step": 48860 }, { "epoch": 10.614682884448305, "grad_norm": 0.00022281825658865273, "learning_rate": 5.783919779901535e-06, "loss": 0.0, "step": 48870 }, { "epoch": 10.616854908774979, "grad_norm": 0.0002246215008199215, "learning_rate": 5.774869678540399e-06, "loss": 0.0, "step": 48880 }, { "epoch": 10.619026933101651, "grad_norm": 0.00022870773682370782, "learning_rate": 5.7658195771792645e-06, "loss": 0.0, "step": 48890 }, { "epoch": 10.621198957428323, "grad_norm": 0.0002241594047518447, "learning_rate": 5.75676947581813e-06, "loss": 0.0, "step": 48900 }, { "epoch": 10.623370981754995, "grad_norm": 0.0002240870235254988, "learning_rate": 5.747719374456994e-06, "loss": 0.0, "step": 48910 }, { "epoch": 10.625543006081669, "grad_norm": 0.00022091949358582497, "learning_rate": 5.738669273095859e-06, "loss": 0.0, "step": 48920 }, { "epoch": 10.627715030408341, "grad_norm": 0.00022567623818758875, "learning_rate": 5.7296191717347235e-06, "loss": 0.0, "step": 48930 }, { "epoch": 10.629887054735013, "grad_norm": 0.00022049974359106272, "learning_rate": 5.720569070373588e-06, "loss": 0.0085, "step": 48940 }, { "epoch": 10.632059079061685, "grad_norm": 0.0002641979663167149, "learning_rate": 5.711518969012453e-06, "loss": 0.0, "step": 48950 }, { "epoch": 10.634231103388357, "grad_norm": 0.0002207313955295831, "learning_rate": 5.702468867651318e-06, "loss": 0.0, "step": 48960 }, { "epoch": 10.636403127715031, "grad_norm": 0.00021989879314787686, "learning_rate": 5.693418766290183e-06, "loss": 0.0, "step": 48970 }, { "epoch": 10.638575152041703, "grad_norm": 0.00022552079462911934, "learning_rate": 5.684368664929048e-06, "loss": 0.0, "step": 48980 }, { "epoch": 10.640747176368375, "grad_norm": 0.00022579723736271262, "learning_rate": 5.675318563567912e-06, "loss": 0.0049, "step": 48990 }, { "epoch": 10.642919200695047, "grad_norm": 0.0003467233618721366, "learning_rate": 5.666268462206777e-06, "loss": 0.0, "step": 49000 }, { "epoch": 10.64509122502172, "grad_norm": 0.00022149008873384446, "learning_rate": 5.657218360845642e-06, "loss": 0.0, "step": 49010 }, { "epoch": 10.647263249348393, "grad_norm": 0.0002353919408051297, "learning_rate": 5.648168259484506e-06, "loss": 0.0, "step": 49020 }, { "epoch": 10.649435273675065, "grad_norm": 0.00022022766643203795, "learning_rate": 5.639118158123371e-06, "loss": 0.0, "step": 49030 }, { "epoch": 10.651607298001737, "grad_norm": 0.00022060348419472575, "learning_rate": 5.630068056762236e-06, "loss": 0.0, "step": 49040 }, { "epoch": 10.65377932232841, "grad_norm": 0.00022515558521263301, "learning_rate": 5.621017955401101e-06, "loss": 0.0, "step": 49050 }, { "epoch": 10.655951346655083, "grad_norm": 0.00022750017524231225, "learning_rate": 5.611967854039966e-06, "loss": 0.0, "step": 49060 }, { "epoch": 10.658123370981755, "grad_norm": 0.00023040255473461002, "learning_rate": 5.60291775267883e-06, "loss": 0.0, "step": 49070 }, { "epoch": 10.660295395308427, "grad_norm": 0.0002275063015986234, "learning_rate": 5.5938676513176945e-06, "loss": 0.0, "step": 49080 }, { "epoch": 10.6624674196351, "grad_norm": 0.00023775036970619112, "learning_rate": 5.58481754995656e-06, "loss": 0.0, "step": 49090 }, { "epoch": 10.664639443961772, "grad_norm": 0.0002341943036299199, "learning_rate": 5.575767448595425e-06, "loss": 0.0, "step": 49100 }, { "epoch": 10.666811468288445, "grad_norm": 0.00022713349608238786, "learning_rate": 5.566717347234289e-06, "loss": 0.0, "step": 49110 }, { "epoch": 10.668983492615117, "grad_norm": 0.00022505798551719636, "learning_rate": 5.5576672458731544e-06, "loss": 0.0047, "step": 49120 }, { "epoch": 10.67115551694179, "grad_norm": 0.0002189160732086748, "learning_rate": 5.548617144512019e-06, "loss": 0.0, "step": 49130 }, { "epoch": 10.673327541268462, "grad_norm": 0.00022511309362016618, "learning_rate": 5.539567043150883e-06, "loss": 0.0, "step": 49140 }, { "epoch": 10.675499565595135, "grad_norm": 0.0002203310577897355, "learning_rate": 5.530516941789748e-06, "loss": 0.0, "step": 49150 }, { "epoch": 10.677671589921808, "grad_norm": 0.0002233179402537644, "learning_rate": 5.521466840428613e-06, "loss": 0.0049, "step": 49160 }, { "epoch": 10.67984361424848, "grad_norm": 0.0003005561593454331, "learning_rate": 5.512416739067478e-06, "loss": 0.0, "step": 49170 }, { "epoch": 10.682015638575152, "grad_norm": 0.00022120712674222887, "learning_rate": 5.503366637706343e-06, "loss": 0.0, "step": 49180 }, { "epoch": 10.684187662901824, "grad_norm": 0.00033629994140937924, "learning_rate": 5.494316536345207e-06, "loss": 0.0043, "step": 49190 }, { "epoch": 10.686359687228498, "grad_norm": 0.00021915044635534286, "learning_rate": 5.4852664349840725e-06, "loss": 0.0049, "step": 49200 }, { "epoch": 10.68853171155517, "grad_norm": 0.0002180044393753633, "learning_rate": 5.476216333622937e-06, "loss": 0.0, "step": 49210 }, { "epoch": 10.690703735881842, "grad_norm": 0.00023001058434601873, "learning_rate": 5.467166232261801e-06, "loss": 0.0, "step": 49220 }, { "epoch": 10.692875760208514, "grad_norm": 0.00022796729172114283, "learning_rate": 5.458116130900666e-06, "loss": 0.0, "step": 49230 }, { "epoch": 10.695047784535188, "grad_norm": 0.00021778048539999872, "learning_rate": 5.449066029539531e-06, "loss": 0.0045, "step": 49240 }, { "epoch": 10.69721980886186, "grad_norm": 0.00022593970061279833, "learning_rate": 5.440015928178396e-06, "loss": 0.0, "step": 49250 }, { "epoch": 10.699391833188532, "grad_norm": 0.00022018144954927266, "learning_rate": 5.430965826817261e-06, "loss": 0.0, "step": 49260 }, { "epoch": 10.701563857515204, "grad_norm": 0.00022087100660428405, "learning_rate": 5.4219157254561254e-06, "loss": 0.0, "step": 49270 }, { "epoch": 10.703735881841876, "grad_norm": 0.00022406043717637658, "learning_rate": 5.41286562409499e-06, "loss": 0.0, "step": 49280 }, { "epoch": 10.70590790616855, "grad_norm": 0.00021810720500070602, "learning_rate": 5.403815522733855e-06, "loss": 0.0045, "step": 49290 }, { "epoch": 10.708079930495222, "grad_norm": 0.00022153431200422347, "learning_rate": 5.394765421372719e-06, "loss": 0.0041, "step": 49300 }, { "epoch": 10.710251954821894, "grad_norm": 0.00021901496802456677, "learning_rate": 5.3857153200115845e-06, "loss": 0.0, "step": 49310 }, { "epoch": 10.712423979148566, "grad_norm": 0.00022114437888376415, "learning_rate": 5.37666521865045e-06, "loss": 0.0101, "step": 49320 }, { "epoch": 10.714596003475238, "grad_norm": 0.00022218165395315737, "learning_rate": 5.367615117289314e-06, "loss": 0.0045, "step": 49330 }, { "epoch": 10.716768027801912, "grad_norm": 0.00023637167760170996, "learning_rate": 5.358565015928178e-06, "loss": 0.0, "step": 49340 }, { "epoch": 10.718940052128584, "grad_norm": 0.00023195294488687068, "learning_rate": 5.3495149145670435e-06, "loss": 0.0, "step": 49350 }, { "epoch": 10.721112076455256, "grad_norm": 0.0002186178317060694, "learning_rate": 5.340464813205908e-06, "loss": 0.0, "step": 49360 }, { "epoch": 10.723284100781928, "grad_norm": 0.0003002184384968132, "learning_rate": 5.331414711844773e-06, "loss": 0.0, "step": 49370 }, { "epoch": 10.725456125108602, "grad_norm": 0.00022116424224805087, "learning_rate": 5.322364610483637e-06, "loss": 0.0, "step": 49380 }, { "epoch": 10.727628149435274, "grad_norm": 0.0002234865096397698, "learning_rate": 5.313314509122503e-06, "loss": 0.0, "step": 49390 }, { "epoch": 10.729800173761946, "grad_norm": 0.00022370461374521255, "learning_rate": 5.304264407761368e-06, "loss": 0.0044, "step": 49400 }, { "epoch": 10.731972198088618, "grad_norm": 0.00022042910859454423, "learning_rate": 5.295214306400232e-06, "loss": 0.0, "step": 49410 }, { "epoch": 10.73414422241529, "grad_norm": 0.00028280200785957277, "learning_rate": 5.2861642050390965e-06, "loss": 0.0, "step": 49420 }, { "epoch": 10.736316246741964, "grad_norm": 0.00021926072076894343, "learning_rate": 5.277114103677962e-06, "loss": 0.0, "step": 49430 }, { "epoch": 10.738488271068636, "grad_norm": 0.00023302929184865206, "learning_rate": 5.268064002316826e-06, "loss": 0.0, "step": 49440 }, { "epoch": 10.740660295395308, "grad_norm": 0.0002172905660700053, "learning_rate": 5.25901390095569e-06, "loss": 0.0, "step": 49450 }, { "epoch": 10.74283231972198, "grad_norm": 0.00022836528660263866, "learning_rate": 5.249963799594556e-06, "loss": 0.0, "step": 49460 }, { "epoch": 10.745004344048652, "grad_norm": 0.00032331692636944354, "learning_rate": 5.240913698233421e-06, "loss": 0.004, "step": 49470 }, { "epoch": 10.747176368375326, "grad_norm": 0.0002181615273002535, "learning_rate": 5.231863596872285e-06, "loss": 0.0048, "step": 49480 }, { "epoch": 10.749348392701998, "grad_norm": 0.00029964291024953127, "learning_rate": 5.22281349551115e-06, "loss": 0.0, "step": 49490 }, { "epoch": 10.75152041702867, "grad_norm": 0.00022976213949732482, "learning_rate": 5.2137633941500146e-06, "loss": 0.0, "step": 49500 }, { "epoch": 10.753692441355343, "grad_norm": 0.00021645518427249044, "learning_rate": 5.204713292788879e-06, "loss": 0.0, "step": 49510 }, { "epoch": 10.755864465682016, "grad_norm": 0.00021738260693382472, "learning_rate": 5.195663191427744e-06, "loss": 0.0, "step": 49520 }, { "epoch": 10.758036490008688, "grad_norm": 0.00022163652465678751, "learning_rate": 5.186613090066609e-06, "loss": 0.0, "step": 49530 }, { "epoch": 10.76020851433536, "grad_norm": 0.00026377089670859277, "learning_rate": 5.177562988705474e-06, "loss": 0.0, "step": 49540 }, { "epoch": 10.762380538662033, "grad_norm": 0.00021624031069222838, "learning_rate": 5.168512887344339e-06, "loss": 0.0, "step": 49550 }, { "epoch": 10.764552562988705, "grad_norm": 0.00021584972273558378, "learning_rate": 5.159462785983203e-06, "loss": 0.0, "step": 49560 }, { "epoch": 10.766724587315379, "grad_norm": 0.0002195909182773903, "learning_rate": 5.150412684622068e-06, "loss": 0.0, "step": 49570 }, { "epoch": 10.76889661164205, "grad_norm": 0.00021642334468197078, "learning_rate": 5.141362583260933e-06, "loss": 0.0, "step": 49580 }, { "epoch": 10.771068635968723, "grad_norm": 0.00022419106971938163, "learning_rate": 5.132312481899797e-06, "loss": 0.0, "step": 49590 }, { "epoch": 10.773240660295395, "grad_norm": 0.0002185798075515777, "learning_rate": 5.123262380538662e-06, "loss": 0.0, "step": 49600 }, { "epoch": 10.775412684622069, "grad_norm": 0.00021758888033218682, "learning_rate": 5.114212279177527e-06, "loss": 0.0, "step": 49610 }, { "epoch": 10.77758470894874, "grad_norm": 0.00030351977329701185, "learning_rate": 5.105162177816392e-06, "loss": 0.0, "step": 49620 }, { "epoch": 10.779756733275413, "grad_norm": 0.0002139311982318759, "learning_rate": 5.096112076455257e-06, "loss": 0.0, "step": 49630 }, { "epoch": 10.781928757602085, "grad_norm": 0.00025567892589606345, "learning_rate": 5.087061975094121e-06, "loss": 0.0, "step": 49640 }, { "epoch": 10.784100781928757, "grad_norm": 0.0002477488887961954, "learning_rate": 5.0780118737329856e-06, "loss": 0.0, "step": 49650 }, { "epoch": 10.78627280625543, "grad_norm": 0.0002187481295550242, "learning_rate": 5.068961772371851e-06, "loss": 0.0042, "step": 49660 }, { "epoch": 10.788444830582103, "grad_norm": 0.0002477424859534949, "learning_rate": 5.059911671010716e-06, "loss": 0.0051, "step": 49670 }, { "epoch": 10.790616854908775, "grad_norm": 0.0002899961546063423, "learning_rate": 5.05086156964958e-06, "loss": 0.0, "step": 49680 }, { "epoch": 10.792788879235447, "grad_norm": 0.00028191893943585455, "learning_rate": 5.0418114682884455e-06, "loss": 0.0, "step": 49690 }, { "epoch": 10.79496090356212, "grad_norm": 0.0002191022940678522, "learning_rate": 5.03276136692731e-06, "loss": 0.0, "step": 49700 }, { "epoch": 10.797132927888793, "grad_norm": 0.0002154961839551106, "learning_rate": 5.023711265566174e-06, "loss": 0.0, "step": 49710 }, { "epoch": 10.799304952215465, "grad_norm": 0.0002220691676484421, "learning_rate": 5.014661164205039e-06, "loss": 0.0, "step": 49720 }, { "epoch": 10.801476976542137, "grad_norm": 0.00021827526506967843, "learning_rate": 5.005611062843904e-06, "loss": 0.0, "step": 49730 }, { "epoch": 10.803649000868809, "grad_norm": 0.002871220000088215, "learning_rate": 4.996560961482769e-06, "loss": 0.0, "step": 49740 }, { "epoch": 10.805821025195483, "grad_norm": 0.00022947440447751433, "learning_rate": 4.987510860121634e-06, "loss": 0.0, "step": 49750 }, { "epoch": 10.807993049522155, "grad_norm": 0.00021768937585875392, "learning_rate": 4.978460758760498e-06, "loss": 0.0, "step": 49760 }, { "epoch": 10.810165073848827, "grad_norm": 0.00021608640963677317, "learning_rate": 4.9694106573993636e-06, "loss": 0.0, "step": 49770 }, { "epoch": 10.8123370981755, "grad_norm": 0.0002164438192266971, "learning_rate": 4.960360556038228e-06, "loss": 0.0, "step": 49780 }, { "epoch": 10.814509122502171, "grad_norm": 0.00021347503934521228, "learning_rate": 4.951310454677092e-06, "loss": 0.0, "step": 49790 }, { "epoch": 10.816681146828845, "grad_norm": 0.00025063398061320186, "learning_rate": 4.9422603533159574e-06, "loss": 0.0, "step": 49800 }, { "epoch": 10.818853171155517, "grad_norm": 0.00022577929485123605, "learning_rate": 4.933210251954823e-06, "loss": 0.0, "step": 49810 }, { "epoch": 10.82102519548219, "grad_norm": 0.0003572820278350264, "learning_rate": 4.924160150593687e-06, "loss": 0.0, "step": 49820 }, { "epoch": 10.823197219808861, "grad_norm": 0.00021640605700667948, "learning_rate": 4.915110049232552e-06, "loss": 0.0041, "step": 49830 }, { "epoch": 10.825369244135535, "grad_norm": 0.00021510386432055384, "learning_rate": 4.9060599478714165e-06, "loss": 0.0, "step": 49840 }, { "epoch": 10.827541268462207, "grad_norm": 0.0002934297954197973, "learning_rate": 4.897009846510281e-06, "loss": 0.0, "step": 49850 }, { "epoch": 10.82971329278888, "grad_norm": 0.16740760207176208, "learning_rate": 4.887959745149146e-06, "loss": 0.005, "step": 49860 }, { "epoch": 10.831885317115551, "grad_norm": 0.0002762663352768868, "learning_rate": 4.87890964378801e-06, "loss": 0.0, "step": 49870 }, { "epoch": 10.834057341442223, "grad_norm": 0.00021649140398949385, "learning_rate": 4.8698595424268755e-06, "loss": 0.0, "step": 49880 }, { "epoch": 10.836229365768897, "grad_norm": 0.00021669291891157627, "learning_rate": 4.860809441065741e-06, "loss": 0.0, "step": 49890 }, { "epoch": 10.83840139009557, "grad_norm": 0.0002275826846016571, "learning_rate": 4.851759339704605e-06, "loss": 0.0, "step": 49900 }, { "epoch": 10.840573414422241, "grad_norm": 0.00021657983597833663, "learning_rate": 4.842709238343469e-06, "loss": 0.0, "step": 49910 }, { "epoch": 10.842745438748914, "grad_norm": 0.00029274701955728233, "learning_rate": 4.833659136982335e-06, "loss": 0.0, "step": 49920 }, { "epoch": 10.844917463075586, "grad_norm": 0.00021872835350222886, "learning_rate": 4.824609035621199e-06, "loss": 0.0, "step": 49930 }, { "epoch": 10.84708948740226, "grad_norm": 0.00033134184195660055, "learning_rate": 4.815558934260063e-06, "loss": 0.0, "step": 49940 }, { "epoch": 10.849261511728931, "grad_norm": 0.0002122445439454168, "learning_rate": 4.8065088328989284e-06, "loss": 0.0, "step": 49950 }, { "epoch": 10.851433536055604, "grad_norm": 0.00027742431848309934, "learning_rate": 4.797458731537794e-06, "loss": 0.0, "step": 49960 }, { "epoch": 10.853605560382276, "grad_norm": 0.0002131950604962185, "learning_rate": 4.788408630176658e-06, "loss": 0.0, "step": 49970 }, { "epoch": 10.85577758470895, "grad_norm": 0.00021493675012607127, "learning_rate": 4.779358528815523e-06, "loss": 0.0, "step": 49980 }, { "epoch": 10.857949609035622, "grad_norm": 0.00021079306316096336, "learning_rate": 4.7703084274543875e-06, "loss": 0.0041, "step": 49990 }, { "epoch": 10.860121633362294, "grad_norm": 0.00021532582468353212, "learning_rate": 4.761258326093253e-06, "loss": 0.0, "step": 50000 }, { "epoch": 10.862293657688966, "grad_norm": 0.00021149902022443712, "learning_rate": 4.752208224732117e-06, "loss": 0.0, "step": 50010 }, { "epoch": 10.864465682015638, "grad_norm": 0.00021205766825005412, "learning_rate": 4.743158123370982e-06, "loss": 0.0, "step": 50020 }, { "epoch": 10.866637706342312, "grad_norm": 0.00030917723779566586, "learning_rate": 4.734108022009847e-06, "loss": 0.0043, "step": 50030 }, { "epoch": 10.868809730668984, "grad_norm": 0.0002103663864545524, "learning_rate": 4.725057920648712e-06, "loss": 0.0, "step": 50040 }, { "epoch": 10.870981754995656, "grad_norm": 0.00021224647935014218, "learning_rate": 4.716007819287576e-06, "loss": 0.0051, "step": 50050 }, { "epoch": 10.873153779322328, "grad_norm": 0.00035064792609773576, "learning_rate": 4.706957717926441e-06, "loss": 0.005, "step": 50060 }, { "epoch": 10.875325803649002, "grad_norm": 0.00020956051594112068, "learning_rate": 4.697907616565306e-06, "loss": 0.0, "step": 50070 }, { "epoch": 10.877497827975674, "grad_norm": 0.00021312307217158377, "learning_rate": 4.68885751520417e-06, "loss": 0.0, "step": 50080 }, { "epoch": 10.879669852302346, "grad_norm": 0.0002734953013714403, "learning_rate": 4.679807413843035e-06, "loss": 0.0, "step": 50090 }, { "epoch": 10.881841876629018, "grad_norm": 0.00021291685698088259, "learning_rate": 4.6707573124819e-06, "loss": 0.0, "step": 50100 }, { "epoch": 10.88401390095569, "grad_norm": 0.0002116796822519973, "learning_rate": 4.661707211120765e-06, "loss": 0.0, "step": 50110 }, { "epoch": 10.886185925282364, "grad_norm": 0.00021584934438578784, "learning_rate": 4.65265710975963e-06, "loss": 0.0, "step": 50120 }, { "epoch": 10.888357949609036, "grad_norm": 0.00021120175370015204, "learning_rate": 4.643607008398494e-06, "loss": 0.0, "step": 50130 }, { "epoch": 10.890529973935708, "grad_norm": 0.00021197435853537172, "learning_rate": 4.6345569070373585e-06, "loss": 0.0, "step": 50140 }, { "epoch": 10.89270199826238, "grad_norm": 0.00021428019681479782, "learning_rate": 4.625506805676224e-06, "loss": 0.0, "step": 50150 }, { "epoch": 10.894874022589054, "grad_norm": 0.00020921516988892108, "learning_rate": 4.616456704315088e-06, "loss": 0.0047, "step": 50160 }, { "epoch": 10.897046046915726, "grad_norm": 0.00027560058515518904, "learning_rate": 4.607406602953953e-06, "loss": 0.0, "step": 50170 }, { "epoch": 10.899218071242398, "grad_norm": 0.00023641523148398846, "learning_rate": 4.598356501592818e-06, "loss": 0.0, "step": 50180 }, { "epoch": 10.90139009556907, "grad_norm": 0.000214401152334176, "learning_rate": 4.589306400231683e-06, "loss": 0.0, "step": 50190 }, { "epoch": 10.903562119895742, "grad_norm": 0.0002733489091042429, "learning_rate": 4.580256298870548e-06, "loss": 0.0044, "step": 50200 }, { "epoch": 10.905734144222416, "grad_norm": 0.00022136476763989776, "learning_rate": 4.571206197509412e-06, "loss": 0.0, "step": 50210 }, { "epoch": 10.907906168549088, "grad_norm": 0.0002093520452035591, "learning_rate": 4.562156096148277e-06, "loss": 0.0, "step": 50220 }, { "epoch": 10.91007819287576, "grad_norm": 0.0002700627373997122, "learning_rate": 4.553105994787142e-06, "loss": 0.0, "step": 50230 }, { "epoch": 10.912250217202432, "grad_norm": 0.00028141363873146474, "learning_rate": 4.544055893426007e-06, "loss": 0.0, "step": 50240 }, { "epoch": 10.914422241529104, "grad_norm": 0.00021085295884404331, "learning_rate": 4.535005792064871e-06, "loss": 0.0043, "step": 50250 }, { "epoch": 10.916594265855778, "grad_norm": 0.00021086714696139097, "learning_rate": 4.5259556907037365e-06, "loss": 0.0, "step": 50260 }, { "epoch": 10.91876629018245, "grad_norm": 0.00020958646200597286, "learning_rate": 4.516905589342601e-06, "loss": 0.0, "step": 50270 }, { "epoch": 10.920938314509122, "grad_norm": 0.00021219199697952718, "learning_rate": 4.507855487981465e-06, "loss": 0.0141, "step": 50280 }, { "epoch": 10.923110338835794, "grad_norm": 0.0002100965939462185, "learning_rate": 4.49880538662033e-06, "loss": 0.0, "step": 50290 }, { "epoch": 10.925282363162466, "grad_norm": 0.00022129624267108738, "learning_rate": 4.489755285259195e-06, "loss": 0.0, "step": 50300 }, { "epoch": 10.92745438748914, "grad_norm": 0.000218289002077654, "learning_rate": 4.48070518389806e-06, "loss": 0.0, "step": 50310 }, { "epoch": 10.929626411815812, "grad_norm": 0.0002210328821092844, "learning_rate": 4.471655082536925e-06, "loss": 0.0, "step": 50320 }, { "epoch": 10.931798436142484, "grad_norm": 0.00021514331456273794, "learning_rate": 4.4626049811757894e-06, "loss": 0.0, "step": 50330 }, { "epoch": 10.933970460469157, "grad_norm": 0.00020960548135917634, "learning_rate": 4.453554879814654e-06, "loss": 0.0, "step": 50340 }, { "epoch": 10.93614248479583, "grad_norm": 0.0002283220092067495, "learning_rate": 4.444504778453519e-06, "loss": 0.0, "step": 50350 }, { "epoch": 10.938314509122502, "grad_norm": 0.0002141373261110857, "learning_rate": 4.435454677092383e-06, "loss": 0.0, "step": 50360 }, { "epoch": 10.940486533449175, "grad_norm": 0.00021023667068220675, "learning_rate": 4.4264045757312485e-06, "loss": 0.0, "step": 50370 }, { "epoch": 10.942658557775847, "grad_norm": 0.00021123423357494175, "learning_rate": 4.417354474370114e-06, "loss": 0.0, "step": 50380 }, { "epoch": 10.944830582102519, "grad_norm": 0.00023207410413306206, "learning_rate": 4.408304373008978e-06, "loss": 0.0042, "step": 50390 }, { "epoch": 10.947002606429193, "grad_norm": 0.00020956066146027297, "learning_rate": 4.399254271647843e-06, "loss": 0.0, "step": 50400 }, { "epoch": 10.949174630755865, "grad_norm": 0.0002111865032929927, "learning_rate": 4.3902041702867075e-06, "loss": 0.0, "step": 50410 }, { "epoch": 10.951346655082537, "grad_norm": 0.0002907202870119363, "learning_rate": 4.381154068925572e-06, "loss": 0.0, "step": 50420 }, { "epoch": 10.953518679409209, "grad_norm": 0.00020739728643093258, "learning_rate": 4.372103967564437e-06, "loss": 0.0, "step": 50430 }, { "epoch": 10.955690703735883, "grad_norm": 0.00033158701262436807, "learning_rate": 4.363053866203301e-06, "loss": 0.0, "step": 50440 }, { "epoch": 10.957862728062555, "grad_norm": 0.00020972038328181952, "learning_rate": 4.354003764842167e-06, "loss": 0.0, "step": 50450 }, { "epoch": 10.960034752389227, "grad_norm": 0.00020969909382984042, "learning_rate": 4.344953663481032e-06, "loss": 0.0, "step": 50460 }, { "epoch": 10.962206776715899, "grad_norm": 0.00020956064690835774, "learning_rate": 4.335903562119896e-06, "loss": 0.0041, "step": 50470 }, { "epoch": 10.964378801042571, "grad_norm": 0.0002089909539790824, "learning_rate": 4.3268534607587604e-06, "loss": 0.0, "step": 50480 }, { "epoch": 10.966550825369245, "grad_norm": 0.00020743778441101313, "learning_rate": 4.317803359397626e-06, "loss": 0.0, "step": 50490 }, { "epoch": 10.968722849695917, "grad_norm": 0.00020597422553692013, "learning_rate": 4.30875325803649e-06, "loss": 0.0, "step": 50500 }, { "epoch": 10.970894874022589, "grad_norm": 0.0002715985174290836, "learning_rate": 4.299703156675354e-06, "loss": 0.0, "step": 50510 }, { "epoch": 10.973066898349261, "grad_norm": 0.0002085827582050115, "learning_rate": 4.2906530553142195e-06, "loss": 0.0, "step": 50520 }, { "epoch": 10.975238922675935, "grad_norm": 0.00021042587468400598, "learning_rate": 4.281602953953085e-06, "loss": 0.0, "step": 50530 }, { "epoch": 10.977410947002607, "grad_norm": 0.0002095496020046994, "learning_rate": 4.272552852591949e-06, "loss": 0.0, "step": 50540 }, { "epoch": 10.979582971329279, "grad_norm": 0.00026932769105769694, "learning_rate": 4.263502751230814e-06, "loss": 0.0, "step": 50550 }, { "epoch": 10.981754995655951, "grad_norm": 0.00020780177146662027, "learning_rate": 4.2544526498696785e-06, "loss": 0.0, "step": 50560 }, { "epoch": 10.983927019982623, "grad_norm": 0.00031749060144647956, "learning_rate": 4.245402548508543e-06, "loss": 0.0, "step": 50570 }, { "epoch": 10.986099044309297, "grad_norm": 0.0002206074568675831, "learning_rate": 4.236352447147408e-06, "loss": 0.0, "step": 50580 }, { "epoch": 10.988271068635969, "grad_norm": 0.00020820109057240188, "learning_rate": 4.227302345786273e-06, "loss": 0.0048, "step": 50590 }, { "epoch": 10.990443092962641, "grad_norm": 0.00021198611648287624, "learning_rate": 4.2182522444251384e-06, "loss": 0.0, "step": 50600 }, { "epoch": 10.992615117289313, "grad_norm": 0.00020806727115996182, "learning_rate": 4.209202143064003e-06, "loss": 0.0, "step": 50610 }, { "epoch": 10.994787141615987, "grad_norm": 0.00027271060389466584, "learning_rate": 4.200152041702867e-06, "loss": 0.0, "step": 50620 }, { "epoch": 10.996959165942659, "grad_norm": 0.00020815835159737617, "learning_rate": 4.191101940341732e-06, "loss": 0.0, "step": 50630 }, { "epoch": 10.999131190269331, "grad_norm": 0.00021419930271804333, "learning_rate": 4.182051838980597e-06, "loss": 0.0, "step": 50640 }, { "epoch": 11.0, "eval_f1": 0.6240601503759399, "eval_loss": 0.09284297376871109, "eval_runtime": 84.1266, "eval_samples_per_second": 118.571, "eval_steps_per_second": 7.417, "step": 50644 }, { "epoch": 11.001303214596003, "grad_norm": 0.09123075008392334, "learning_rate": 4.173001737619461e-06, "loss": 0.0084, "step": 50650 }, { "epoch": 11.003475238922675, "grad_norm": 0.00022338244889397174, "learning_rate": 4.163951636258326e-06, "loss": 0.0042, "step": 50660 }, { "epoch": 11.00564726324935, "grad_norm": 0.0002102691651089117, "learning_rate": 4.154901534897191e-06, "loss": 0.0, "step": 50670 }, { "epoch": 11.007819287576021, "grad_norm": 0.00021717610070481896, "learning_rate": 4.145851433536056e-06, "loss": 0.0049, "step": 50680 }, { "epoch": 11.009991311902693, "grad_norm": 0.0002068415778921917, "learning_rate": 4.136801332174921e-06, "loss": 0.0, "step": 50690 }, { "epoch": 11.012163336229365, "grad_norm": 0.00020632539235521108, "learning_rate": 4.127751230813785e-06, "loss": 0.0, "step": 50700 }, { "epoch": 11.014335360556037, "grad_norm": 0.00022067526879254729, "learning_rate": 4.1187011294526496e-06, "loss": 0.0, "step": 50710 }, { "epoch": 11.016507384882711, "grad_norm": 0.00027272888110019267, "learning_rate": 4.109651028091515e-06, "loss": 0.0, "step": 50720 }, { "epoch": 11.018679409209383, "grad_norm": 0.0002099119737977162, "learning_rate": 4.10060092673038e-06, "loss": 0.0, "step": 50730 }, { "epoch": 11.020851433536055, "grad_norm": 0.00020631964434869587, "learning_rate": 4.091550825369244e-06, "loss": 0.0041, "step": 50740 }, { "epoch": 11.023023457862728, "grad_norm": 0.00020822268561460078, "learning_rate": 4.0825007240081095e-06, "loss": 0.0, "step": 50750 }, { "epoch": 11.025195482189401, "grad_norm": 0.00020695666898973286, "learning_rate": 4.073450622646974e-06, "loss": 0.0, "step": 50760 }, { "epoch": 11.027367506516073, "grad_norm": 0.00021030766947660595, "learning_rate": 4.064400521285838e-06, "loss": 0.0041, "step": 50770 }, { "epoch": 11.029539530842746, "grad_norm": 0.000298218394163996, "learning_rate": 4.055350419924703e-06, "loss": 0.0, "step": 50780 }, { "epoch": 11.031711555169418, "grad_norm": 0.00020447710994631052, "learning_rate": 4.046300318563568e-06, "loss": 0.0, "step": 50790 }, { "epoch": 11.03388357949609, "grad_norm": 0.00021559254673775285, "learning_rate": 4.037250217202433e-06, "loss": 0.0, "step": 50800 }, { "epoch": 11.036055603822764, "grad_norm": 0.0002991893270518631, "learning_rate": 4.028200115841298e-06, "loss": 0.0, "step": 50810 }, { "epoch": 11.038227628149436, "grad_norm": 0.00020672754908446223, "learning_rate": 4.019150014480162e-06, "loss": 0.0, "step": 50820 }, { "epoch": 11.040399652476108, "grad_norm": 0.00020618090638890862, "learning_rate": 4.0100999131190276e-06, "loss": 0.0047, "step": 50830 }, { "epoch": 11.04257167680278, "grad_norm": 0.0002046554145636037, "learning_rate": 4.001049811757892e-06, "loss": 0.0, "step": 50840 }, { "epoch": 11.044743701129452, "grad_norm": 0.00021028323681093752, "learning_rate": 3.991999710396756e-06, "loss": 0.0, "step": 50850 }, { "epoch": 11.046915725456126, "grad_norm": 0.0002057504461845383, "learning_rate": 3.982949609035621e-06, "loss": 0.0, "step": 50860 }, { "epoch": 11.049087749782798, "grad_norm": 0.00020733078417833894, "learning_rate": 3.973899507674486e-06, "loss": 0.0, "step": 50870 }, { "epoch": 11.05125977410947, "grad_norm": 0.00020432537712622434, "learning_rate": 3.964849406313351e-06, "loss": 0.0, "step": 50880 }, { "epoch": 11.053431798436142, "grad_norm": 0.0002050166658591479, "learning_rate": 3.955799304952216e-06, "loss": 0.0, "step": 50890 }, { "epoch": 11.055603822762816, "grad_norm": 0.00023185595637187362, "learning_rate": 3.9467492035910805e-06, "loss": 0.0, "step": 50900 }, { "epoch": 11.057775847089488, "grad_norm": 0.0003302676195744425, "learning_rate": 3.937699102229945e-06, "loss": 0.0, "step": 50910 }, { "epoch": 11.05994787141616, "grad_norm": 0.0002059488178929314, "learning_rate": 3.92864900086881e-06, "loss": 0.0044, "step": 50920 }, { "epoch": 11.062119895742832, "grad_norm": 0.0002076542004942894, "learning_rate": 3.919598899507674e-06, "loss": 0.0, "step": 50930 }, { "epoch": 11.064291920069504, "grad_norm": 0.00020699271408375353, "learning_rate": 3.9105487981465395e-06, "loss": 0.0, "step": 50940 }, { "epoch": 11.066463944396178, "grad_norm": 0.0002058657701127231, "learning_rate": 3.901498696785405e-06, "loss": 0.0, "step": 50950 }, { "epoch": 11.06863596872285, "grad_norm": 0.0002156527916667983, "learning_rate": 3.892448595424269e-06, "loss": 0.0, "step": 50960 }, { "epoch": 11.070807993049522, "grad_norm": 0.000269353884505108, "learning_rate": 3.883398494063133e-06, "loss": 0.0, "step": 50970 }, { "epoch": 11.072980017376194, "grad_norm": 0.0002053501084446907, "learning_rate": 3.8743483927019986e-06, "loss": 0.0, "step": 50980 }, { "epoch": 11.075152041702868, "grad_norm": 0.0002061450359178707, "learning_rate": 3.865298291340863e-06, "loss": 0.0, "step": 50990 }, { "epoch": 11.07732406602954, "grad_norm": 0.00020651152590289712, "learning_rate": 3.856248189979728e-06, "loss": 0.0, "step": 51000 }, { "epoch": 11.079496090356212, "grad_norm": 0.00026513266493566334, "learning_rate": 3.8471980886185924e-06, "loss": 0.0, "step": 51010 }, { "epoch": 11.081668114682884, "grad_norm": 0.00023419792705681175, "learning_rate": 3.838147987257458e-06, "loss": 0.0, "step": 51020 }, { "epoch": 11.083840139009556, "grad_norm": 0.00022187073773238808, "learning_rate": 3.829097885896323e-06, "loss": 0.0, "step": 51030 }, { "epoch": 11.08601216333623, "grad_norm": 0.00020510748436208814, "learning_rate": 3.820047784535187e-06, "loss": 0.0, "step": 51040 }, { "epoch": 11.088184187662902, "grad_norm": 0.0002099367993650958, "learning_rate": 3.8109976831740515e-06, "loss": 0.0, "step": 51050 }, { "epoch": 11.090356211989574, "grad_norm": 0.0002047920279437676, "learning_rate": 3.8019475818129167e-06, "loss": 0.0, "step": 51060 }, { "epoch": 11.092528236316246, "grad_norm": 0.00020407889678608626, "learning_rate": 3.7928974804517814e-06, "loss": 0.0, "step": 51070 }, { "epoch": 11.094700260642918, "grad_norm": 0.00020646867051254958, "learning_rate": 3.7838473790906458e-06, "loss": 0.0, "step": 51080 }, { "epoch": 11.096872284969592, "grad_norm": 0.0002043266867985949, "learning_rate": 3.774797277729511e-06, "loss": 0.0, "step": 51090 }, { "epoch": 11.099044309296264, "grad_norm": 0.0002051582414424047, "learning_rate": 3.7657471763683753e-06, "loss": 0.0, "step": 51100 }, { "epoch": 11.101216333622936, "grad_norm": 0.00020401063375175, "learning_rate": 3.75669707500724e-06, "loss": 0.0, "step": 51110 }, { "epoch": 11.103388357949608, "grad_norm": 0.00020295396097935736, "learning_rate": 3.7476469736461052e-06, "loss": 0.0, "step": 51120 }, { "epoch": 11.105560382276282, "grad_norm": 0.00020517785742413253, "learning_rate": 3.7385968722849696e-06, "loss": 0.0, "step": 51130 }, { "epoch": 11.107732406602954, "grad_norm": 0.0002051791234407574, "learning_rate": 3.7295467709238343e-06, "loss": 0.0, "step": 51140 }, { "epoch": 11.109904430929626, "grad_norm": 0.00026767002418637276, "learning_rate": 3.7204966695626995e-06, "loss": 0.0, "step": 51150 }, { "epoch": 11.112076455256299, "grad_norm": 0.00020361509814392775, "learning_rate": 3.711446568201564e-06, "loss": 0.0, "step": 51160 }, { "epoch": 11.11424847958297, "grad_norm": 0.0002157597045879811, "learning_rate": 3.7023964668404286e-06, "loss": 0.0, "step": 51170 }, { "epoch": 11.116420503909644, "grad_norm": 0.00020699432934634387, "learning_rate": 3.693346365479294e-06, "loss": 0.0, "step": 51180 }, { "epoch": 11.118592528236316, "grad_norm": 0.0002198000584030524, "learning_rate": 3.684296264118158e-06, "loss": 0.0, "step": 51190 }, { "epoch": 11.120764552562989, "grad_norm": 0.14969320595264435, "learning_rate": 3.6752461627570233e-06, "loss": 0.0081, "step": 51200 }, { "epoch": 11.12293657688966, "grad_norm": 0.00020602031145244837, "learning_rate": 3.6661960613958877e-06, "loss": 0.0, "step": 51210 }, { "epoch": 11.125108601216334, "grad_norm": 0.00020917251822538674, "learning_rate": 3.6571459600347524e-06, "loss": 0.0, "step": 51220 }, { "epoch": 11.127280625543007, "grad_norm": 0.0002067875029752031, "learning_rate": 3.6480958586736176e-06, "loss": 0.0, "step": 51230 }, { "epoch": 11.129452649869679, "grad_norm": 0.00020278933516237885, "learning_rate": 3.639045757312482e-06, "loss": 0.0, "step": 51240 }, { "epoch": 11.13162467419635, "grad_norm": 0.00020642305025830865, "learning_rate": 3.6299956559513467e-06, "loss": 0.0, "step": 51250 }, { "epoch": 11.133796698523023, "grad_norm": 0.00020503332780208439, "learning_rate": 3.620945554590212e-06, "loss": 0.0072, "step": 51260 }, { "epoch": 11.135968722849697, "grad_norm": 0.00020080467220395803, "learning_rate": 3.6118954532290763e-06, "loss": 0.0, "step": 51270 }, { "epoch": 11.138140747176369, "grad_norm": 0.00020930106984451413, "learning_rate": 3.602845351867941e-06, "loss": 0.0, "step": 51280 }, { "epoch": 11.14031277150304, "grad_norm": 0.0001999106170842424, "learning_rate": 3.593795250506806e-06, "loss": 0.0, "step": 51290 }, { "epoch": 11.142484795829713, "grad_norm": 0.00020284131460357457, "learning_rate": 3.5847451491456705e-06, "loss": 0.0, "step": 51300 }, { "epoch": 11.144656820156385, "grad_norm": 0.0002229460224043578, "learning_rate": 3.575695047784535e-06, "loss": 0.0, "step": 51310 }, { "epoch": 11.146828844483059, "grad_norm": 0.00020142899302300066, "learning_rate": 3.5666449464234005e-06, "loss": 0.0, "step": 51320 }, { "epoch": 11.14900086880973, "grad_norm": 0.00020130908524151891, "learning_rate": 3.557594845062265e-06, "loss": 0.0, "step": 51330 }, { "epoch": 11.151172893136403, "grad_norm": 0.0002031168551184237, "learning_rate": 3.548544743701129e-06, "loss": 0.0, "step": 51340 }, { "epoch": 11.153344917463075, "grad_norm": 0.00019986261031590402, "learning_rate": 3.5394946423399944e-06, "loss": 0.0047, "step": 51350 }, { "epoch": 11.155516941789749, "grad_norm": 0.00020868683350272477, "learning_rate": 3.530444540978859e-06, "loss": 0.0, "step": 51360 }, { "epoch": 11.157688966116421, "grad_norm": 0.00025968122645281255, "learning_rate": 3.5213944396177235e-06, "loss": 0.0, "step": 51370 }, { "epoch": 11.159860990443093, "grad_norm": 0.00020654463151004165, "learning_rate": 3.5123443382565887e-06, "loss": 0.0, "step": 51380 }, { "epoch": 11.162033014769765, "grad_norm": 0.00020101090194657445, "learning_rate": 3.5032942368954534e-06, "loss": 0.0094, "step": 51390 }, { "epoch": 11.164205039096437, "grad_norm": 0.00020278354350011796, "learning_rate": 3.4942441355343186e-06, "loss": 0.0051, "step": 51400 }, { "epoch": 11.166377063423111, "grad_norm": 0.00029431344592012465, "learning_rate": 3.485194034173183e-06, "loss": 0.0, "step": 51410 }, { "epoch": 11.168549087749783, "grad_norm": 0.00020167112234048545, "learning_rate": 3.4761439328120477e-06, "loss": 0.0, "step": 51420 }, { "epoch": 11.170721112076455, "grad_norm": 0.0002188723155995831, "learning_rate": 3.467093831450913e-06, "loss": 0.0, "step": 51430 }, { "epoch": 11.172893136403127, "grad_norm": 0.00021310002193786204, "learning_rate": 3.4580437300897772e-06, "loss": 0.0043, "step": 51440 }, { "epoch": 11.175065160729801, "grad_norm": 0.0002156758273486048, "learning_rate": 3.4489936287286416e-06, "loss": 0.0, "step": 51450 }, { "epoch": 11.177237185056473, "grad_norm": 0.0002034068020293489, "learning_rate": 3.4399435273675068e-06, "loss": 0.0, "step": 51460 }, { "epoch": 11.179409209383145, "grad_norm": 0.0002786774421110749, "learning_rate": 3.4308934260063715e-06, "loss": 0.0, "step": 51470 }, { "epoch": 11.181581233709817, "grad_norm": 0.0002041921834461391, "learning_rate": 3.421843324645236e-06, "loss": 0.0, "step": 51480 }, { "epoch": 11.18375325803649, "grad_norm": 0.000210064637940377, "learning_rate": 3.412793223284101e-06, "loss": 0.0, "step": 51490 }, { "epoch": 11.185925282363163, "grad_norm": 0.00020386015239637345, "learning_rate": 3.403743121922966e-06, "loss": 0.0, "step": 51500 }, { "epoch": 11.188097306689835, "grad_norm": 0.00020263722399249673, "learning_rate": 3.39469302056183e-06, "loss": 0.0, "step": 51510 }, { "epoch": 11.190269331016507, "grad_norm": 0.00019954588788095862, "learning_rate": 3.3856429192006953e-06, "loss": 0.0087, "step": 51520 }, { "epoch": 11.19244135534318, "grad_norm": 0.1446802169084549, "learning_rate": 3.37659281783956e-06, "loss": 0.0079, "step": 51530 }, { "epoch": 11.194613379669851, "grad_norm": 0.0002020450192503631, "learning_rate": 3.3675427164784244e-06, "loss": 0.0, "step": 51540 }, { "epoch": 11.196785403996525, "grad_norm": 0.00020075002976227552, "learning_rate": 3.3584926151172896e-06, "loss": 0.0, "step": 51550 }, { "epoch": 11.198957428323197, "grad_norm": 0.00020184363529551774, "learning_rate": 3.349442513756154e-06, "loss": 0.0, "step": 51560 }, { "epoch": 11.20112945264987, "grad_norm": 0.00020253408001735806, "learning_rate": 3.3403924123950187e-06, "loss": 0.0, "step": 51570 }, { "epoch": 11.203301476976542, "grad_norm": 0.00020108975877519697, "learning_rate": 3.331342311033884e-06, "loss": 0.0, "step": 51580 }, { "epoch": 11.205473501303215, "grad_norm": 0.00026143176364712417, "learning_rate": 3.3222922096727482e-06, "loss": 0.0, "step": 51590 }, { "epoch": 11.207645525629887, "grad_norm": 0.00020975733059458435, "learning_rate": 3.3132421083116134e-06, "loss": 0.0, "step": 51600 }, { "epoch": 11.20981754995656, "grad_norm": 0.000205664211534895, "learning_rate": 3.304192006950478e-06, "loss": 0.0, "step": 51610 }, { "epoch": 11.211989574283232, "grad_norm": 0.00042388608562760055, "learning_rate": 3.2951419055893425e-06, "loss": 0.0, "step": 51620 }, { "epoch": 11.214161598609904, "grad_norm": 0.00020332858548499644, "learning_rate": 3.2860918042282077e-06, "loss": 0.0, "step": 51630 }, { "epoch": 11.216333622936578, "grad_norm": 0.00020449819567147642, "learning_rate": 3.2770417028670725e-06, "loss": 0.0, "step": 51640 }, { "epoch": 11.21850564726325, "grad_norm": 0.000200850103283301, "learning_rate": 3.267991601505937e-06, "loss": 0.0, "step": 51650 }, { "epoch": 11.220677671589922, "grad_norm": 0.0002570762299001217, "learning_rate": 3.258941500144802e-06, "loss": 0.0, "step": 51660 }, { "epoch": 11.222849695916594, "grad_norm": 0.00020373582083266228, "learning_rate": 3.2498913987836663e-06, "loss": 0.0, "step": 51670 }, { "epoch": 11.225021720243266, "grad_norm": 0.00020122008572798222, "learning_rate": 3.240841297422531e-06, "loss": 0.0046, "step": 51680 }, { "epoch": 11.22719374456994, "grad_norm": 0.00019910384435206652, "learning_rate": 3.2317911960613963e-06, "loss": 0.0, "step": 51690 }, { "epoch": 11.229365768896612, "grad_norm": 0.00020562649297062308, "learning_rate": 3.2227410947002606e-06, "loss": 0.0, "step": 51700 }, { "epoch": 11.231537793223284, "grad_norm": 0.0002331691503059119, "learning_rate": 3.2136909933391254e-06, "loss": 0.0, "step": 51710 }, { "epoch": 11.233709817549956, "grad_norm": 0.00020238434080965817, "learning_rate": 3.2046408919779906e-06, "loss": 0.005, "step": 51720 }, { "epoch": 11.23588184187663, "grad_norm": 0.0001993612531805411, "learning_rate": 3.195590790616855e-06, "loss": 0.0046, "step": 51730 }, { "epoch": 11.238053866203302, "grad_norm": 0.0001995191996684298, "learning_rate": 3.1865406892557197e-06, "loss": 0.0, "step": 51740 }, { "epoch": 11.240225890529974, "grad_norm": 0.00020470732124522328, "learning_rate": 3.177490587894585e-06, "loss": 0.0, "step": 51750 }, { "epoch": 11.242397914856646, "grad_norm": 0.00020886657875962555, "learning_rate": 3.168440486533449e-06, "loss": 0.0, "step": 51760 }, { "epoch": 11.244569939183318, "grad_norm": 0.00020291624241508543, "learning_rate": 3.1593903851723135e-06, "loss": 0.0049, "step": 51770 }, { "epoch": 11.246741963509992, "grad_norm": 0.0002016778162214905, "learning_rate": 3.150340283811179e-06, "loss": 0.0038, "step": 51780 }, { "epoch": 11.248913987836664, "grad_norm": 0.0002016224607359618, "learning_rate": 3.1412901824500435e-06, "loss": 0.0, "step": 51790 }, { "epoch": 11.251086012163336, "grad_norm": 0.00019841018365696073, "learning_rate": 3.1322400810889087e-06, "loss": 0.0, "step": 51800 }, { "epoch": 11.253258036490008, "grad_norm": 0.00019952683942392468, "learning_rate": 3.123189979727773e-06, "loss": 0.0085, "step": 51810 }, { "epoch": 11.255430060816682, "grad_norm": 0.00032277125865221024, "learning_rate": 3.1141398783666378e-06, "loss": 0.0079, "step": 51820 }, { "epoch": 11.257602085143354, "grad_norm": 0.000201634771656245, "learning_rate": 3.1050897770055025e-06, "loss": 0.0035, "step": 51830 }, { "epoch": 11.259774109470026, "grad_norm": 0.00019903185602743179, "learning_rate": 3.0960396756443673e-06, "loss": 0.0, "step": 51840 }, { "epoch": 11.261946133796698, "grad_norm": 0.00020083566778339446, "learning_rate": 3.0869895742832325e-06, "loss": 0.0048, "step": 51850 }, { "epoch": 11.26411815812337, "grad_norm": 0.00025526623358018696, "learning_rate": 3.077939472922097e-06, "loss": 0.0, "step": 51860 }, { "epoch": 11.266290182450044, "grad_norm": 0.00020170937932562083, "learning_rate": 3.0688893715609616e-06, "loss": 0.0, "step": 51870 }, { "epoch": 11.268462206776716, "grad_norm": 0.00020052251056768, "learning_rate": 3.0598392701998264e-06, "loss": 0.0041, "step": 51880 }, { "epoch": 11.270634231103388, "grad_norm": 0.00019856641301885247, "learning_rate": 3.050789168838691e-06, "loss": 0.0, "step": 51890 }, { "epoch": 11.27280625543006, "grad_norm": 0.00025934906443580985, "learning_rate": 3.041739067477556e-06, "loss": 0.0, "step": 51900 }, { "epoch": 11.274978279756734, "grad_norm": 0.00019815000996459275, "learning_rate": 3.0326889661164206e-06, "loss": 0.0, "step": 51910 }, { "epoch": 11.277150304083406, "grad_norm": 0.0002467527228873223, "learning_rate": 3.0236388647552854e-06, "loss": 0.0, "step": 51920 }, { "epoch": 11.279322328410078, "grad_norm": 0.00019958475604653358, "learning_rate": 3.01458876339415e-06, "loss": 0.0, "step": 51930 }, { "epoch": 11.28149435273675, "grad_norm": 0.00019774853717535734, "learning_rate": 3.005538662033015e-06, "loss": 0.0, "step": 51940 }, { "epoch": 11.283666377063422, "grad_norm": 0.00019953559967689216, "learning_rate": 2.9964885606718797e-06, "loss": 0.0, "step": 51950 }, { "epoch": 11.285838401390096, "grad_norm": 0.0002085790765704587, "learning_rate": 2.9874384593107445e-06, "loss": 0.0, "step": 51960 }, { "epoch": 11.288010425716768, "grad_norm": 0.0002594150719232857, "learning_rate": 2.9783883579496092e-06, "loss": 0.0, "step": 51970 }, { "epoch": 11.29018245004344, "grad_norm": 0.00026963651180267334, "learning_rate": 2.969338256588474e-06, "loss": 0.0, "step": 51980 }, { "epoch": 11.292354474370113, "grad_norm": 0.00020867727289441973, "learning_rate": 2.9602881552273387e-06, "loss": 0.0, "step": 51990 }, { "epoch": 11.294526498696785, "grad_norm": 0.00019796937704086304, "learning_rate": 2.9512380538662035e-06, "loss": 0.0, "step": 52000 }, { "epoch": 11.296698523023458, "grad_norm": 0.000202857336262241, "learning_rate": 2.9421879525050683e-06, "loss": 0.0036, "step": 52010 }, { "epoch": 11.29887054735013, "grad_norm": 0.0002166828780900687, "learning_rate": 2.9331378511439326e-06, "loss": 0.0, "step": 52020 }, { "epoch": 11.301042571676803, "grad_norm": 0.00021046474284958094, "learning_rate": 2.924087749782798e-06, "loss": 0.0048, "step": 52030 }, { "epoch": 11.303214596003475, "grad_norm": 0.00020024993864353746, "learning_rate": 2.9150376484216626e-06, "loss": 0.0, "step": 52040 }, { "epoch": 11.305386620330149, "grad_norm": 0.00020179520652163774, "learning_rate": 2.9059875470605273e-06, "loss": 0.0047, "step": 52050 }, { "epoch": 11.30755864465682, "grad_norm": 0.00020518811652436852, "learning_rate": 2.896937445699392e-06, "loss": 0.0, "step": 52060 }, { "epoch": 11.309730668983493, "grad_norm": 0.0002528747427277267, "learning_rate": 2.887887344338257e-06, "loss": 0.0, "step": 52070 }, { "epoch": 11.311902693310165, "grad_norm": 0.00019628154404927045, "learning_rate": 2.8788372429771216e-06, "loss": 0.0, "step": 52080 }, { "epoch": 11.314074717636837, "grad_norm": 0.0002019262028625235, "learning_rate": 2.869787141615986e-06, "loss": 0.0, "step": 52090 }, { "epoch": 11.31624674196351, "grad_norm": 0.00019746019097510725, "learning_rate": 2.860737040254851e-06, "loss": 0.0, "step": 52100 }, { "epoch": 11.318418766290183, "grad_norm": 0.00020607651094906032, "learning_rate": 2.851686938893716e-06, "loss": 0.0, "step": 52110 }, { "epoch": 11.320590790616855, "grad_norm": 0.0002728735562413931, "learning_rate": 2.8426368375325802e-06, "loss": 0.0, "step": 52120 }, { "epoch": 11.322762814943527, "grad_norm": 0.00019636953948065639, "learning_rate": 2.8335867361714454e-06, "loss": 0.0, "step": 52130 }, { "epoch": 11.324934839270199, "grad_norm": 0.00019914501172024757, "learning_rate": 2.82453663481031e-06, "loss": 0.0, "step": 52140 }, { "epoch": 11.327106863596873, "grad_norm": 0.00020094211504328996, "learning_rate": 2.815486533449175e-06, "loss": 0.0, "step": 52150 }, { "epoch": 11.329278887923545, "grad_norm": 0.00033041482674889266, "learning_rate": 2.8064364320880393e-06, "loss": 0.0, "step": 52160 }, { "epoch": 11.331450912250217, "grad_norm": 0.0002014395868172869, "learning_rate": 2.7973863307269045e-06, "loss": 0.0, "step": 52170 }, { "epoch": 11.333622936576889, "grad_norm": 0.00019805788178928196, "learning_rate": 2.7883362293657692e-06, "loss": 0.0046, "step": 52180 }, { "epoch": 11.335794960903563, "grad_norm": 0.00020002457313239574, "learning_rate": 2.7792861280046336e-06, "loss": 0.0, "step": 52190 }, { "epoch": 11.337966985230235, "grad_norm": 0.00019858147425111383, "learning_rate": 2.7702360266434983e-06, "loss": 0.0, "step": 52200 }, { "epoch": 11.340139009556907, "grad_norm": 0.00019874947611242533, "learning_rate": 2.7611859252823635e-06, "loss": 0.0, "step": 52210 }, { "epoch": 11.342311033883579, "grad_norm": 0.00019803144095931202, "learning_rate": 2.752135823921228e-06, "loss": 0.0, "step": 52220 }, { "epoch": 11.344483058210251, "grad_norm": 0.00022093136794865131, "learning_rate": 2.7430857225600926e-06, "loss": 0.0, "step": 52230 }, { "epoch": 11.346655082536925, "grad_norm": 0.00019645935390144587, "learning_rate": 2.734035621198958e-06, "loss": 0.0, "step": 52240 }, { "epoch": 11.348827106863597, "grad_norm": 0.00020049404702149332, "learning_rate": 2.7249855198378226e-06, "loss": 0.0035, "step": 52250 }, { "epoch": 11.35099913119027, "grad_norm": 0.00020869314903393388, "learning_rate": 2.715935418476687e-06, "loss": 0.0, "step": 52260 }, { "epoch": 11.353171155516941, "grad_norm": 0.00019707180035766214, "learning_rate": 2.7077903272516652e-06, "loss": 0.0077, "step": 52270 }, { "epoch": 11.355343179843615, "grad_norm": 0.0002042713458649814, "learning_rate": 2.69874022589053e-06, "loss": 0.0, "step": 52280 }, { "epoch": 11.357515204170287, "grad_norm": 0.00019734865054488182, "learning_rate": 2.6896901245293948e-06, "loss": 0.0, "step": 52290 }, { "epoch": 11.35968722849696, "grad_norm": 0.000252844620263204, "learning_rate": 2.6806400231682595e-06, "loss": 0.0, "step": 52300 }, { "epoch": 11.361859252823631, "grad_norm": 0.00019945701933465898, "learning_rate": 2.6715899218071243e-06, "loss": 0.0, "step": 52310 }, { "epoch": 11.364031277150303, "grad_norm": 0.00019766220066230744, "learning_rate": 2.6625398204459895e-06, "loss": 0.0, "step": 52320 }, { "epoch": 11.366203301476977, "grad_norm": 0.0001986150018638, "learning_rate": 2.653489719084854e-06, "loss": 0.0, "step": 52330 }, { "epoch": 11.36837532580365, "grad_norm": 0.00027733895694836974, "learning_rate": 2.6444396177237186e-06, "loss": 0.0, "step": 52340 }, { "epoch": 11.370547350130321, "grad_norm": 0.00020426575792953372, "learning_rate": 2.6353895163625833e-06, "loss": 0.0, "step": 52350 }, { "epoch": 11.372719374456993, "grad_norm": 0.0002020970277953893, "learning_rate": 2.626339415001448e-06, "loss": 0.0, "step": 52360 }, { "epoch": 11.374891398783667, "grad_norm": 0.00019799098663497716, "learning_rate": 2.617289313640313e-06, "loss": 0.0, "step": 52370 }, { "epoch": 11.37706342311034, "grad_norm": 0.00022170203737914562, "learning_rate": 2.6082392122791776e-06, "loss": 0.0, "step": 52380 }, { "epoch": 11.379235447437011, "grad_norm": 0.0001970038574654609, "learning_rate": 2.5991891109180424e-06, "loss": 0.0, "step": 52390 }, { "epoch": 11.381407471763684, "grad_norm": 0.0003018612042069435, "learning_rate": 2.590139009556907e-06, "loss": 0.0, "step": 52400 }, { "epoch": 11.383579496090356, "grad_norm": 0.00019775984401348978, "learning_rate": 2.581088908195772e-06, "loss": 0.0049, "step": 52410 }, { "epoch": 11.38575152041703, "grad_norm": 0.0001969892909983173, "learning_rate": 2.5720388068346367e-06, "loss": 0.0, "step": 52420 }, { "epoch": 11.387923544743701, "grad_norm": 0.0002039795508608222, "learning_rate": 2.5629887054735014e-06, "loss": 0.0, "step": 52430 }, { "epoch": 11.390095569070374, "grad_norm": 0.0001986090501304716, "learning_rate": 2.553938604112366e-06, "loss": 0.0, "step": 52440 }, { "epoch": 11.392267593397046, "grad_norm": 0.00019856690778397024, "learning_rate": 2.544888502751231e-06, "loss": 0.0, "step": 52450 }, { "epoch": 11.394439617723718, "grad_norm": 0.0001971104647964239, "learning_rate": 2.5358384013900957e-06, "loss": 0.0, "step": 52460 }, { "epoch": 11.396611642050392, "grad_norm": 0.00019835439161397517, "learning_rate": 2.5267883000289605e-06, "loss": 0.0, "step": 52470 }, { "epoch": 11.398783666377064, "grad_norm": 0.0002031942130997777, "learning_rate": 2.5177381986678253e-06, "loss": 0.0, "step": 52480 }, { "epoch": 11.400955690703736, "grad_norm": 0.00019665226864162832, "learning_rate": 2.5086880973066896e-06, "loss": 0.0, "step": 52490 }, { "epoch": 11.403127715030408, "grad_norm": 0.00019563220848795027, "learning_rate": 2.4996379959455548e-06, "loss": 0.0055, "step": 52500 }, { "epoch": 11.405299739357082, "grad_norm": 0.00020207982743158937, "learning_rate": 2.4905878945844195e-06, "loss": 0.0046, "step": 52510 }, { "epoch": 11.407471763683754, "grad_norm": 0.0002643620246089995, "learning_rate": 2.4815377932232843e-06, "loss": 0.0, "step": 52520 }, { "epoch": 11.409643788010426, "grad_norm": 0.0002021700784098357, "learning_rate": 2.472487691862149e-06, "loss": 0.0, "step": 52530 }, { "epoch": 11.411815812337098, "grad_norm": 0.00019614986376836896, "learning_rate": 2.463437590501014e-06, "loss": 0.0, "step": 52540 }, { "epoch": 11.41398783666377, "grad_norm": 0.00019699233234860003, "learning_rate": 2.4543874891398786e-06, "loss": 0.0, "step": 52550 }, { "epoch": 11.416159860990444, "grad_norm": 0.00019868268282152712, "learning_rate": 2.445337387778743e-06, "loss": 0.0, "step": 52560 }, { "epoch": 11.418331885317116, "grad_norm": 0.0001990678283618763, "learning_rate": 2.436287286417608e-06, "loss": 0.0, "step": 52570 }, { "epoch": 11.420503909643788, "grad_norm": 0.00019700905249919742, "learning_rate": 2.427237185056473e-06, "loss": 0.0, "step": 52580 }, { "epoch": 11.42267593397046, "grad_norm": 0.00019700094708241522, "learning_rate": 2.4181870836953372e-06, "loss": 0.0045, "step": 52590 }, { "epoch": 11.424847958297132, "grad_norm": 0.00019845672068186104, "learning_rate": 2.4091369823342024e-06, "loss": 0.0, "step": 52600 }, { "epoch": 11.427019982623806, "grad_norm": 0.00019562583474908024, "learning_rate": 2.400086880973067e-06, "loss": 0.0, "step": 52610 }, { "epoch": 11.429192006950478, "grad_norm": 0.0001981602981686592, "learning_rate": 2.391036779611932e-06, "loss": 0.0, "step": 52620 }, { "epoch": 11.43136403127715, "grad_norm": 0.000247137708356604, "learning_rate": 2.3819866782507963e-06, "loss": 0.0048, "step": 52630 }, { "epoch": 11.433536055603822, "grad_norm": 0.00019939042977057397, "learning_rate": 2.3729365768896615e-06, "loss": 0.0, "step": 52640 }, { "epoch": 11.435708079930496, "grad_norm": 0.00020174359087832272, "learning_rate": 2.3638864755285262e-06, "loss": 0.0, "step": 52650 }, { "epoch": 11.437880104257168, "grad_norm": 0.00020131657947786152, "learning_rate": 2.3548363741673906e-06, "loss": 0.0, "step": 52660 }, { "epoch": 11.44005212858384, "grad_norm": 0.0001996621285798028, "learning_rate": 2.3457862728062553e-06, "loss": 0.0, "step": 52670 }, { "epoch": 11.442224152910512, "grad_norm": 0.00019746148609556258, "learning_rate": 2.3367361714451205e-06, "loss": 0.0, "step": 52680 }, { "epoch": 11.444396177237184, "grad_norm": 0.0001999850501306355, "learning_rate": 2.327686070083985e-06, "loss": 0.0, "step": 52690 }, { "epoch": 11.446568201563858, "grad_norm": 0.0001960912486538291, "learning_rate": 2.3186359687228496e-06, "loss": 0.0, "step": 52700 }, { "epoch": 11.44874022589053, "grad_norm": 0.00019612940377555788, "learning_rate": 2.309585867361715e-06, "loss": 0.0, "step": 52710 }, { "epoch": 11.450912250217202, "grad_norm": 0.00020600967400241643, "learning_rate": 2.3005357660005796e-06, "loss": 0.0, "step": 52720 }, { "epoch": 11.453084274543874, "grad_norm": 0.0001977920619538054, "learning_rate": 2.291485664639444e-06, "loss": 0.0, "step": 52730 }, { "epoch": 11.455256298870548, "grad_norm": 0.0001973673061002046, "learning_rate": 2.2824355632783087e-06, "loss": 0.0, "step": 52740 }, { "epoch": 11.45742832319722, "grad_norm": 0.00019590802548918873, "learning_rate": 2.273385461917174e-06, "loss": 0.0036, "step": 52750 }, { "epoch": 11.459600347523892, "grad_norm": 0.0001969041331904009, "learning_rate": 2.264335360556038e-06, "loss": 0.0, "step": 52760 }, { "epoch": 11.461772371850564, "grad_norm": 0.00019703819998539984, "learning_rate": 2.255285259194903e-06, "loss": 0.0, "step": 52770 }, { "epoch": 11.463944396177236, "grad_norm": 0.0001995855272980407, "learning_rate": 2.246235157833768e-06, "loss": 0.0, "step": 52780 }, { "epoch": 11.46611642050391, "grad_norm": 0.00020003956160508096, "learning_rate": 2.2371850564726325e-06, "loss": 0.0, "step": 52790 }, { "epoch": 11.468288444830582, "grad_norm": 0.00020116106315981597, "learning_rate": 2.2281349551114972e-06, "loss": 0.0, "step": 52800 }, { "epoch": 11.470460469157254, "grad_norm": 0.00020231454982422292, "learning_rate": 2.219084853750362e-06, "loss": 0.0, "step": 52810 }, { "epoch": 11.472632493483927, "grad_norm": 0.0001936189946718514, "learning_rate": 2.210034752389227e-06, "loss": 0.0, "step": 52820 }, { "epoch": 11.474804517810599, "grad_norm": 0.0001966677518794313, "learning_rate": 2.2009846510280915e-06, "loss": 0.0, "step": 52830 }, { "epoch": 11.476976542137272, "grad_norm": 0.00019911628623958677, "learning_rate": 2.1919345496669563e-06, "loss": 0.0, "step": 52840 }, { "epoch": 11.479148566463945, "grad_norm": 0.00019401231838855892, "learning_rate": 2.182884448305821e-06, "loss": 0.0, "step": 52850 }, { "epoch": 11.481320590790617, "grad_norm": 0.00019698469259310514, "learning_rate": 2.173834346944686e-06, "loss": 0.0, "step": 52860 }, { "epoch": 11.483492615117289, "grad_norm": 0.0001935689797392115, "learning_rate": 2.1647842455835506e-06, "loss": 0.0, "step": 52870 }, { "epoch": 11.485664639443963, "grad_norm": 0.0008656844729557633, "learning_rate": 2.1557341442224153e-06, "loss": 0.0, "step": 52880 }, { "epoch": 11.487836663770635, "grad_norm": 0.00019588657596614212, "learning_rate": 2.14668404286128e-06, "loss": 0.0, "step": 52890 }, { "epoch": 11.490008688097307, "grad_norm": 0.00019638192316051573, "learning_rate": 2.137633941500145e-06, "loss": 0.0049, "step": 52900 }, { "epoch": 11.492180712423979, "grad_norm": 0.00020129536278545856, "learning_rate": 2.1285838401390096e-06, "loss": 0.0, "step": 52910 }, { "epoch": 11.49435273675065, "grad_norm": 0.00019692025671247393, "learning_rate": 2.1195337387778744e-06, "loss": 0.0, "step": 52920 }, { "epoch": 11.496524761077325, "grad_norm": 0.00019469275139272213, "learning_rate": 2.110483637416739e-06, "loss": 0.0, "step": 52930 }, { "epoch": 11.498696785403997, "grad_norm": 0.0002038736711256206, "learning_rate": 2.101433536055604e-06, "loss": 0.0, "step": 52940 }, { "epoch": 11.500868809730669, "grad_norm": 0.00019812029495369643, "learning_rate": 2.0923834346944687e-06, "loss": 0.0054, "step": 52950 }, { "epoch": 11.503040834057341, "grad_norm": 0.000194476917386055, "learning_rate": 2.0833333333333334e-06, "loss": 0.0088, "step": 52960 }, { "epoch": 11.505212858384013, "grad_norm": 0.0001952751917997375, "learning_rate": 2.074283231972198e-06, "loss": 0.0, "step": 52970 }, { "epoch": 11.507384882710687, "grad_norm": 0.0001947148412000388, "learning_rate": 2.065233130611063e-06, "loss": 0.0, "step": 52980 }, { "epoch": 11.509556907037359, "grad_norm": 0.00020383935770951211, "learning_rate": 2.0561830292499277e-06, "loss": 0.0043, "step": 52990 }, { "epoch": 11.511728931364031, "grad_norm": 0.0002019301027758047, "learning_rate": 2.0471329278887925e-06, "loss": 0.0, "step": 53000 }, { "epoch": 11.513900955690703, "grad_norm": 0.000210135942324996, "learning_rate": 2.0380828265276572e-06, "loss": 0.0, "step": 53010 }, { "epoch": 11.516072980017377, "grad_norm": 0.00020146237511653453, "learning_rate": 2.029032725166522e-06, "loss": 0.0, "step": 53020 }, { "epoch": 11.518245004344049, "grad_norm": 0.0004484684322960675, "learning_rate": 2.0199826238053868e-06, "loss": 0.0, "step": 53030 }, { "epoch": 11.520417028670721, "grad_norm": 0.0001989894371945411, "learning_rate": 2.0109325224442515e-06, "loss": 0.0, "step": 53040 }, { "epoch": 11.522589052997393, "grad_norm": 0.00019518462067935616, "learning_rate": 2.0018824210831163e-06, "loss": 0.0, "step": 53050 }, { "epoch": 11.524761077324065, "grad_norm": 0.00031708358437754214, "learning_rate": 1.992832319721981e-06, "loss": 0.0, "step": 53060 }, { "epoch": 11.526933101650739, "grad_norm": 0.00019694925867952406, "learning_rate": 1.983782218360846e-06, "loss": 0.0, "step": 53070 }, { "epoch": 11.529105125977411, "grad_norm": 0.00019491143757477403, "learning_rate": 1.9747321169997106e-06, "loss": 0.0, "step": 53080 }, { "epoch": 11.531277150304083, "grad_norm": 0.00019578862702473998, "learning_rate": 1.965682015638575e-06, "loss": 0.0, "step": 53090 }, { "epoch": 11.533449174630755, "grad_norm": 0.0002560818975325674, "learning_rate": 1.95663191427744e-06, "loss": 0.0, "step": 53100 }, { "epoch": 11.535621198957429, "grad_norm": 0.00019754750246647745, "learning_rate": 1.947581812916305e-06, "loss": 0.0, "step": 53110 }, { "epoch": 11.537793223284101, "grad_norm": 0.00020018761279061437, "learning_rate": 1.9385317115551696e-06, "loss": 0.0, "step": 53120 }, { "epoch": 11.539965247610773, "grad_norm": 0.00019547737610992044, "learning_rate": 1.9294816101940344e-06, "loss": 0.0, "step": 53130 }, { "epoch": 11.542137271937445, "grad_norm": 0.00019425964273978025, "learning_rate": 1.920431508832899e-06, "loss": 0.0, "step": 53140 }, { "epoch": 11.544309296264117, "grad_norm": 0.00020175384997855872, "learning_rate": 1.911381407471764e-06, "loss": 0.0, "step": 53150 }, { "epoch": 11.546481320590791, "grad_norm": 0.00025675594224594533, "learning_rate": 1.9023313061106285e-06, "loss": 0.0, "step": 53160 }, { "epoch": 11.548653344917463, "grad_norm": 0.0001961501402547583, "learning_rate": 1.8932812047494932e-06, "loss": 0.0, "step": 53170 }, { "epoch": 11.550825369244135, "grad_norm": 0.0002024386340053752, "learning_rate": 1.8842311033883582e-06, "loss": 0.0, "step": 53180 }, { "epoch": 11.552997393570807, "grad_norm": 0.0002116846153512597, "learning_rate": 1.8751810020272225e-06, "loss": 0.0, "step": 53190 }, { "epoch": 11.555169417897481, "grad_norm": 0.00024865844170562923, "learning_rate": 1.8661309006660875e-06, "loss": 0.0, "step": 53200 }, { "epoch": 11.557341442224153, "grad_norm": 0.00020497996592894197, "learning_rate": 1.8570807993049523e-06, "loss": 0.0, "step": 53210 }, { "epoch": 11.559513466550825, "grad_norm": 0.0003205514221917838, "learning_rate": 1.8480306979438173e-06, "loss": 0.0043, "step": 53220 }, { "epoch": 11.561685490877498, "grad_norm": 0.00035617026151157916, "learning_rate": 1.8389805965826818e-06, "loss": 0.0, "step": 53230 }, { "epoch": 11.56385751520417, "grad_norm": 0.00019723277364391834, "learning_rate": 1.8299304952215466e-06, "loss": 0.0, "step": 53240 }, { "epoch": 11.566029539530843, "grad_norm": 0.00019615769269876182, "learning_rate": 1.8208803938604115e-06, "loss": 0.0, "step": 53250 }, { "epoch": 11.568201563857516, "grad_norm": 0.00026189288473688066, "learning_rate": 1.8118302924992759e-06, "loss": 0.0052, "step": 53260 }, { "epoch": 11.570373588184188, "grad_norm": 0.0003638887428678572, "learning_rate": 1.8027801911381409e-06, "loss": 0.0, "step": 53270 }, { "epoch": 11.57254561251086, "grad_norm": 0.0002062423445750028, "learning_rate": 1.7937300897770056e-06, "loss": 0.0, "step": 53280 }, { "epoch": 11.574717636837534, "grad_norm": 0.0001934427273226902, "learning_rate": 1.7846799884158702e-06, "loss": 0.0, "step": 53290 }, { "epoch": 11.576889661164206, "grad_norm": 0.00019680126570165157, "learning_rate": 1.7756298870547351e-06, "loss": 0.0, "step": 53300 }, { "epoch": 11.579061685490878, "grad_norm": 0.0001950986625161022, "learning_rate": 1.7665797856936e-06, "loss": 0.0, "step": 53310 }, { "epoch": 11.58123370981755, "grad_norm": 0.14624013006687164, "learning_rate": 1.7575296843324647e-06, "loss": 0.009, "step": 53320 }, { "epoch": 11.583405734144222, "grad_norm": 0.0001986539427889511, "learning_rate": 1.7484795829713292e-06, "loss": 0.0, "step": 53330 }, { "epoch": 11.585577758470896, "grad_norm": 0.00019793520914390683, "learning_rate": 1.7394294816101942e-06, "loss": 0.0, "step": 53340 }, { "epoch": 11.587749782797568, "grad_norm": 0.00019655383948702365, "learning_rate": 1.730379380249059e-06, "loss": 0.0, "step": 53350 }, { "epoch": 11.58992180712424, "grad_norm": 0.00019772254745475948, "learning_rate": 1.7213292788879235e-06, "loss": 0.0039, "step": 53360 }, { "epoch": 11.592093831450912, "grad_norm": 0.00019759469432756305, "learning_rate": 1.7122791775267883e-06, "loss": 0.0, "step": 53370 }, { "epoch": 11.594265855777584, "grad_norm": 0.0001963729882845655, "learning_rate": 1.7032290761656532e-06, "loss": 0.0, "step": 53380 }, { "epoch": 11.596437880104258, "grad_norm": 0.00021433483925648034, "learning_rate": 1.6941789748045178e-06, "loss": 0.0, "step": 53390 }, { "epoch": 11.59860990443093, "grad_norm": 0.0002027168811764568, "learning_rate": 1.6851288734433826e-06, "loss": 0.0047, "step": 53400 }, { "epoch": 11.600781928757602, "grad_norm": 0.00019676884403452277, "learning_rate": 1.6760787720822475e-06, "loss": 0.0, "step": 53410 }, { "epoch": 11.602953953084274, "grad_norm": 0.0002005890419241041, "learning_rate": 1.6670286707211123e-06, "loss": 0.0, "step": 53420 }, { "epoch": 11.605125977410946, "grad_norm": 0.00019666865409817547, "learning_rate": 1.6579785693599769e-06, "loss": 0.0, "step": 53430 }, { "epoch": 11.60729800173762, "grad_norm": 0.00019894151773769408, "learning_rate": 1.6489284679988416e-06, "loss": 0.0, "step": 53440 }, { "epoch": 11.609470026064292, "grad_norm": 0.0002711409470066428, "learning_rate": 1.6398783666377066e-06, "loss": 0.0, "step": 53450 }, { "epoch": 11.611642050390964, "grad_norm": 0.00020063733973074704, "learning_rate": 1.6308282652765711e-06, "loss": 0.0, "step": 53460 }, { "epoch": 11.613814074717636, "grad_norm": 0.00024653473519720137, "learning_rate": 1.621778163915436e-06, "loss": 0.0, "step": 53470 }, { "epoch": 11.61598609904431, "grad_norm": 0.0002731581625994295, "learning_rate": 1.6127280625543009e-06, "loss": 0.0, "step": 53480 }, { "epoch": 11.618158123370982, "grad_norm": 0.00021144855418242514, "learning_rate": 1.6036779611931652e-06, "loss": 0.0, "step": 53490 }, { "epoch": 11.620330147697654, "grad_norm": 0.0002458023955114186, "learning_rate": 1.5946278598320302e-06, "loss": 0.0, "step": 53500 }, { "epoch": 11.622502172024326, "grad_norm": 0.00019741586584132165, "learning_rate": 1.585577758470895e-06, "loss": 0.0, "step": 53510 }, { "epoch": 11.624674196350998, "grad_norm": 0.00020069196762051433, "learning_rate": 1.57652765710976e-06, "loss": 0.0, "step": 53520 }, { "epoch": 11.626846220677672, "grad_norm": 0.00023551438061986119, "learning_rate": 1.5674775557486245e-06, "loss": 0.0, "step": 53530 }, { "epoch": 11.629018245004344, "grad_norm": 0.00019561080262064934, "learning_rate": 1.5584274543874892e-06, "loss": 0.0, "step": 53540 }, { "epoch": 11.631190269331016, "grad_norm": 0.0001954359613591805, "learning_rate": 1.549377353026354e-06, "loss": 0.0, "step": 53550 }, { "epoch": 11.633362293657688, "grad_norm": 0.00019509869161993265, "learning_rate": 1.5403272516652188e-06, "loss": 0.0, "step": 53560 }, { "epoch": 11.635534317984362, "grad_norm": 0.0003004560712724924, "learning_rate": 1.5312771503040835e-06, "loss": 0.0, "step": 53570 }, { "epoch": 11.637706342311034, "grad_norm": 0.0001922779920278117, "learning_rate": 1.522227048942948e-06, "loss": 0.0, "step": 53580 }, { "epoch": 11.639878366637706, "grad_norm": 0.0001934824831550941, "learning_rate": 1.513176947581813e-06, "loss": 0.0045, "step": 53590 }, { "epoch": 11.642050390964378, "grad_norm": 0.00019457947928458452, "learning_rate": 1.5041268462206776e-06, "loss": 0.0, "step": 53600 }, { "epoch": 11.64422241529105, "grad_norm": 0.0001975044870050624, "learning_rate": 1.4950767448595426e-06, "loss": 0.0, "step": 53610 }, { "epoch": 11.646394439617724, "grad_norm": 0.00019748661725316197, "learning_rate": 1.4860266434984073e-06, "loss": 0.0, "step": 53620 }, { "epoch": 11.648566463944396, "grad_norm": 0.0003096135624218732, "learning_rate": 1.4769765421372719e-06, "loss": 0.0, "step": 53630 }, { "epoch": 11.650738488271069, "grad_norm": 0.00019721903663594276, "learning_rate": 1.4679264407761369e-06, "loss": 0.0, "step": 53640 }, { "epoch": 11.65291051259774, "grad_norm": 0.00019730576605070382, "learning_rate": 1.4588763394150014e-06, "loss": 0.0, "step": 53650 }, { "epoch": 11.655082536924414, "grad_norm": 0.00019422029436100274, "learning_rate": 1.4498262380538664e-06, "loss": 0.0, "step": 53660 }, { "epoch": 11.657254561251086, "grad_norm": 0.00019854224228765815, "learning_rate": 1.440776136692731e-06, "loss": 0.0, "step": 53670 }, { "epoch": 11.659426585577759, "grad_norm": 0.000195562926819548, "learning_rate": 1.4317260353315957e-06, "loss": 0.0, "step": 53680 }, { "epoch": 11.66159860990443, "grad_norm": 0.00019524309027474374, "learning_rate": 1.4226759339704605e-06, "loss": 0.0051, "step": 53690 }, { "epoch": 11.663770634231103, "grad_norm": 0.0001945122639881447, "learning_rate": 1.4136258326093252e-06, "loss": 0.0, "step": 53700 }, { "epoch": 11.665942658557777, "grad_norm": 0.00041479626088403165, "learning_rate": 1.4045757312481902e-06, "loss": 0.0, "step": 53710 }, { "epoch": 11.668114682884449, "grad_norm": 0.00019896173034794629, "learning_rate": 1.3955256298870548e-06, "loss": 0.0, "step": 53720 }, { "epoch": 11.67028670721112, "grad_norm": 0.00019946540123783052, "learning_rate": 1.3864755285259195e-06, "loss": 0.0, "step": 53730 }, { "epoch": 11.672458731537793, "grad_norm": 0.00019381535821594298, "learning_rate": 1.3774254271647843e-06, "loss": 0.0048, "step": 53740 }, { "epoch": 11.674630755864465, "grad_norm": 0.0001966750860447064, "learning_rate": 1.368375325803649e-06, "loss": 0.0, "step": 53750 }, { "epoch": 11.676802780191139, "grad_norm": 0.0001956068881554529, "learning_rate": 1.3593252244425138e-06, "loss": 0.0, "step": 53760 }, { "epoch": 11.67897480451781, "grad_norm": 0.0001950589648913592, "learning_rate": 1.3502751230813786e-06, "loss": 0.0, "step": 53770 }, { "epoch": 11.681146828844483, "grad_norm": 0.00019512952712830156, "learning_rate": 1.3412250217202433e-06, "loss": 0.0093, "step": 53780 }, { "epoch": 11.683318853171155, "grad_norm": 0.00019810015510302037, "learning_rate": 1.332174920359108e-06, "loss": 0.0, "step": 53790 }, { "epoch": 11.685490877497829, "grad_norm": 0.00019662882550619543, "learning_rate": 1.3231248189979729e-06, "loss": 0.0045, "step": 53800 }, { "epoch": 11.6876629018245, "grad_norm": 0.0001998993247980252, "learning_rate": 1.3140747176368376e-06, "loss": 0.0, "step": 53810 }, { "epoch": 11.689834926151173, "grad_norm": 0.0001943770475918427, "learning_rate": 1.3050246162757024e-06, "loss": 0.0, "step": 53820 }, { "epoch": 11.692006950477845, "grad_norm": 0.0002038269303739071, "learning_rate": 1.295974514914567e-06, "loss": 0.0, "step": 53830 }, { "epoch": 11.694178974804517, "grad_norm": 0.00019428586529102176, "learning_rate": 1.286924413553432e-06, "loss": 0.0, "step": 53840 }, { "epoch": 11.696350999131191, "grad_norm": 0.00023232153034768999, "learning_rate": 1.2778743121922967e-06, "loss": 0.0, "step": 53850 }, { "epoch": 11.698523023457863, "grad_norm": 0.00020070855680387467, "learning_rate": 1.2688242108311614e-06, "loss": 0.0, "step": 53860 }, { "epoch": 11.700695047784535, "grad_norm": 0.0002596253762021661, "learning_rate": 1.2597741094700262e-06, "loss": 0.0, "step": 53870 }, { "epoch": 11.702867072111207, "grad_norm": 0.00019496695313137025, "learning_rate": 1.2507240081088907e-06, "loss": 0.0, "step": 53880 }, { "epoch": 11.70503909643788, "grad_norm": 0.00021064665634185076, "learning_rate": 1.2416739067477557e-06, "loss": 0.0, "step": 53890 }, { "epoch": 11.707211120764553, "grad_norm": 0.00019595645426306874, "learning_rate": 1.2326238053866203e-06, "loss": 0.0, "step": 53900 }, { "epoch": 11.709383145091225, "grad_norm": 0.0001996116479858756, "learning_rate": 1.2235737040254852e-06, "loss": 0.0, "step": 53910 }, { "epoch": 11.711555169417897, "grad_norm": 0.0001989005832001567, "learning_rate": 1.2145236026643498e-06, "loss": 0.0, "step": 53920 }, { "epoch": 11.71372719374457, "grad_norm": 0.0001964517723536119, "learning_rate": 1.2054735013032146e-06, "loss": 0.004, "step": 53930 }, { "epoch": 11.715899218071243, "grad_norm": 0.00024799612583592534, "learning_rate": 1.1964233999420795e-06, "loss": 0.0036, "step": 53940 }, { "epoch": 11.718071242397915, "grad_norm": 0.00019944515952374786, "learning_rate": 1.187373298580944e-06, "loss": 0.0, "step": 53950 }, { "epoch": 11.720243266724587, "grad_norm": 0.00020389580458868295, "learning_rate": 1.178323197219809e-06, "loss": 0.0042, "step": 53960 }, { "epoch": 11.72241529105126, "grad_norm": 0.00019486738892737776, "learning_rate": 1.1692730958586736e-06, "loss": 0.0, "step": 53970 }, { "epoch": 11.724587315377931, "grad_norm": 0.00019491862622089684, "learning_rate": 1.1602229944975384e-06, "loss": 0.0046, "step": 53980 }, { "epoch": 11.726759339704605, "grad_norm": 0.00019389142107684165, "learning_rate": 1.1511728931364031e-06, "loss": 0.0, "step": 53990 }, { "epoch": 11.728931364031277, "grad_norm": 0.00019837978470604867, "learning_rate": 1.142122791775268e-06, "loss": 0.0, "step": 54000 }, { "epoch": 11.73110338835795, "grad_norm": 0.00021431567438412458, "learning_rate": 1.1330726904141327e-06, "loss": 0.0, "step": 54010 }, { "epoch": 11.733275412684621, "grad_norm": 0.00024961764574982226, "learning_rate": 1.1240225890529974e-06, "loss": 0.0, "step": 54020 }, { "epoch": 11.735447437011295, "grad_norm": 0.00020189674978610128, "learning_rate": 1.1149724876918622e-06, "loss": 0.0, "step": 54030 }, { "epoch": 11.737619461337967, "grad_norm": 0.000198492401978001, "learning_rate": 1.105922386330727e-06, "loss": 0.0, "step": 54040 }, { "epoch": 11.73979148566464, "grad_norm": 0.00026981427799910307, "learning_rate": 1.0968722849695917e-06, "loss": 0.0, "step": 54050 }, { "epoch": 11.741963509991312, "grad_norm": 0.002462733769789338, "learning_rate": 1.0878221836084565e-06, "loss": 0.0, "step": 54060 }, { "epoch": 11.744135534317984, "grad_norm": 0.0001970611629076302, "learning_rate": 1.0787720822473212e-06, "loss": 0.0, "step": 54070 }, { "epoch": 11.746307558644657, "grad_norm": 0.0001956707565113902, "learning_rate": 1.069721980886186e-06, "loss": 0.0, "step": 54080 }, { "epoch": 11.74847958297133, "grad_norm": 0.00031365029281005263, "learning_rate": 1.0606718795250508e-06, "loss": 0.0, "step": 54090 }, { "epoch": 11.750651607298002, "grad_norm": 0.00019289724878035486, "learning_rate": 1.0516217781639155e-06, "loss": 0.0, "step": 54100 }, { "epoch": 11.752823631624674, "grad_norm": 0.00020483179832808673, "learning_rate": 1.0425716768027803e-06, "loss": 0.0055, "step": 54110 }, { "epoch": 11.754995655951348, "grad_norm": 0.00019535243336576968, "learning_rate": 1.033521575441645e-06, "loss": 0.0051, "step": 54120 }, { "epoch": 11.75716768027802, "grad_norm": 0.00019420160970184952, "learning_rate": 1.0244714740805096e-06, "loss": 0.0, "step": 54130 }, { "epoch": 11.759339704604692, "grad_norm": 0.0002710081171244383, "learning_rate": 1.0154213727193746e-06, "loss": 0.0, "step": 54140 }, { "epoch": 11.761511728931364, "grad_norm": 0.00019317958503961563, "learning_rate": 1.0063712713582393e-06, "loss": 0.0049, "step": 54150 }, { "epoch": 11.763683753258036, "grad_norm": 0.0001933265448315069, "learning_rate": 9.97321169997104e-07, "loss": 0.0, "step": 54160 }, { "epoch": 11.76585577758471, "grad_norm": 0.00019155530026182532, "learning_rate": 9.882710686359689e-07, "loss": 0.0, "step": 54170 }, { "epoch": 11.768027801911382, "grad_norm": 0.0002440862444927916, "learning_rate": 9.792209672748334e-07, "loss": 0.0, "step": 54180 }, { "epoch": 11.770199826238054, "grad_norm": 0.00020259429584257305, "learning_rate": 9.701708659136984e-07, "loss": 0.0, "step": 54190 }, { "epoch": 11.772371850564726, "grad_norm": 0.00019361911108717322, "learning_rate": 9.61120764552563e-07, "loss": 0.0, "step": 54200 }, { "epoch": 11.774543874891398, "grad_norm": 0.00019974037422798574, "learning_rate": 9.520706631914279e-07, "loss": 0.0053, "step": 54210 }, { "epoch": 11.776715899218072, "grad_norm": 0.0001930526486830786, "learning_rate": 9.430205618302926e-07, "loss": 0.0, "step": 54220 }, { "epoch": 11.778887923544744, "grad_norm": 0.00019227658049203455, "learning_rate": 9.339704604691572e-07, "loss": 0.0048, "step": 54230 }, { "epoch": 11.781059947871416, "grad_norm": 0.00019346507906448096, "learning_rate": 9.249203591080221e-07, "loss": 0.0, "step": 54240 }, { "epoch": 11.783231972198088, "grad_norm": 0.0002069149340968579, "learning_rate": 9.158702577468867e-07, "loss": 0.0046, "step": 54250 }, { "epoch": 11.785403996524762, "grad_norm": 0.00019784543837886304, "learning_rate": 9.068201563857516e-07, "loss": 0.0, "step": 54260 }, { "epoch": 11.787576020851434, "grad_norm": 0.0001948605931829661, "learning_rate": 8.986750651607298e-07, "loss": 0.0, "step": 54270 }, { "epoch": 11.789748045178106, "grad_norm": 0.00019112876907456666, "learning_rate": 8.896249637995947e-07, "loss": 0.0, "step": 54280 }, { "epoch": 11.791920069504778, "grad_norm": 0.00019701290875673294, "learning_rate": 8.805748624384594e-07, "loss": 0.0, "step": 54290 }, { "epoch": 11.79409209383145, "grad_norm": 0.00027486091130413115, "learning_rate": 8.71524761077324e-07, "loss": 0.0, "step": 54300 }, { "epoch": 11.796264118158124, "grad_norm": 0.0002852969046216458, "learning_rate": 8.624746597161889e-07, "loss": 0.0, "step": 54310 }, { "epoch": 11.798436142484796, "grad_norm": 0.00019162050739396363, "learning_rate": 8.534245583550535e-07, "loss": 0.0, "step": 54320 }, { "epoch": 11.800608166811468, "grad_norm": 0.0002552253135945648, "learning_rate": 8.443744569939184e-07, "loss": 0.0043, "step": 54330 }, { "epoch": 11.80278019113814, "grad_norm": 0.00023266920470632613, "learning_rate": 8.353243556327831e-07, "loss": 0.0, "step": 54340 }, { "epoch": 11.804952215464812, "grad_norm": 0.00019286252791061997, "learning_rate": 8.262742542716478e-07, "loss": 0.0, "step": 54350 }, { "epoch": 11.807124239791486, "grad_norm": 0.00029586063465103507, "learning_rate": 8.172241529105127e-07, "loss": 0.0, "step": 54360 }, { "epoch": 11.809296264118158, "grad_norm": 0.00019190393504686654, "learning_rate": 8.081740515493774e-07, "loss": 0.0, "step": 54370 }, { "epoch": 11.81146828844483, "grad_norm": 0.00025092356372624636, "learning_rate": 7.991239501882422e-07, "loss": 0.0, "step": 54380 }, { "epoch": 11.813640312771502, "grad_norm": 0.00019765045726671815, "learning_rate": 7.900738488271069e-07, "loss": 0.0, "step": 54390 }, { "epoch": 11.815812337098176, "grad_norm": 0.00019178666116204113, "learning_rate": 7.810237474659716e-07, "loss": 0.0087, "step": 54400 }, { "epoch": 11.817984361424848, "grad_norm": 0.0002000819513341412, "learning_rate": 7.719736461048364e-07, "loss": 0.0, "step": 54410 }, { "epoch": 11.82015638575152, "grad_norm": 0.00020233175018802285, "learning_rate": 7.629235447437012e-07, "loss": 0.0, "step": 54420 }, { "epoch": 11.822328410078192, "grad_norm": 0.00019419100135564804, "learning_rate": 7.538734433825659e-07, "loss": 0.0, "step": 54430 }, { "epoch": 11.824500434404865, "grad_norm": 0.00020138765103183687, "learning_rate": 7.448233420214307e-07, "loss": 0.0, "step": 54440 }, { "epoch": 11.826672458731538, "grad_norm": 0.00019825338677037507, "learning_rate": 7.357732406602955e-07, "loss": 0.0, "step": 54450 }, { "epoch": 11.82884448305821, "grad_norm": 0.0001928244309965521, "learning_rate": 7.267231392991602e-07, "loss": 0.0, "step": 54460 }, { "epoch": 11.831016507384883, "grad_norm": 0.00019557155610527843, "learning_rate": 7.176730379380249e-07, "loss": 0.0, "step": 54470 }, { "epoch": 11.833188531711555, "grad_norm": 0.00019326162873767316, "learning_rate": 7.086229365768896e-07, "loss": 0.0, "step": 54480 }, { "epoch": 11.835360556038228, "grad_norm": 0.00019357928249519318, "learning_rate": 6.995728352157544e-07, "loss": 0.0, "step": 54490 }, { "epoch": 11.8375325803649, "grad_norm": 0.00025056168669834733, "learning_rate": 6.905227338546192e-07, "loss": 0.0, "step": 54500 }, { "epoch": 11.839704604691573, "grad_norm": 0.1530565619468689, "learning_rate": 6.81472632493484e-07, "loss": 0.004, "step": 54510 }, { "epoch": 11.841876629018245, "grad_norm": 0.0001981578243430704, "learning_rate": 6.724225311323487e-07, "loss": 0.0, "step": 54520 }, { "epoch": 11.844048653344917, "grad_norm": 0.0001922385417856276, "learning_rate": 6.633724297712135e-07, "loss": 0.0, "step": 54530 }, { "epoch": 11.84622067767159, "grad_norm": 0.00022527927649207413, "learning_rate": 6.543223284100782e-07, "loss": 0.0, "step": 54540 }, { "epoch": 11.848392701998263, "grad_norm": 0.00019796429842244834, "learning_rate": 6.45272227048943e-07, "loss": 0.0, "step": 54550 }, { "epoch": 11.850564726324935, "grad_norm": 0.00025319092674180865, "learning_rate": 6.362221256878077e-07, "loss": 0.0, "step": 54560 }, { "epoch": 11.852736750651607, "grad_norm": 0.0001936436165124178, "learning_rate": 6.271720243266724e-07, "loss": 0.0044, "step": 54570 }, { "epoch": 11.85490877497828, "grad_norm": 0.0001932820159709081, "learning_rate": 6.181219229655373e-07, "loss": 0.0, "step": 54580 }, { "epoch": 11.857080799304953, "grad_norm": 0.00019361200975254178, "learning_rate": 6.09071821604402e-07, "loss": 0.0, "step": 54590 }, { "epoch": 11.859252823631625, "grad_norm": 0.00020959909306839108, "learning_rate": 6.000217202432668e-07, "loss": 0.0, "step": 54600 }, { "epoch": 11.861424847958297, "grad_norm": 0.00019550872093532234, "learning_rate": 5.909716188821316e-07, "loss": 0.0, "step": 54610 }, { "epoch": 11.863596872284969, "grad_norm": 0.0002506279561202973, "learning_rate": 5.819215175209962e-07, "loss": 0.0045, "step": 54620 }, { "epoch": 11.865768896611643, "grad_norm": 0.0002453475899528712, "learning_rate": 5.72871416159861e-07, "loss": 0.0, "step": 54630 }, { "epoch": 11.867940920938315, "grad_norm": 0.00019125892140436918, "learning_rate": 5.638213147987257e-07, "loss": 0.0, "step": 54640 }, { "epoch": 11.870112945264987, "grad_norm": 0.00019863221677951515, "learning_rate": 5.547712134375905e-07, "loss": 0.0, "step": 54650 }, { "epoch": 11.872284969591659, "grad_norm": 0.00019293044169899076, "learning_rate": 5.457211120764553e-07, "loss": 0.0, "step": 54660 }, { "epoch": 11.874456993918331, "grad_norm": 0.00019518301996868104, "learning_rate": 5.3667101071532e-07, "loss": 0.0, "step": 54670 }, { "epoch": 11.876629018245005, "grad_norm": 0.00019262121350038797, "learning_rate": 5.276209093541848e-07, "loss": 0.0048, "step": 54680 }, { "epoch": 11.878801042571677, "grad_norm": 0.00019273081852588803, "learning_rate": 5.185708079930495e-07, "loss": 0.0, "step": 54690 }, { "epoch": 11.880973066898349, "grad_norm": 0.0001927847770275548, "learning_rate": 5.095207066319143e-07, "loss": 0.005, "step": 54700 }, { "epoch": 11.883145091225021, "grad_norm": 0.0001937558117788285, "learning_rate": 5.004706052707791e-07, "loss": 0.0, "step": 54710 }, { "epoch": 11.885317115551695, "grad_norm": 0.00019255632651038468, "learning_rate": 4.914205039096437e-07, "loss": 0.0, "step": 54720 }, { "epoch": 11.887489139878367, "grad_norm": 0.00024727650452405214, "learning_rate": 4.823704025485086e-07, "loss": 0.0, "step": 54730 }, { "epoch": 11.88966116420504, "grad_norm": 0.00019835654529742897, "learning_rate": 4.733203011873733e-07, "loss": 0.0, "step": 54740 }, { "epoch": 11.891833188531711, "grad_norm": 0.00019501452334225178, "learning_rate": 4.6427019982623807e-07, "loss": 0.0, "step": 54750 }, { "epoch": 11.894005212858383, "grad_norm": 0.00020195850811433047, "learning_rate": 4.552200984651029e-07, "loss": 0.0, "step": 54760 }, { "epoch": 11.896177237185057, "grad_norm": 0.17142532765865326, "learning_rate": 4.4616999710396754e-07, "loss": 0.0047, "step": 54770 }, { "epoch": 11.89834926151173, "grad_norm": 0.0001938289642566815, "learning_rate": 4.371198957428323e-07, "loss": 0.0, "step": 54780 }, { "epoch": 11.900521285838401, "grad_norm": 0.00020454356854315847, "learning_rate": 4.2806979438169707e-07, "loss": 0.0, "step": 54790 }, { "epoch": 11.902693310165073, "grad_norm": 0.043572086840867996, "learning_rate": 4.190196930205619e-07, "loss": 0.0, "step": 54800 }, { "epoch": 11.904865334491745, "grad_norm": 0.00019681689445860684, "learning_rate": 4.0996959165942665e-07, "loss": 0.0, "step": 54810 }, { "epoch": 11.90703735881842, "grad_norm": 0.0002530421188566834, "learning_rate": 4.009194902982913e-07, "loss": 0.0094, "step": 54820 }, { "epoch": 11.909209383145091, "grad_norm": 0.00019192055333405733, "learning_rate": 3.918693889371561e-07, "loss": 0.0, "step": 54830 }, { "epoch": 11.911381407471763, "grad_norm": 0.00019355231779627502, "learning_rate": 3.828192875760209e-07, "loss": 0.0, "step": 54840 }, { "epoch": 11.913553431798436, "grad_norm": 0.00023093956406228244, "learning_rate": 3.7376918621488564e-07, "loss": 0.0, "step": 54850 }, { "epoch": 11.91572545612511, "grad_norm": 0.00019186771532986313, "learning_rate": 3.6471908485375035e-07, "loss": 0.0, "step": 54860 }, { "epoch": 11.917897480451781, "grad_norm": 0.00019328697817400098, "learning_rate": 3.556689834926151e-07, "loss": 0.0, "step": 54870 }, { "epoch": 11.920069504778454, "grad_norm": 0.000195430257008411, "learning_rate": 3.466188821314799e-07, "loss": 0.0, "step": 54880 }, { "epoch": 11.922241529105126, "grad_norm": 0.00019267095194663852, "learning_rate": 3.3756878077034464e-07, "loss": 0.0, "step": 54890 }, { "epoch": 11.924413553431798, "grad_norm": 0.00019316418911330402, "learning_rate": 3.285186794092094e-07, "loss": 0.0, "step": 54900 }, { "epoch": 11.926585577758472, "grad_norm": 0.00019968993728980422, "learning_rate": 3.1946857804807417e-07, "loss": 0.0044, "step": 54910 }, { "epoch": 11.928757602085144, "grad_norm": 0.00019761281146202236, "learning_rate": 3.1041847668693893e-07, "loss": 0.0, "step": 54920 }, { "epoch": 11.930929626411816, "grad_norm": 0.16718927025794983, "learning_rate": 3.0136837532580364e-07, "loss": 0.0048, "step": 54930 }, { "epoch": 11.933101650738488, "grad_norm": 0.00022303135483525693, "learning_rate": 2.923182739646684e-07, "loss": 0.0, "step": 54940 }, { "epoch": 11.935273675065162, "grad_norm": 0.00019292996148578823, "learning_rate": 2.8326817260353316e-07, "loss": 0.0, "step": 54950 }, { "epoch": 11.937445699391834, "grad_norm": 0.0001956681371666491, "learning_rate": 2.7421807124239793e-07, "loss": 0.0, "step": 54960 }, { "epoch": 11.939617723718506, "grad_norm": 0.00019297373364679515, "learning_rate": 2.651679698812627e-07, "loss": 0.0, "step": 54970 }, { "epoch": 11.941789748045178, "grad_norm": 0.00019810539379250258, "learning_rate": 2.561178685201274e-07, "loss": 0.0, "step": 54980 }, { "epoch": 11.94396177237185, "grad_norm": 0.0001998866646317765, "learning_rate": 2.470677671589922e-07, "loss": 0.0, "step": 54990 }, { "epoch": 11.946133796698524, "grad_norm": 0.00024236796889454126, "learning_rate": 2.3801766579785698e-07, "loss": 0.0, "step": 55000 }, { "epoch": 11.948305821025196, "grad_norm": 0.00019588737632147968, "learning_rate": 2.2896756443672169e-07, "loss": 0.0048, "step": 55010 }, { "epoch": 11.950477845351868, "grad_norm": 0.00029732659459114075, "learning_rate": 2.1991746307558648e-07, "loss": 0.0, "step": 55020 }, { "epoch": 11.95264986967854, "grad_norm": 0.00019339253776706755, "learning_rate": 2.1086736171445119e-07, "loss": 0.0, "step": 55030 }, { "epoch": 11.954821894005214, "grad_norm": 0.0001921090151881799, "learning_rate": 2.0181726035331597e-07, "loss": 0.005, "step": 55040 }, { "epoch": 11.956993918331886, "grad_norm": 0.00019395742856431752, "learning_rate": 1.927671589921807e-07, "loss": 0.0, "step": 55050 }, { "epoch": 11.959165942658558, "grad_norm": 0.00019353099924046546, "learning_rate": 1.8371705763104547e-07, "loss": 0.0, "step": 55060 }, { "epoch": 11.96133796698523, "grad_norm": 0.00025086343521252275, "learning_rate": 1.7466695626991024e-07, "loss": 0.0, "step": 55070 }, { "epoch": 11.963509991311902, "grad_norm": 0.0002000442473217845, "learning_rate": 1.65616854908775e-07, "loss": 0.0, "step": 55080 }, { "epoch": 11.965682015638576, "grad_norm": 0.00019300452549941838, "learning_rate": 1.5656675354763973e-07, "loss": 0.0, "step": 55090 }, { "epoch": 11.967854039965248, "grad_norm": 0.000195394764887169, "learning_rate": 1.475166521865045e-07, "loss": 0.0048, "step": 55100 }, { "epoch": 11.97002606429192, "grad_norm": 0.00020142064022365957, "learning_rate": 1.3846655082536923e-07, "loss": 0.0044, "step": 55110 }, { "epoch": 11.972198088618592, "grad_norm": 0.00020469767332542688, "learning_rate": 1.2941644946423402e-07, "loss": 0.0, "step": 55120 }, { "epoch": 11.974370112945264, "grad_norm": 0.00019746186444535851, "learning_rate": 1.2036634810309876e-07, "loss": 0.0, "step": 55130 }, { "epoch": 11.976542137271938, "grad_norm": 0.00019035911827813834, "learning_rate": 1.1131624674196352e-07, "loss": 0.0, "step": 55140 }, { "epoch": 11.97871416159861, "grad_norm": 0.00019300123676657677, "learning_rate": 1.0226614538082827e-07, "loss": 0.0, "step": 55150 }, { "epoch": 11.980886185925282, "grad_norm": 0.00021308429131750017, "learning_rate": 9.321604401969303e-08, "loss": 0.0, "step": 55160 }, { "epoch": 11.983058210251954, "grad_norm": 0.00019624890410341322, "learning_rate": 8.416594265855778e-08, "loss": 0.0, "step": 55170 }, { "epoch": 11.985230234578626, "grad_norm": 0.0002495882799848914, "learning_rate": 7.511584129742253e-08, "loss": 0.0, "step": 55180 }, { "epoch": 11.9874022589053, "grad_norm": 0.0002517025568522513, "learning_rate": 6.60657399362873e-08, "loss": 0.0, "step": 55190 }, { "epoch": 11.989574283231972, "grad_norm": 0.0002455560024827719, "learning_rate": 5.7015638575152043e-08, "loss": 0.0, "step": 55200 }, { "epoch": 11.991746307558644, "grad_norm": 0.0002454043715260923, "learning_rate": 4.79655372140168e-08, "loss": 0.0, "step": 55210 }, { "epoch": 11.993918331885316, "grad_norm": 0.00019305164460092783, "learning_rate": 3.8915435852881555e-08, "loss": 0.0, "step": 55220 }, { "epoch": 11.99609035621199, "grad_norm": 0.0004981443635188043, "learning_rate": 2.986533449174631e-08, "loss": 0.0, "step": 55230 }, { "epoch": 11.998262380538662, "grad_norm": 0.00019264254660811275, "learning_rate": 2.0815233130611064e-08, "loss": 0.0, "step": 55240 }, { "epoch": 12.0, "eval_f1": 0.6394052044609665, "eval_loss": 0.09074818342924118, "eval_runtime": 83.8935, "eval_samples_per_second": 118.901, "eval_steps_per_second": 7.438, "step": 55248 }, { "epoch": 12.0, "step": 55248, "total_flos": 6.849671946013016e+19, "train_loss": 0.0005868991143025554, "train_runtime": 6465.7027, "train_samples_per_second": 136.709, "train_steps_per_second": 8.545 } ], "logging_steps": 10, "max_steps": 55248, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.849671946013016e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }