{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.39489923788056713, "eval_steps": 500, "global_step": 400848, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.925797782208806e-05, "grad_norm": 4.725747585296631, "learning_rate": 4.9000000000000005e-06, "loss": 10.8905, "step": 50 }, { "epoch": 9.851595564417613e-05, "grad_norm": 2.1797287464141846, "learning_rate": 9.9e-06, "loss": 9.8832, "step": 100 }, { "epoch": 0.0001477739334662642, "grad_norm": 1.7530624866485596, "learning_rate": 9.999903452461176e-06, "loss": 9.1544, "step": 150 }, { "epoch": 0.00019703191128835225, "grad_norm": 1.458598256111145, "learning_rate": 9.999804934564413e-06, "loss": 8.6751, "step": 200 }, { "epoch": 0.00024628988911044034, "grad_norm": 1.4195055961608887, "learning_rate": 9.999706416667653e-06, "loss": 8.2598, "step": 250 }, { "epoch": 0.0002955478669325284, "grad_norm": 1.2028930187225342, "learning_rate": 9.999607898770891e-06, "loss": 7.9452, "step": 300 }, { "epoch": 0.00034480584475461647, "grad_norm": 1.1207659244537354, "learning_rate": 9.99950938087413e-06, "loss": 7.7246, "step": 350 }, { "epoch": 0.0003940638225767045, "grad_norm": 0.9927027225494385, "learning_rate": 9.999410862977369e-06, "loss": 7.5687, "step": 400 }, { "epoch": 0.0004433218003987926, "grad_norm": 0.7077513933181763, "learning_rate": 9.999312345080608e-06, "loss": 7.4449, "step": 450 }, { "epoch": 0.0004925797782208807, "grad_norm": 1.172118902206421, "learning_rate": 9.999213827183846e-06, "loss": 7.3723, "step": 500 }, { "epoch": 0.0005418377560429688, "grad_norm": 0.7955384254455566, "learning_rate": 9.999115309287086e-06, "loss": 7.2582, "step": 550 }, { "epoch": 0.0005910957338650568, "grad_norm": 1.0187832117080688, "learning_rate": 9.999016791390325e-06, "loss": 7.2036, "step": 600 }, { "epoch": 0.0006403537116871448, "grad_norm": 1.025915265083313, "learning_rate": 
9.998918273493563e-06, "loss": 7.1192, "step": 650 }, { "epoch": 0.0006896116895092329, "grad_norm": 1.271194338798523, "learning_rate": 9.998819755596803e-06, "loss": 7.0922, "step": 700 }, { "epoch": 0.000738869667331321, "grad_norm": 1.4047073125839233, "learning_rate": 9.998721237700041e-06, "loss": 7.0221, "step": 750 }, { "epoch": 0.000788127645153409, "grad_norm": 1.6794075965881348, "learning_rate": 9.99862271980328e-06, "loss": 6.9713, "step": 800 }, { "epoch": 0.0008373856229754971, "grad_norm": 1.4108102321624756, "learning_rate": 9.998524201906518e-06, "loss": 6.9433, "step": 850 }, { "epoch": 0.0008866436007975852, "grad_norm": 1.5267258882522583, "learning_rate": 9.998425684009758e-06, "loss": 6.9022, "step": 900 }, { "epoch": 0.0009359015786196733, "grad_norm": 1.275080919265747, "learning_rate": 9.998327166112996e-06, "loss": 6.8109, "step": 950 }, { "epoch": 0.0009851595564417614, "grad_norm": 3.133509635925293, "learning_rate": 9.998228648216236e-06, "loss": 6.7884, "step": 1000 }, { "epoch": 0.0010344175342638495, "grad_norm": 1.3738231658935547, "learning_rate": 9.998130130319475e-06, "loss": 6.7652, "step": 1050 }, { "epoch": 0.0010836755120859375, "grad_norm": 1.615710973739624, "learning_rate": 9.998031612422713e-06, "loss": 6.7464, "step": 1100 }, { "epoch": 0.0011329334899080254, "grad_norm": 1.3281502723693848, "learning_rate": 9.997933094525953e-06, "loss": 6.6762, "step": 1150 }, { "epoch": 0.0011821914677301135, "grad_norm": 1.5175045728683472, "learning_rate": 9.997834576629192e-06, "loss": 6.6743, "step": 1200 }, { "epoch": 0.0012314494455522016, "grad_norm": 1.5772520303726196, "learning_rate": 9.99773605873243e-06, "loss": 6.6188, "step": 1250 }, { "epoch": 0.0012807074233742897, "grad_norm": 1.855621576309204, "learning_rate": 9.997637540835668e-06, "loss": 6.5788, "step": 1300 }, { "epoch": 0.0013299654011963778, "grad_norm": 2.193713665008545, "learning_rate": 9.997539022938908e-06, "loss": 6.5943, "step": 1350 }, { "epoch": 
0.0013792233790184659, "grad_norm": 1.454984188079834, "learning_rate": 9.997440505042146e-06, "loss": 6.5445, "step": 1400 }, { "epoch": 0.001428481356840554, "grad_norm": 1.607843041419983, "learning_rate": 9.997341987145386e-06, "loss": 6.516, "step": 1450 }, { "epoch": 0.001477739334662642, "grad_norm": 2.0937602519989014, "learning_rate": 9.997243469248623e-06, "loss": 6.4946, "step": 1500 }, { "epoch": 0.00152699731248473, "grad_norm": 1.8227548599243164, "learning_rate": 9.997144951351863e-06, "loss": 6.4632, "step": 1550 }, { "epoch": 0.001576255290306818, "grad_norm": 1.6639997959136963, "learning_rate": 9.997046433455103e-06, "loss": 6.4916, "step": 1600 }, { "epoch": 0.001625513268128906, "grad_norm": 1.8181363344192505, "learning_rate": 9.996947915558342e-06, "loss": 6.4132, "step": 1650 }, { "epoch": 0.0016747712459509942, "grad_norm": 1.9472105503082275, "learning_rate": 9.99684939766158e-06, "loss": 6.3929, "step": 1700 }, { "epoch": 0.0017240292237730823, "grad_norm": 2.028184175491333, "learning_rate": 9.99675087976482e-06, "loss": 6.3564, "step": 1750 }, { "epoch": 0.0017732872015951704, "grad_norm": 1.6994073390960693, "learning_rate": 9.996652361868058e-06, "loss": 6.3804, "step": 1800 }, { "epoch": 0.0018225451794172585, "grad_norm": 2.174935817718506, "learning_rate": 9.996553843971296e-06, "loss": 6.357, "step": 1850 }, { "epoch": 0.0018718031572393466, "grad_norm": 2.056065797805786, "learning_rate": 9.996455326074535e-06, "loss": 6.3348, "step": 1900 }, { "epoch": 0.0019210611350614346, "grad_norm": 1.9429932832717896, "learning_rate": 9.996356808177773e-06, "loss": 6.3153, "step": 1950 }, { "epoch": 0.0019703191128835227, "grad_norm": 1.999122142791748, "learning_rate": 9.996258290281013e-06, "loss": 6.345, "step": 2000 }, { "epoch": 0.002019577090705611, "grad_norm": 1.873927116394043, "learning_rate": 9.996159772384253e-06, "loss": 6.2666, "step": 2050 }, { "epoch": 0.002068835068527699, "grad_norm": 2.1296603679656982, "learning_rate": 
9.99606125448749e-06, "loss": 6.2563, "step": 2100 }, { "epoch": 0.002118093046349787, "grad_norm": 1.8446933031082153, "learning_rate": 9.99596273659073e-06, "loss": 6.2273, "step": 2150 }, { "epoch": 0.002167351024171875, "grad_norm": 1.9267503023147583, "learning_rate": 9.99586421869397e-06, "loss": 6.2162, "step": 2200 }, { "epoch": 0.0022166090019939627, "grad_norm": 2.406846523284912, "learning_rate": 9.995765700797208e-06, "loss": 6.1871, "step": 2250 }, { "epoch": 0.002265866979816051, "grad_norm": 2.456120729446411, "learning_rate": 9.995667182900447e-06, "loss": 6.2566, "step": 2300 }, { "epoch": 0.002315124957638139, "grad_norm": 1.8963744640350342, "learning_rate": 9.995568665003685e-06, "loss": 6.1969, "step": 2350 }, { "epoch": 0.002364382935460227, "grad_norm": 2.4525437355041504, "learning_rate": 9.995470147106923e-06, "loss": 6.1614, "step": 2400 }, { "epoch": 0.002413640913282315, "grad_norm": 2.212376832962036, "learning_rate": 9.995371629210163e-06, "loss": 6.1044, "step": 2450 }, { "epoch": 0.002462898891104403, "grad_norm": 1.9281485080718994, "learning_rate": 9.995273111313402e-06, "loss": 6.0902, "step": 2500 }, { "epoch": 0.0025121568689264913, "grad_norm": 2.088820695877075, "learning_rate": 9.99517459341664e-06, "loss": 6.0916, "step": 2550 }, { "epoch": 0.0025614148467485794, "grad_norm": 2.277050018310547, "learning_rate": 9.99507607551988e-06, "loss": 6.0823, "step": 2600 }, { "epoch": 0.0026106728245706675, "grad_norm": 2.2391433715820312, "learning_rate": 9.99497755762312e-06, "loss": 6.0875, "step": 2650 }, { "epoch": 0.0026599308023927556, "grad_norm": 2.389378547668457, "learning_rate": 9.994879039726358e-06, "loss": 6.068, "step": 2700 }, { "epoch": 0.0027091887802148436, "grad_norm": 2.047518491744995, "learning_rate": 9.994780521829597e-06, "loss": 6.0431, "step": 2750 }, { "epoch": 0.0027584467580369317, "grad_norm": 2.395382881164551, "learning_rate": 9.994682003932835e-06, "loss": 6.0413, "step": 2800 }, { "epoch": 
0.00280770473585902, "grad_norm": 2.185030221939087, "learning_rate": 9.994583486036075e-06, "loss": 6.0214, "step": 2850 }, { "epoch": 0.002856962713681108, "grad_norm": 1.9633649587631226, "learning_rate": 9.994484968139313e-06, "loss": 5.9573, "step": 2900 }, { "epoch": 0.002906220691503196, "grad_norm": 2.1540608406066895, "learning_rate": 9.99438645024255e-06, "loss": 5.9708, "step": 2950 }, { "epoch": 0.002955478669325284, "grad_norm": 2.28556227684021, "learning_rate": 9.99428793234579e-06, "loss": 5.9707, "step": 3000 }, { "epoch": 0.003004736647147372, "grad_norm": 2.1143901348114014, "learning_rate": 9.99418941444903e-06, "loss": 5.9658, "step": 3050 }, { "epoch": 0.00305399462496946, "grad_norm": 2.667074680328369, "learning_rate": 9.99409089655227e-06, "loss": 5.9224, "step": 3100 }, { "epoch": 0.003103252602791548, "grad_norm": 2.3364250659942627, "learning_rate": 9.993992378655507e-06, "loss": 5.9221, "step": 3150 }, { "epoch": 0.003152510580613636, "grad_norm": 2.3552770614624023, "learning_rate": 9.993893860758747e-06, "loss": 5.9139, "step": 3200 }, { "epoch": 0.003201768558435724, "grad_norm": 2.1598031520843506, "learning_rate": 9.993795342861985e-06, "loss": 5.9354, "step": 3250 }, { "epoch": 0.003251026536257812, "grad_norm": 2.305964708328247, "learning_rate": 9.993696824965225e-06, "loss": 5.9267, "step": 3300 }, { "epoch": 0.0033002845140799003, "grad_norm": 2.497861623764038, "learning_rate": 9.993598307068463e-06, "loss": 5.9537, "step": 3350 }, { "epoch": 0.0033495424919019884, "grad_norm": 2.5343267917633057, "learning_rate": 9.993499789171702e-06, "loss": 5.9309, "step": 3400 }, { "epoch": 0.0033988004697240765, "grad_norm": 2.1720359325408936, "learning_rate": 9.99340127127494e-06, "loss": 5.8551, "step": 3450 }, { "epoch": 0.0034480584475461646, "grad_norm": 2.4061872959136963, "learning_rate": 9.99330275337818e-06, "loss": 5.8667, "step": 3500 }, { "epoch": 0.0034973164253682527, "grad_norm": 2.1035633087158203, "learning_rate": 
9.99320423548142e-06, "loss": 5.8721, "step": 3550 }, { "epoch": 0.0035465744031903407, "grad_norm": 2.323695659637451, "learning_rate": 9.993105717584657e-06, "loss": 5.8444, "step": 3600 }, { "epoch": 0.003595832381012429, "grad_norm": 2.7823591232299805, "learning_rate": 9.993007199687897e-06, "loss": 5.9007, "step": 3650 }, { "epoch": 0.003645090358834517, "grad_norm": 2.4761390686035156, "learning_rate": 9.992908681791135e-06, "loss": 5.8517, "step": 3700 }, { "epoch": 0.003694348336656605, "grad_norm": 2.1898834705352783, "learning_rate": 9.992810163894374e-06, "loss": 5.8172, "step": 3750 }, { "epoch": 0.003743606314478693, "grad_norm": 2.3213911056518555, "learning_rate": 9.992711645997612e-06, "loss": 5.8537, "step": 3800 }, { "epoch": 0.003792864292300781, "grad_norm": 2.354266405105591, "learning_rate": 9.992613128100852e-06, "loss": 5.7941, "step": 3850 }, { "epoch": 0.0038421222701228693, "grad_norm": 2.4953129291534424, "learning_rate": 9.99251461020409e-06, "loss": 5.8024, "step": 3900 }, { "epoch": 0.0038913802479449574, "grad_norm": 2.5142929553985596, "learning_rate": 9.99241609230733e-06, "loss": 5.7685, "step": 3950 }, { "epoch": 0.0039406382257670455, "grad_norm": 2.313856601715088, "learning_rate": 9.992317574410567e-06, "loss": 5.8136, "step": 4000 }, { "epoch": 0.003989896203589133, "grad_norm": 2.510972738265991, "learning_rate": 9.992219056513807e-06, "loss": 5.7491, "step": 4050 }, { "epoch": 0.004039154181411222, "grad_norm": 2.5576400756835938, "learning_rate": 9.992120538617047e-06, "loss": 5.697, "step": 4100 }, { "epoch": 0.004088412159233309, "grad_norm": 2.7585551738739014, "learning_rate": 9.992022020720285e-06, "loss": 5.7728, "step": 4150 }, { "epoch": 0.004137670137055398, "grad_norm": 2.6345133781433105, "learning_rate": 9.991923502823524e-06, "loss": 5.7841, "step": 4200 }, { "epoch": 0.0041869281148774855, "grad_norm": 2.1408374309539795, "learning_rate": 9.991824984926762e-06, "loss": 5.7751, "step": 4250 }, { "epoch": 
0.004236186092699574, "grad_norm": 2.6891562938690186, "learning_rate": 9.991726467030002e-06, "loss": 5.6952, "step": 4300 }, { "epoch": 0.004285444070521662, "grad_norm": 2.546168804168701, "learning_rate": 9.99162794913324e-06, "loss": 5.6943, "step": 4350 }, { "epoch": 0.00433470204834375, "grad_norm": 2.212948799133301, "learning_rate": 9.99152943123648e-06, "loss": 5.6808, "step": 4400 }, { "epoch": 0.004383960026165838, "grad_norm": 2.2889039516448975, "learning_rate": 9.991430913339717e-06, "loss": 5.6614, "step": 4450 }, { "epoch": 0.0044332180039879255, "grad_norm": 2.9201114177703857, "learning_rate": 9.991332395442957e-06, "loss": 5.7015, "step": 4500 }, { "epoch": 0.004482475981810014, "grad_norm": 2.0735507011413574, "learning_rate": 9.991233877546197e-06, "loss": 5.6942, "step": 4550 }, { "epoch": 0.004531733959632102, "grad_norm": 2.483539581298828, "learning_rate": 9.991135359649435e-06, "loss": 5.668, "step": 4600 }, { "epoch": 0.00458099193745419, "grad_norm": 2.944549798965454, "learning_rate": 9.991036841752674e-06, "loss": 5.6572, "step": 4650 }, { "epoch": 0.004630249915276278, "grad_norm": 2.0549709796905518, "learning_rate": 9.990938323855912e-06, "loss": 5.6971, "step": 4700 }, { "epoch": 0.004679507893098366, "grad_norm": 2.51755952835083, "learning_rate": 9.990839805959152e-06, "loss": 5.6556, "step": 4750 }, { "epoch": 0.004728765870920454, "grad_norm": 2.2347500324249268, "learning_rate": 9.99074128806239e-06, "loss": 5.635, "step": 4800 }, { "epoch": 0.0047780238487425426, "grad_norm": 2.4430387020111084, "learning_rate": 9.99064277016563e-06, "loss": 5.6139, "step": 4850 }, { "epoch": 0.00482728182656463, "grad_norm": 2.5289666652679443, "learning_rate": 9.990544252268867e-06, "loss": 5.6439, "step": 4900 }, { "epoch": 0.004876539804386719, "grad_norm": 2.662947177886963, "learning_rate": 9.990445734372107e-06, "loss": 5.6426, "step": 4950 }, { "epoch": 0.004925797782208806, "grad_norm": 2.731626272201538, "learning_rate": 
9.990347216475346e-06, "loss": 5.5945, "step": 5000 }, { "epoch": 0.004975055760030895, "grad_norm": 2.1274847984313965, "learning_rate": 9.990248698578584e-06, "loss": 5.6053, "step": 5050 }, { "epoch": 0.005024313737852983, "grad_norm": 2.5701067447662354, "learning_rate": 9.990150180681824e-06, "loss": 5.6039, "step": 5100 }, { "epoch": 0.005073571715675071, "grad_norm": 2.9836716651916504, "learning_rate": 9.990051662785062e-06, "loss": 5.5815, "step": 5150 }, { "epoch": 0.005122829693497159, "grad_norm": 2.1876139640808105, "learning_rate": 9.989953144888302e-06, "loss": 5.5223, "step": 5200 }, { "epoch": 0.005172087671319247, "grad_norm": 2.5413544178009033, "learning_rate": 9.98985462699154e-06, "loss": 5.5988, "step": 5250 }, { "epoch": 0.005221345649141335, "grad_norm": 2.636404514312744, "learning_rate": 9.989756109094779e-06, "loss": 5.6048, "step": 5300 }, { "epoch": 0.005270603626963423, "grad_norm": 3.6950316429138184, "learning_rate": 9.989657591198017e-06, "loss": 5.6094, "step": 5350 }, { "epoch": 0.005319861604785511, "grad_norm": 2.6820759773254395, "learning_rate": 9.989559073301257e-06, "loss": 5.514, "step": 5400 }, { "epoch": 0.005369119582607599, "grad_norm": 2.682157278060913, "learning_rate": 9.989460555404495e-06, "loss": 5.6015, "step": 5450 }, { "epoch": 0.005418377560429687, "grad_norm": 2.904618740081787, "learning_rate": 9.989362037507734e-06, "loss": 5.5666, "step": 5500 }, { "epoch": 0.005467635538251775, "grad_norm": 2.792703628540039, "learning_rate": 9.989263519610974e-06, "loss": 5.5716, "step": 5550 }, { "epoch": 0.0055168935160738635, "grad_norm": 2.512916326522827, "learning_rate": 9.989165001714214e-06, "loss": 5.5335, "step": 5600 }, { "epoch": 0.005566151493895951, "grad_norm": 2.446918487548828, "learning_rate": 9.989066483817451e-06, "loss": 5.5122, "step": 5650 }, { "epoch": 0.00561540947171804, "grad_norm": 2.86995530128479, "learning_rate": 9.98896796592069e-06, "loss": 5.5549, "step": 5700 }, { "epoch": 
0.005664667449540127, "grad_norm": 2.0753886699676514, "learning_rate": 9.988869448023929e-06, "loss": 5.4771, "step": 5750 }, { "epoch": 0.005713925427362216, "grad_norm": 2.271014451980591, "learning_rate": 9.988770930127167e-06, "loss": 5.5639, "step": 5800 }, { "epoch": 0.0057631834051843035, "grad_norm": 2.598052978515625, "learning_rate": 9.988672412230407e-06, "loss": 5.489, "step": 5850 }, { "epoch": 0.005812441383006392, "grad_norm": 2.9920969009399414, "learning_rate": 9.988573894333645e-06, "loss": 5.5314, "step": 5900 }, { "epoch": 0.00586169936082848, "grad_norm": 2.37494158744812, "learning_rate": 9.988475376436884e-06, "loss": 5.5744, "step": 5950 }, { "epoch": 0.005910957338650568, "grad_norm": 3.0005249977111816, "learning_rate": 9.988376858540124e-06, "loss": 5.4652, "step": 6000 }, { "epoch": 0.005960215316472656, "grad_norm": 2.7213375568389893, "learning_rate": 9.988278340643363e-06, "loss": 5.4926, "step": 6050 }, { "epoch": 0.006009473294294744, "grad_norm": 2.5693137645721436, "learning_rate": 9.988179822746601e-06, "loss": 5.4675, "step": 6100 }, { "epoch": 0.006058731272116832, "grad_norm": 2.4643566608428955, "learning_rate": 9.98808130484984e-06, "loss": 5.4316, "step": 6150 }, { "epoch": 0.00610798924993892, "grad_norm": 2.843510150909424, "learning_rate": 9.987982786953079e-06, "loss": 5.4439, "step": 6200 }, { "epoch": 0.006157247227761008, "grad_norm": 2.2801809310913086, "learning_rate": 9.987884269056317e-06, "loss": 5.4392, "step": 6250 }, { "epoch": 0.006206505205583096, "grad_norm": 2.228440046310425, "learning_rate": 9.987785751159556e-06, "loss": 5.4408, "step": 6300 }, { "epoch": 0.006255763183405184, "grad_norm": 2.654935598373413, "learning_rate": 9.987687233262794e-06, "loss": 5.4031, "step": 6350 }, { "epoch": 0.006305021161227272, "grad_norm": 2.7862391471862793, "learning_rate": 9.987588715366034e-06, "loss": 5.4508, "step": 6400 }, { "epoch": 0.006354279139049361, "grad_norm": 3.0758538246154785, "learning_rate": 
9.987490197469274e-06, "loss": 5.4921, "step": 6450 }, { "epoch": 0.006403537116871448, "grad_norm": 2.832315683364868, "learning_rate": 9.987391679572512e-06, "loss": 5.462, "step": 6500 }, { "epoch": 0.006452795094693537, "grad_norm": 2.451906204223633, "learning_rate": 9.987293161675751e-06, "loss": 5.4419, "step": 6550 }, { "epoch": 0.006502053072515624, "grad_norm": 2.3465466499328613, "learning_rate": 9.98719464377899e-06, "loss": 5.4235, "step": 6600 }, { "epoch": 0.006551311050337713, "grad_norm": 2.684234619140625, "learning_rate": 9.987096125882229e-06, "loss": 5.4586, "step": 6650 }, { "epoch": 0.006600569028159801, "grad_norm": 2.792478084564209, "learning_rate": 9.986997607985467e-06, "loss": 5.428, "step": 6700 }, { "epoch": 0.006649827005981889, "grad_norm": 2.363053560256958, "learning_rate": 9.986899090088706e-06, "loss": 5.4461, "step": 6750 }, { "epoch": 0.006699084983803977, "grad_norm": 2.9717445373535156, "learning_rate": 9.986800572191944e-06, "loss": 5.3866, "step": 6800 }, { "epoch": 0.006748342961626065, "grad_norm": 3.2148444652557373, "learning_rate": 9.986702054295184e-06, "loss": 5.3826, "step": 6850 }, { "epoch": 0.006797600939448153, "grad_norm": 2.63067626953125, "learning_rate": 9.986603536398422e-06, "loss": 5.4061, "step": 6900 }, { "epoch": 0.0068468589172702415, "grad_norm": 2.5444979667663574, "learning_rate": 9.986505018501661e-06, "loss": 5.3423, "step": 6950 }, { "epoch": 0.006896116895092329, "grad_norm": 2.4615836143493652, "learning_rate": 9.986406500604901e-06, "loss": 5.3855, "step": 7000 }, { "epoch": 0.006945374872914418, "grad_norm": 2.4069976806640625, "learning_rate": 9.98630798270814e-06, "loss": 5.3411, "step": 7050 }, { "epoch": 0.006994632850736505, "grad_norm": 2.8284878730773926, "learning_rate": 9.986209464811379e-06, "loss": 5.3785, "step": 7100 }, { "epoch": 0.007043890828558593, "grad_norm": 2.9697582721710205, "learning_rate": 9.986110946914618e-06, "loss": 5.3951, "step": 7150 }, { "epoch": 
0.0070931488063806815, "grad_norm": 3.478571891784668, "learning_rate": 9.986012429017856e-06, "loss": 5.3656, "step": 7200 }, { "epoch": 0.007142406784202769, "grad_norm": 2.390310525894165, "learning_rate": 9.985913911121094e-06, "loss": 5.3476, "step": 7250 }, { "epoch": 0.007191664762024858, "grad_norm": 3.101663112640381, "learning_rate": 9.985815393224334e-06, "loss": 5.3678, "step": 7300 }, { "epoch": 0.007240922739846945, "grad_norm": 2.633324146270752, "learning_rate": 9.985716875327572e-06, "loss": 5.3972, "step": 7350 }, { "epoch": 0.007290180717669034, "grad_norm": 2.532758951187134, "learning_rate": 9.985618357430811e-06, "loss": 5.2801, "step": 7400 }, { "epoch": 0.0073394386954911215, "grad_norm": 2.296886444091797, "learning_rate": 9.985519839534051e-06, "loss": 5.3737, "step": 7450 }, { "epoch": 0.00738869667331321, "grad_norm": 2.2987313270568848, "learning_rate": 9.98542132163729e-06, "loss": 5.3501, "step": 7500 }, { "epoch": 0.007437954651135298, "grad_norm": 2.866952419281006, "learning_rate": 9.985322803740528e-06, "loss": 5.4083, "step": 7550 }, { "epoch": 0.007487212628957386, "grad_norm": 2.6010262966156006, "learning_rate": 9.985224285843768e-06, "loss": 5.2717, "step": 7600 }, { "epoch": 0.007536470606779474, "grad_norm": 2.413188934326172, "learning_rate": 9.985125767947006e-06, "loss": 5.3104, "step": 7650 }, { "epoch": 0.007585728584601562, "grad_norm": 2.554912805557251, "learning_rate": 9.985027250050246e-06, "loss": 5.4004, "step": 7700 }, { "epoch": 0.00763498656242365, "grad_norm": 2.739072561264038, "learning_rate": 9.984928732153484e-06, "loss": 5.3099, "step": 7750 }, { "epoch": 0.007684244540245739, "grad_norm": 2.6271185874938965, "learning_rate": 9.984830214256722e-06, "loss": 5.2716, "step": 7800 }, { "epoch": 0.007733502518067826, "grad_norm": 2.8631014823913574, "learning_rate": 9.984731696359961e-06, "loss": 5.3577, "step": 7850 }, { "epoch": 0.007782760495889915, "grad_norm": 2.8305814266204834, "learning_rate": 
9.9846331784632e-06, "loss": 5.3704, "step": 7900 }, { "epoch": 0.007832018473712002, "grad_norm": 2.467615842819214, "learning_rate": 9.984534660566439e-06, "loss": 5.2781, "step": 7950 }, { "epoch": 0.007881276451534091, "grad_norm": 2.7488834857940674, "learning_rate": 9.984436142669678e-06, "loss": 5.3023, "step": 8000 }, { "epoch": 0.007930534429356179, "grad_norm": 2.5197978019714355, "learning_rate": 9.984337624772918e-06, "loss": 5.289, "step": 8050 }, { "epoch": 0.007979792407178266, "grad_norm": 3.241250991821289, "learning_rate": 9.984239106876156e-06, "loss": 5.2463, "step": 8100 }, { "epoch": 0.008029050385000354, "grad_norm": 2.873776435852051, "learning_rate": 9.984140588979396e-06, "loss": 5.2921, "step": 8150 }, { "epoch": 0.008078308362822443, "grad_norm": 2.737545967102051, "learning_rate": 9.984042071082633e-06, "loss": 5.3002, "step": 8200 }, { "epoch": 0.008127566340644531, "grad_norm": 2.891793727874756, "learning_rate": 9.983943553185873e-06, "loss": 5.2682, "step": 8250 }, { "epoch": 0.008176824318466619, "grad_norm": 3.688206672668457, "learning_rate": 9.983845035289111e-06, "loss": 5.2973, "step": 8300 }, { "epoch": 0.008226082296288706, "grad_norm": 3.1400790214538574, "learning_rate": 9.98374651739235e-06, "loss": 5.2762, "step": 8350 }, { "epoch": 0.008275340274110796, "grad_norm": 2.4480161666870117, "learning_rate": 9.983647999495589e-06, "loss": 5.3078, "step": 8400 }, { "epoch": 0.008324598251932883, "grad_norm": 2.787797689437866, "learning_rate": 9.983549481598828e-06, "loss": 5.2399, "step": 8450 }, { "epoch": 0.008373856229754971, "grad_norm": 2.967879295349121, "learning_rate": 9.983450963702068e-06, "loss": 5.271, "step": 8500 }, { "epoch": 0.008423114207577059, "grad_norm": 2.8270304203033447, "learning_rate": 9.983352445805306e-06, "loss": 5.2504, "step": 8550 }, { "epoch": 0.008472372185399148, "grad_norm": 3.0329606533050537, "learning_rate": 9.983253927908545e-06, "loss": 5.1802, "step": 8600 }, { "epoch": 
0.008521630163221236, "grad_norm": 3.035229444503784, "learning_rate": 9.983155410011783e-06, "loss": 5.2257, "step": 8650 }, { "epoch": 0.008570888141043323, "grad_norm": 3.1413824558258057, "learning_rate": 9.983056892115023e-06, "loss": 5.218, "step": 8700 }, { "epoch": 0.008620146118865411, "grad_norm": 2.30039644241333, "learning_rate": 9.982958374218261e-06, "loss": 5.1838, "step": 8750 }, { "epoch": 0.0086694040966875, "grad_norm": 2.7453882694244385, "learning_rate": 9.9828598563215e-06, "loss": 5.2439, "step": 8800 }, { "epoch": 0.008718662074509588, "grad_norm": 2.842217445373535, "learning_rate": 9.982761338424738e-06, "loss": 5.2346, "step": 8850 }, { "epoch": 0.008767920052331676, "grad_norm": 2.731189250946045, "learning_rate": 9.982662820527978e-06, "loss": 5.2316, "step": 8900 }, { "epoch": 0.008817178030153763, "grad_norm": 2.762725830078125, "learning_rate": 9.982564302631218e-06, "loss": 5.2461, "step": 8950 }, { "epoch": 0.008866436007975851, "grad_norm": 3.2438535690307617, "learning_rate": 9.982465784734456e-06, "loss": 5.202, "step": 9000 }, { "epoch": 0.00891569398579794, "grad_norm": 2.3306050300598145, "learning_rate": 9.982367266837695e-06, "loss": 5.225, "step": 9050 }, { "epoch": 0.008964951963620028, "grad_norm": 2.7309937477111816, "learning_rate": 9.982268748940933e-06, "loss": 5.1254, "step": 9100 }, { "epoch": 0.009014209941442116, "grad_norm": 2.607841730117798, "learning_rate": 9.982170231044173e-06, "loss": 5.2148, "step": 9150 }, { "epoch": 0.009063467919264203, "grad_norm": 2.752448320388794, "learning_rate": 9.98207171314741e-06, "loss": 5.1724, "step": 9200 }, { "epoch": 0.009112725897086293, "grad_norm": 2.578098773956299, "learning_rate": 9.98197319525065e-06, "loss": 5.1401, "step": 9250 }, { "epoch": 0.00916198387490838, "grad_norm": 2.506199598312378, "learning_rate": 9.981874677353888e-06, "loss": 5.222, "step": 9300 }, { "epoch": 0.009211241852730468, "grad_norm": 2.8980226516723633, "learning_rate": 
9.981776159457128e-06, "loss": 5.2026, "step": 9350 }, { "epoch": 0.009260499830552556, "grad_norm": 2.5668675899505615, "learning_rate": 9.981677641560366e-06, "loss": 5.1399, "step": 9400 }, { "epoch": 0.009309757808374645, "grad_norm": 2.9774155616760254, "learning_rate": 9.981579123663605e-06, "loss": 5.1841, "step": 9450 }, { "epoch": 0.009359015786196733, "grad_norm": 2.784179449081421, "learning_rate": 9.981480605766845e-06, "loss": 5.1706, "step": 9500 }, { "epoch": 0.00940827376401882, "grad_norm": 3.276780128479004, "learning_rate": 9.981382087870083e-06, "loss": 5.2006, "step": 9550 }, { "epoch": 0.009457531741840908, "grad_norm": 2.45902156829834, "learning_rate": 9.981283569973323e-06, "loss": 5.1438, "step": 9600 }, { "epoch": 0.009506789719662997, "grad_norm": 3.334418535232544, "learning_rate": 9.98118505207656e-06, "loss": 5.1962, "step": 9650 }, { "epoch": 0.009556047697485085, "grad_norm": 2.549914836883545, "learning_rate": 9.9810865341798e-06, "loss": 5.2424, "step": 9700 }, { "epoch": 0.009605305675307173, "grad_norm": 3.135145664215088, "learning_rate": 9.980988016283038e-06, "loss": 5.1309, "step": 9750 }, { "epoch": 0.00965456365312926, "grad_norm": 2.222324848175049, "learning_rate": 9.980889498386278e-06, "loss": 5.1401, "step": 9800 }, { "epoch": 0.009703821630951348, "grad_norm": 2.6519627571105957, "learning_rate": 9.980790980489516e-06, "loss": 5.1435, "step": 9850 }, { "epoch": 0.009753079608773437, "grad_norm": 3.206404209136963, "learning_rate": 9.980692462592755e-06, "loss": 5.1377, "step": 9900 }, { "epoch": 0.009802337586595525, "grad_norm": 2.6864330768585205, "learning_rate": 9.980593944695995e-06, "loss": 5.0888, "step": 9950 }, { "epoch": 0.009851595564417613, "grad_norm": 3.300452947616577, "learning_rate": 9.980495426799233e-06, "loss": 5.1518, "step": 10000 }, { "epoch": 0.0099008535422397, "grad_norm": 2.7210471630096436, "learning_rate": 9.980396908902473e-06, "loss": 5.2172, "step": 10050 }, { "epoch": 
0.00995011152006179, "grad_norm": 3.0081121921539307, "learning_rate": 9.98029839100571e-06, "loss": 5.1176, "step": 10100 }, { "epoch": 0.009999369497883877, "grad_norm": 2.973844289779663, "learning_rate": 9.98019987310895e-06, "loss": 5.1191, "step": 10150 }, { "epoch": 0.010048627475705965, "grad_norm": 3.0125725269317627, "learning_rate": 9.980101355212188e-06, "loss": 5.119, "step": 10200 }, { "epoch": 0.010097885453528053, "grad_norm": 2.811295747756958, "learning_rate": 9.980002837315428e-06, "loss": 5.1327, "step": 10250 }, { "epoch": 0.010147143431350142, "grad_norm": 3.0833497047424316, "learning_rate": 9.979904319418666e-06, "loss": 5.0468, "step": 10300 }, { "epoch": 0.01019640140917223, "grad_norm": 2.67097544670105, "learning_rate": 9.979805801521905e-06, "loss": 5.1153, "step": 10350 }, { "epoch": 0.010245659386994318, "grad_norm": 2.8564915657043457, "learning_rate": 9.979707283625145e-06, "loss": 5.1419, "step": 10400 }, { "epoch": 0.010294917364816405, "grad_norm": 2.9845869541168213, "learning_rate": 9.979608765728383e-06, "loss": 5.1225, "step": 10450 }, { "epoch": 0.010344175342638495, "grad_norm": 3.286940574645996, "learning_rate": 9.979510247831622e-06, "loss": 5.1463, "step": 10500 }, { "epoch": 0.010393433320460582, "grad_norm": 2.777956962585449, "learning_rate": 9.97941172993486e-06, "loss": 5.1447, "step": 10550 }, { "epoch": 0.01044269129828267, "grad_norm": 3.1841282844543457, "learning_rate": 9.9793132120381e-06, "loss": 5.0844, "step": 10600 }, { "epoch": 0.010491949276104758, "grad_norm": 2.8072447776794434, "learning_rate": 9.979214694141338e-06, "loss": 5.0622, "step": 10650 }, { "epoch": 0.010541207253926845, "grad_norm": 2.525725841522217, "learning_rate": 9.979116176244577e-06, "loss": 5.103, "step": 10700 }, { "epoch": 0.010590465231748935, "grad_norm": 2.84185528755188, "learning_rate": 9.979017658347815e-06, "loss": 5.078, "step": 10750 }, { "epoch": 0.010639723209571022, "grad_norm": 2.6805336475372314, "learning_rate": 
9.978919140451055e-06, "loss": 5.1111, "step": 10800 }, { "epoch": 0.01068898118739311, "grad_norm": 2.621917247772217, "learning_rate": 9.978820622554295e-06, "loss": 5.1216, "step": 10850 }, { "epoch": 0.010738239165215198, "grad_norm": 3.3322038650512695, "learning_rate": 9.978722104657533e-06, "loss": 5.1474, "step": 10900 }, { "epoch": 0.010787497143037287, "grad_norm": 2.7139110565185547, "learning_rate": 9.978623586760772e-06, "loss": 5.1347, "step": 10950 }, { "epoch": 0.010836755120859375, "grad_norm": 2.7151482105255127, "learning_rate": 9.978525068864012e-06, "loss": 5.0979, "step": 11000 }, { "epoch": 0.010886013098681462, "grad_norm": 2.9608211517333984, "learning_rate": 9.97842655096725e-06, "loss": 5.0786, "step": 11050 }, { "epoch": 0.01093527107650355, "grad_norm": 2.5886597633361816, "learning_rate": 9.978328033070488e-06, "loss": 5.0645, "step": 11100 }, { "epoch": 0.01098452905432564, "grad_norm": 2.6722819805145264, "learning_rate": 9.978229515173727e-06, "loss": 5.0679, "step": 11150 }, { "epoch": 0.011033787032147727, "grad_norm": 2.6561288833618164, "learning_rate": 9.978130997276965e-06, "loss": 5.0565, "step": 11200 }, { "epoch": 0.011083045009969815, "grad_norm": 3.1985936164855957, "learning_rate": 9.978032479380205e-06, "loss": 5.0584, "step": 11250 }, { "epoch": 0.011132302987791902, "grad_norm": 3.015976905822754, "learning_rate": 9.977933961483443e-06, "loss": 5.0309, "step": 11300 }, { "epoch": 0.011181560965613992, "grad_norm": 2.7622482776641846, "learning_rate": 9.977835443586682e-06, "loss": 5.0371, "step": 11350 }, { "epoch": 0.01123081894343608, "grad_norm": 2.7675328254699707, "learning_rate": 9.977736925689922e-06, "loss": 5.0582, "step": 11400 }, { "epoch": 0.011280076921258167, "grad_norm": 2.6566014289855957, "learning_rate": 9.977638407793162e-06, "loss": 5.0632, "step": 11450 }, { "epoch": 0.011329334899080255, "grad_norm": 2.5212440490722656, "learning_rate": 9.9775398898964e-06, "loss": 5.0305, "step": 11500 }, { 
"epoch": 0.011378592876902342, "grad_norm": 3.1915969848632812, "learning_rate": 9.97744137199964e-06, "loss": 5.0243, "step": 11550 }, { "epoch": 0.011427850854724432, "grad_norm": 2.747652053833008, "learning_rate": 9.977342854102877e-06, "loss": 5.0453, "step": 11600 }, { "epoch": 0.01147710883254652, "grad_norm": 3.1639904975891113, "learning_rate": 9.977244336206115e-06, "loss": 5.0106, "step": 11650 }, { "epoch": 0.011526366810368607, "grad_norm": 2.8592841625213623, "learning_rate": 9.977145818309355e-06, "loss": 5.0245, "step": 11700 }, { "epoch": 0.011575624788190695, "grad_norm": 2.4575510025024414, "learning_rate": 9.977047300412593e-06, "loss": 5.0459, "step": 11750 }, { "epoch": 0.011624882766012784, "grad_norm": 2.752303123474121, "learning_rate": 9.976948782515832e-06, "loss": 5.0125, "step": 11800 }, { "epoch": 0.011674140743834872, "grad_norm": 2.4985873699188232, "learning_rate": 9.976850264619072e-06, "loss": 5.0599, "step": 11850 }, { "epoch": 0.01172339872165696, "grad_norm": 2.650714159011841, "learning_rate": 9.97675174672231e-06, "loss": 5.005, "step": 11900 }, { "epoch": 0.011772656699479047, "grad_norm": 2.8964450359344482, "learning_rate": 9.97665322882555e-06, "loss": 5.0384, "step": 11950 }, { "epoch": 0.011821914677301136, "grad_norm": 2.7454681396484375, "learning_rate": 9.976554710928789e-06, "loss": 4.9572, "step": 12000 }, { "epoch": 0.011871172655123224, "grad_norm": 4.804060459136963, "learning_rate": 9.976456193032027e-06, "loss": 4.9991, "step": 12050 }, { "epoch": 0.011920430632945312, "grad_norm": 2.895745277404785, "learning_rate": 9.976357675135267e-06, "loss": 5.0209, "step": 12100 }, { "epoch": 0.0119696886107674, "grad_norm": 3.107665777206421, "learning_rate": 9.976259157238505e-06, "loss": 5.0109, "step": 12150 }, { "epoch": 0.012018946588589489, "grad_norm": 3.312875986099243, "learning_rate": 9.976160639341743e-06, "loss": 5.0715, "step": 12200 }, { "epoch": 0.012068204566411576, "grad_norm": 2.6124379634857178, 
"learning_rate": 9.976062121444982e-06, "loss": 5.0115, "step": 12250 }, { "epoch": 0.012117462544233664, "grad_norm": 2.636563301086426, "learning_rate": 9.975963603548222e-06, "loss": 5.011, "step": 12300 }, { "epoch": 0.012166720522055752, "grad_norm": 2.8454091548919678, "learning_rate": 9.97586508565146e-06, "loss": 4.9804, "step": 12350 }, { "epoch": 0.01221597849987784, "grad_norm": 2.3826451301574707, "learning_rate": 9.9757665677547e-06, "loss": 5.0208, "step": 12400 }, { "epoch": 0.012265236477699929, "grad_norm": 2.883273124694824, "learning_rate": 9.975668049857939e-06, "loss": 5.0381, "step": 12450 }, { "epoch": 0.012314494455522016, "grad_norm": 3.6011040210723877, "learning_rate": 9.975569531961177e-06, "loss": 4.9563, "step": 12500 }, { "epoch": 0.012363752433344104, "grad_norm": 8.041337013244629, "learning_rate": 9.975471014064417e-06, "loss": 4.9735, "step": 12550 }, { "epoch": 0.012413010411166192, "grad_norm": 2.9154751300811768, "learning_rate": 9.975372496167654e-06, "loss": 4.9955, "step": 12600 }, { "epoch": 0.012462268388988281, "grad_norm": 3.00058650970459, "learning_rate": 9.975273978270894e-06, "loss": 4.9427, "step": 12650 }, { "epoch": 0.012511526366810369, "grad_norm": 3.041849136352539, "learning_rate": 9.975175460374132e-06, "loss": 4.9327, "step": 12700 }, { "epoch": 0.012560784344632456, "grad_norm": 2.5672147274017334, "learning_rate": 9.97507694247737e-06, "loss": 5.0007, "step": 12750 }, { "epoch": 0.012610042322454544, "grad_norm": 2.7387607097625732, "learning_rate": 9.97497842458061e-06, "loss": 4.9586, "step": 12800 }, { "epoch": 0.012659300300276633, "grad_norm": 2.7960760593414307, "learning_rate": 9.97487990668385e-06, "loss": 4.9412, "step": 12850 }, { "epoch": 0.012708558278098721, "grad_norm": 2.8948588371276855, "learning_rate": 9.974781388787089e-06, "loss": 5.0998, "step": 12900 }, { "epoch": 0.012757816255920809, "grad_norm": 2.80606746673584, "learning_rate": 9.974682870890327e-06, "loss": 4.9639, "step": 12950 
}, { "epoch": 0.012807074233742896, "grad_norm": 2.617866039276123, "learning_rate": 9.974584352993566e-06, "loss": 4.9566, "step": 13000 }, { "epoch": 0.012856332211564986, "grad_norm": 2.61118483543396, "learning_rate": 9.974485835096804e-06, "loss": 4.9398, "step": 13050 }, { "epoch": 0.012905590189387074, "grad_norm": 3.115861654281616, "learning_rate": 9.974387317200044e-06, "loss": 4.9141, "step": 13100 }, { "epoch": 0.012954848167209161, "grad_norm": 3.14267635345459, "learning_rate": 9.974288799303282e-06, "loss": 4.9804, "step": 13150 }, { "epoch": 0.013004106145031249, "grad_norm": 2.8500235080718994, "learning_rate": 9.97419028140652e-06, "loss": 4.9494, "step": 13200 }, { "epoch": 0.013053364122853336, "grad_norm": 2.4847493171691895, "learning_rate": 9.97409176350976e-06, "loss": 4.8897, "step": 13250 }, { "epoch": 0.013102622100675426, "grad_norm": 2.5797436237335205, "learning_rate": 9.973993245612999e-06, "loss": 4.9435, "step": 13300 }, { "epoch": 0.013151880078497514, "grad_norm": 2.6346473693847656, "learning_rate": 9.973894727716239e-06, "loss": 4.9417, "step": 13350 }, { "epoch": 0.013201138056319601, "grad_norm": 2.570680618286133, "learning_rate": 9.973796209819477e-06, "loss": 4.9819, "step": 13400 }, { "epoch": 0.013250396034141689, "grad_norm": 2.6807267665863037, "learning_rate": 9.973697691922716e-06, "loss": 4.9395, "step": 13450 }, { "epoch": 0.013299654011963778, "grad_norm": 3.142136573791504, "learning_rate": 9.973599174025954e-06, "loss": 4.9729, "step": 13500 }, { "epoch": 0.013348911989785866, "grad_norm": 2.413271427154541, "learning_rate": 9.973500656129194e-06, "loss": 4.927, "step": 13550 }, { "epoch": 0.013398169967607954, "grad_norm": 2.633683681488037, "learning_rate": 9.973402138232432e-06, "loss": 4.9453, "step": 13600 }, { "epoch": 0.013447427945430041, "grad_norm": 2.6855924129486084, "learning_rate": 9.973303620335671e-06, "loss": 4.9406, "step": 13650 }, { "epoch": 0.01349668592325213, "grad_norm": 
2.8274478912353516, "learning_rate": 9.97320510243891e-06, "loss": 4.9191, "step": 13700 }, { "epoch": 0.013545943901074218, "grad_norm": 2.3939945697784424, "learning_rate": 9.973106584542149e-06, "loss": 4.9049, "step": 13750 }, { "epoch": 0.013595201878896306, "grad_norm": 2.7994632720947266, "learning_rate": 9.973008066645387e-06, "loss": 4.9746, "step": 13800 }, { "epoch": 0.013644459856718394, "grad_norm": 2.558964490890503, "learning_rate": 9.972909548748627e-06, "loss": 4.8646, "step": 13850 }, { "epoch": 0.013693717834540483, "grad_norm": 2.6282644271850586, "learning_rate": 9.972811030851866e-06, "loss": 4.9234, "step": 13900 }, { "epoch": 0.01374297581236257, "grad_norm": 2.561087131500244, "learning_rate": 9.972712512955104e-06, "loss": 4.9091, "step": 13950 }, { "epoch": 0.013792233790184658, "grad_norm": 2.9112539291381836, "learning_rate": 9.972613995058344e-06, "loss": 4.9158, "step": 14000 }, { "epoch": 0.013841491768006746, "grad_norm": 2.4566030502319336, "learning_rate": 9.972515477161582e-06, "loss": 4.9312, "step": 14050 }, { "epoch": 0.013890749745828835, "grad_norm": 2.7782633304595947, "learning_rate": 9.972416959264821e-06, "loss": 4.9544, "step": 14100 }, { "epoch": 0.013940007723650923, "grad_norm": 2.9850313663482666, "learning_rate": 9.97231844136806e-06, "loss": 4.8732, "step": 14150 }, { "epoch": 0.01398926570147301, "grad_norm": 3.229119062423706, "learning_rate": 9.972219923471299e-06, "loss": 4.9412, "step": 14200 }, { "epoch": 0.014038523679295098, "grad_norm": 2.7228994369506836, "learning_rate": 9.972121405574537e-06, "loss": 4.9139, "step": 14250 }, { "epoch": 0.014087781657117186, "grad_norm": 2.9174795150756836, "learning_rate": 9.972022887677776e-06, "loss": 4.8683, "step": 14300 }, { "epoch": 0.014137039634939275, "grad_norm": 2.9443442821502686, "learning_rate": 9.971924369781016e-06, "loss": 4.8991, "step": 14350 }, { "epoch": 0.014186297612761363, "grad_norm": 3.1987316608428955, "learning_rate": 9.971825851884254e-06, 
"loss": 4.8839, "step": 14400 }, { "epoch": 0.01423555559058345, "grad_norm": 3.0276005268096924, "learning_rate": 9.971727333987494e-06, "loss": 4.9147, "step": 14450 }, { "epoch": 0.014284813568405538, "grad_norm": 3.049004316329956, "learning_rate": 9.971628816090732e-06, "loss": 4.8882, "step": 14500 }, { "epoch": 0.014334071546227628, "grad_norm": 3.058237314224243, "learning_rate": 9.971530298193971e-06, "loss": 4.919, "step": 14550 }, { "epoch": 0.014383329524049715, "grad_norm": 2.641164541244507, "learning_rate": 9.971431780297209e-06, "loss": 4.9429, "step": 14600 }, { "epoch": 0.014432587501871803, "grad_norm": 2.5120842456817627, "learning_rate": 9.971333262400449e-06, "loss": 4.8806, "step": 14650 }, { "epoch": 0.01448184547969389, "grad_norm": 2.727837324142456, "learning_rate": 9.971234744503687e-06, "loss": 4.9088, "step": 14700 }, { "epoch": 0.01453110345751598, "grad_norm": 2.8373379707336426, "learning_rate": 9.971136226606926e-06, "loss": 4.8997, "step": 14750 }, { "epoch": 0.014580361435338068, "grad_norm": 2.5231852531433105, "learning_rate": 9.971037708710166e-06, "loss": 4.8688, "step": 14800 }, { "epoch": 0.014629619413160155, "grad_norm": 2.866428852081299, "learning_rate": 9.970939190813404e-06, "loss": 4.8645, "step": 14850 }, { "epoch": 0.014678877390982243, "grad_norm": 3.116302490234375, "learning_rate": 9.970840672916643e-06, "loss": 4.9037, "step": 14900 }, { "epoch": 0.014728135368804332, "grad_norm": 2.9273884296417236, "learning_rate": 9.970742155019881e-06, "loss": 4.8069, "step": 14950 }, { "epoch": 0.01477739334662642, "grad_norm": 2.5544352531433105, "learning_rate": 9.970643637123121e-06, "loss": 4.8998, "step": 15000 }, { "epoch": 0.014826651324448508, "grad_norm": 2.3461215496063232, "learning_rate": 9.970545119226359e-06, "loss": 4.8263, "step": 15050 }, { "epoch": 0.014875909302270595, "grad_norm": 2.9023947715759277, "learning_rate": 9.970446601329599e-06, "loss": 4.8763, "step": 15100 }, { "epoch": 
0.014925167280092683, "grad_norm": 2.755872964859009, "learning_rate": 9.970348083432836e-06, "loss": 4.8345, "step": 15150 }, { "epoch": 0.014974425257914772, "grad_norm": 3.4369239807128906, "learning_rate": 9.970249565536076e-06, "loss": 4.8348, "step": 15200 }, { "epoch": 0.01502368323573686, "grad_norm": 2.513559341430664, "learning_rate": 9.970151047639314e-06, "loss": 4.8278, "step": 15250 }, { "epoch": 0.015072941213558948, "grad_norm": 2.857710838317871, "learning_rate": 9.970052529742554e-06, "loss": 4.8655, "step": 15300 }, { "epoch": 0.015122199191381035, "grad_norm": 2.9492580890655518, "learning_rate": 9.969954011845793e-06, "loss": 4.836, "step": 15350 }, { "epoch": 0.015171457169203125, "grad_norm": 2.8338775634765625, "learning_rate": 9.969855493949031e-06, "loss": 4.832, "step": 15400 }, { "epoch": 0.015220715147025212, "grad_norm": 2.7897562980651855, "learning_rate": 9.969756976052271e-06, "loss": 4.8706, "step": 15450 }, { "epoch": 0.0152699731248473, "grad_norm": 2.9784884452819824, "learning_rate": 9.969658458155509e-06, "loss": 4.8397, "step": 15500 }, { "epoch": 0.015319231102669388, "grad_norm": 2.539855718612671, "learning_rate": 9.969559940258748e-06, "loss": 4.7733, "step": 15550 }, { "epoch": 0.015368489080491477, "grad_norm": 2.8186769485473633, "learning_rate": 9.969461422361986e-06, "loss": 4.8489, "step": 15600 }, { "epoch": 0.015417747058313565, "grad_norm": 2.567842483520508, "learning_rate": 9.969362904465226e-06, "loss": 4.7802, "step": 15650 }, { "epoch": 0.015467005036135652, "grad_norm": 2.962284564971924, "learning_rate": 9.969264386568464e-06, "loss": 4.7659, "step": 15700 }, { "epoch": 0.01551626301395774, "grad_norm": 2.9043500423431396, "learning_rate": 9.969165868671704e-06, "loss": 4.8493, "step": 15750 }, { "epoch": 0.01556552099177983, "grad_norm": 2.8906874656677246, "learning_rate": 9.969067350774943e-06, "loss": 4.832, "step": 15800 }, { "epoch": 0.015614778969601917, "grad_norm": 2.635098457336426, 
"learning_rate": 9.968968832878183e-06, "loss": 4.7984, "step": 15850 }, { "epoch": 0.015664036947424003, "grad_norm": 2.9309582710266113, "learning_rate": 9.96887031498142e-06, "loss": 4.8289, "step": 15900 }, { "epoch": 0.015713294925246094, "grad_norm": 2.7389392852783203, "learning_rate": 9.968771797084659e-06, "loss": 4.8491, "step": 15950 }, { "epoch": 0.015762552903068182, "grad_norm": 2.7155375480651855, "learning_rate": 9.968673279187898e-06, "loss": 4.817, "step": 16000 }, { "epoch": 0.01581181088089027, "grad_norm": 3.205986976623535, "learning_rate": 9.968574761291136e-06, "loss": 4.7906, "step": 16050 }, { "epoch": 0.015861068858712357, "grad_norm": 2.7359631061553955, "learning_rate": 9.968476243394376e-06, "loss": 4.7927, "step": 16100 }, { "epoch": 0.015910326836534445, "grad_norm": 2.73769474029541, "learning_rate": 9.968377725497614e-06, "loss": 4.8115, "step": 16150 }, { "epoch": 0.015959584814356532, "grad_norm": 2.722442388534546, "learning_rate": 9.968279207600853e-06, "loss": 4.8113, "step": 16200 }, { "epoch": 0.01600884279217862, "grad_norm": 3.076115846633911, "learning_rate": 9.968180689704093e-06, "loss": 4.8335, "step": 16250 }, { "epoch": 0.016058100770000708, "grad_norm": 3.232151985168457, "learning_rate": 9.968082171807331e-06, "loss": 4.7885, "step": 16300 }, { "epoch": 0.0161073587478228, "grad_norm": 2.7500979900360107, "learning_rate": 9.96798365391057e-06, "loss": 4.8502, "step": 16350 }, { "epoch": 0.016156616725644887, "grad_norm": 3.8684093952178955, "learning_rate": 9.96788513601381e-06, "loss": 4.8037, "step": 16400 }, { "epoch": 0.016205874703466974, "grad_norm": 3.7351253032684326, "learning_rate": 9.967786618117048e-06, "loss": 4.7569, "step": 16450 }, { "epoch": 0.016255132681289062, "grad_norm": 2.939333438873291, "learning_rate": 9.967688100220286e-06, "loss": 4.8306, "step": 16500 }, { "epoch": 0.01630439065911115, "grad_norm": 3.126208543777466, "learning_rate": 9.967589582323526e-06, "loss": 4.8278, "step": 16550 
}, { "epoch": 0.016353648636933237, "grad_norm": 2.5775349140167236, "learning_rate": 9.967491064426764e-06, "loss": 4.838, "step": 16600 }, { "epoch": 0.016402906614755325, "grad_norm": 2.6388967037200928, "learning_rate": 9.967392546530003e-06, "loss": 4.7967, "step": 16650 }, { "epoch": 0.016452164592577413, "grad_norm": 3.136052131652832, "learning_rate": 9.967294028633241e-06, "loss": 4.8092, "step": 16700 }, { "epoch": 0.016501422570399504, "grad_norm": 2.507441520690918, "learning_rate": 9.96719551073648e-06, "loss": 4.8197, "step": 16750 }, { "epoch": 0.01655068054822159, "grad_norm": 2.833686351776123, "learning_rate": 9.96709699283972e-06, "loss": 4.7749, "step": 16800 }, { "epoch": 0.01659993852604368, "grad_norm": 3.32494854927063, "learning_rate": 9.96699847494296e-06, "loss": 4.7853, "step": 16850 }, { "epoch": 0.016649196503865767, "grad_norm": 2.7150046825408936, "learning_rate": 9.966899957046198e-06, "loss": 4.7526, "step": 16900 }, { "epoch": 0.016698454481687854, "grad_norm": 2.52634859085083, "learning_rate": 9.966801439149438e-06, "loss": 4.7693, "step": 16950 }, { "epoch": 0.016747712459509942, "grad_norm": 3.0011000633239746, "learning_rate": 9.966702921252676e-06, "loss": 4.8027, "step": 17000 }, { "epoch": 0.01679697043733203, "grad_norm": 3.262936592102051, "learning_rate": 9.966604403355913e-06, "loss": 4.7472, "step": 17050 }, { "epoch": 0.016846228415154117, "grad_norm": 2.754255771636963, "learning_rate": 9.966505885459153e-06, "loss": 4.7705, "step": 17100 }, { "epoch": 0.016895486392976205, "grad_norm": 3.0748441219329834, "learning_rate": 9.966407367562391e-06, "loss": 4.7362, "step": 17150 }, { "epoch": 0.016944744370798296, "grad_norm": 2.397820472717285, "learning_rate": 9.96630884966563e-06, "loss": 4.7954, "step": 17200 }, { "epoch": 0.016994002348620384, "grad_norm": 2.6865358352661133, "learning_rate": 9.96621033176887e-06, "loss": 4.7928, "step": 17250 }, { "epoch": 0.01704326032644247, "grad_norm": 2.6370294094085693, 
"learning_rate": 9.96611181387211e-06, "loss": 4.7236, "step": 17300 }, { "epoch": 0.01709251830426456, "grad_norm": 2.90108060836792, "learning_rate": 9.966013295975348e-06, "loss": 4.756, "step": 17350 }, { "epoch": 0.017141776282086647, "grad_norm": 2.5440845489501953, "learning_rate": 9.965914778078587e-06, "loss": 4.7545, "step": 17400 }, { "epoch": 0.017191034259908734, "grad_norm": 2.8412258625030518, "learning_rate": 9.965816260181825e-06, "loss": 4.8121, "step": 17450 }, { "epoch": 0.017240292237730822, "grad_norm": 2.730078935623169, "learning_rate": 9.965717742285065e-06, "loss": 4.7686, "step": 17500 }, { "epoch": 0.01728955021555291, "grad_norm": 3.051495313644409, "learning_rate": 9.965619224388303e-06, "loss": 4.7846, "step": 17550 }, { "epoch": 0.017338808193375, "grad_norm": 3.127047300338745, "learning_rate": 9.965520706491541e-06, "loss": 4.7092, "step": 17600 }, { "epoch": 0.01738806617119709, "grad_norm": 2.6953890323638916, "learning_rate": 9.96542218859478e-06, "loss": 4.7411, "step": 17650 }, { "epoch": 0.017437324149019176, "grad_norm": 2.555135488510132, "learning_rate": 9.96532367069802e-06, "loss": 4.7444, "step": 17700 }, { "epoch": 0.017486582126841264, "grad_norm": 2.8850784301757812, "learning_rate": 9.965225152801258e-06, "loss": 4.7264, "step": 17750 }, { "epoch": 0.01753584010466335, "grad_norm": 3.1482346057891846, "learning_rate": 9.965126634904498e-06, "loss": 4.7424, "step": 17800 }, { "epoch": 0.01758509808248544, "grad_norm": 2.9815826416015625, "learning_rate": 9.965028117007737e-06, "loss": 4.7988, "step": 17850 }, { "epoch": 0.017634356060307527, "grad_norm": 2.593313455581665, "learning_rate": 9.964929599110975e-06, "loss": 4.8004, "step": 17900 }, { "epoch": 0.017683614038129614, "grad_norm": 2.5208353996276855, "learning_rate": 9.964831081214215e-06, "loss": 4.7578, "step": 17950 }, { "epoch": 0.017732872015951702, "grad_norm": 2.3754539489746094, "learning_rate": 9.964732563317453e-06, "loss": 4.7409, "step": 18000 }, 
{ "epoch": 0.017782129993773793, "grad_norm": 3.008375644683838, "learning_rate": 9.964634045420692e-06, "loss": 4.784, "step": 18050 }, { "epoch": 0.01783138797159588, "grad_norm": 3.5501465797424316, "learning_rate": 9.96453552752393e-06, "loss": 4.7189, "step": 18100 }, { "epoch": 0.01788064594941797, "grad_norm": 2.732070207595825, "learning_rate": 9.96443700962717e-06, "loss": 4.6844, "step": 18150 }, { "epoch": 0.017929903927240056, "grad_norm": 2.8019258975982666, "learning_rate": 9.964338491730408e-06, "loss": 4.6937, "step": 18200 }, { "epoch": 0.017979161905062144, "grad_norm": 2.3495283126831055, "learning_rate": 9.964239973833648e-06, "loss": 4.7496, "step": 18250 }, { "epoch": 0.01802841988288423, "grad_norm": 2.951258897781372, "learning_rate": 9.964141455936887e-06, "loss": 4.7037, "step": 18300 }, { "epoch": 0.01807767786070632, "grad_norm": 2.8172523975372314, "learning_rate": 9.964042938040125e-06, "loss": 4.7194, "step": 18350 }, { "epoch": 0.018126935838528407, "grad_norm": 2.6205809116363525, "learning_rate": 9.963944420143365e-06, "loss": 4.7109, "step": 18400 }, { "epoch": 0.018176193816350498, "grad_norm": 2.723106861114502, "learning_rate": 9.963845902246603e-06, "loss": 4.7064, "step": 18450 }, { "epoch": 0.018225451794172585, "grad_norm": 2.867903709411621, "learning_rate": 9.963747384349842e-06, "loss": 4.7304, "step": 18500 }, { "epoch": 0.018274709771994673, "grad_norm": 2.6120004653930664, "learning_rate": 9.96364886645308e-06, "loss": 4.6865, "step": 18550 }, { "epoch": 0.01832396774981676, "grad_norm": 2.9803898334503174, "learning_rate": 9.96355034855632e-06, "loss": 4.6462, "step": 18600 }, { "epoch": 0.01837322572763885, "grad_norm": 2.8633246421813965, "learning_rate": 9.963451830659558e-06, "loss": 4.643, "step": 18650 }, { "epoch": 0.018422483705460936, "grad_norm": 2.9035658836364746, "learning_rate": 9.963353312762797e-06, "loss": 4.6612, "step": 18700 }, { "epoch": 0.018471741683283024, "grad_norm": 3.018044948577881, 
"learning_rate": 9.963254794866037e-06, "loss": 4.7231, "step": 18750 }, { "epoch": 0.01852099966110511, "grad_norm": 3.025055408477783, "learning_rate": 9.963156276969275e-06, "loss": 4.7165, "step": 18800 }, { "epoch": 0.0185702576389272, "grad_norm": 3.1283817291259766, "learning_rate": 9.963057759072515e-06, "loss": 4.6978, "step": 18850 }, { "epoch": 0.01861951561674929, "grad_norm": 2.801934003829956, "learning_rate": 9.962959241175753e-06, "loss": 4.6834, "step": 18900 }, { "epoch": 0.018668773594571378, "grad_norm": 2.7277987003326416, "learning_rate": 9.962860723278992e-06, "loss": 4.6815, "step": 18950 }, { "epoch": 0.018718031572393466, "grad_norm": 2.6831300258636475, "learning_rate": 9.96276220538223e-06, "loss": 4.6825, "step": 19000 }, { "epoch": 0.018767289550215553, "grad_norm": 3.4934778213500977, "learning_rate": 9.96266368748547e-06, "loss": 4.7283, "step": 19050 }, { "epoch": 0.01881654752803764, "grad_norm": 2.8354766368865967, "learning_rate": 9.962565169588708e-06, "loss": 4.664, "step": 19100 }, { "epoch": 0.01886580550585973, "grad_norm": 2.8361477851867676, "learning_rate": 9.962466651691947e-06, "loss": 4.7284, "step": 19150 }, { "epoch": 0.018915063483681816, "grad_norm": 2.7482402324676514, "learning_rate": 9.962368133795185e-06, "loss": 4.691, "step": 19200 }, { "epoch": 0.018964321461503904, "grad_norm": 3.1883468627929688, "learning_rate": 9.962269615898425e-06, "loss": 4.6753, "step": 19250 }, { "epoch": 0.019013579439325995, "grad_norm": 2.7762362957000732, "learning_rate": 9.962171098001664e-06, "loss": 4.7052, "step": 19300 }, { "epoch": 0.019062837417148083, "grad_norm": 2.7757973670959473, "learning_rate": 9.962072580104902e-06, "loss": 4.6868, "step": 19350 }, { "epoch": 0.01911209539497017, "grad_norm": 3.002589702606201, "learning_rate": 9.961974062208142e-06, "loss": 4.7017, "step": 19400 }, { "epoch": 0.019161353372792258, "grad_norm": 3.270817279815674, "learning_rate": 9.96187554431138e-06, "loss": 4.6161, "step": 19450 
}, { "epoch": 0.019210611350614346, "grad_norm": 2.584317922592163, "learning_rate": 9.96177702641462e-06, "loss": 4.6854, "step": 19500 }, { "epoch": 0.019259869328436433, "grad_norm": 2.954659938812256, "learning_rate": 9.961678508517858e-06, "loss": 4.7057, "step": 19550 }, { "epoch": 0.01930912730625852, "grad_norm": 2.62088942527771, "learning_rate": 9.961579990621097e-06, "loss": 4.631, "step": 19600 }, { "epoch": 0.01935838528408061, "grad_norm": 2.6912055015563965, "learning_rate": 9.961481472724335e-06, "loss": 4.6033, "step": 19650 }, { "epoch": 0.019407643261902696, "grad_norm": 3.1379692554473877, "learning_rate": 9.961382954827575e-06, "loss": 4.6644, "step": 19700 }, { "epoch": 0.019456901239724787, "grad_norm": 2.9732141494750977, "learning_rate": 9.961284436930814e-06, "loss": 4.6448, "step": 19750 }, { "epoch": 0.019506159217546875, "grad_norm": 3.068929672241211, "learning_rate": 9.961185919034052e-06, "loss": 4.611, "step": 19800 }, { "epoch": 0.019555417195368963, "grad_norm": 3.177074432373047, "learning_rate": 9.961087401137292e-06, "loss": 4.5738, "step": 19850 }, { "epoch": 0.01960467517319105, "grad_norm": 2.8266665935516357, "learning_rate": 9.96098888324053e-06, "loss": 4.6535, "step": 19900 }, { "epoch": 0.019653933151013138, "grad_norm": 2.889270782470703, "learning_rate": 9.96089036534377e-06, "loss": 4.6278, "step": 19950 }, { "epoch": 0.019703191128835226, "grad_norm": 2.630666971206665, "learning_rate": 9.960791847447007e-06, "loss": 4.6061, "step": 20000 }, { "epoch": 0.019752449106657313, "grad_norm": 2.609053611755371, "learning_rate": 9.960693329550247e-06, "loss": 4.6736, "step": 20050 }, { "epoch": 0.0198017070844794, "grad_norm": 2.826141357421875, "learning_rate": 9.960594811653485e-06, "loss": 4.6866, "step": 20100 }, { "epoch": 0.019850965062301492, "grad_norm": 2.97489333152771, "learning_rate": 9.960496293756725e-06, "loss": 4.6179, "step": 20150 }, { "epoch": 0.01990022304012358, "grad_norm": 2.913787364959717, 
"learning_rate": 9.960397775859964e-06, "loss": 4.6686, "step": 20200 }, { "epoch": 0.019949481017945667, "grad_norm": 2.4863719940185547, "learning_rate": 9.960299257963202e-06, "loss": 4.6685, "step": 20250 }, { "epoch": 0.019998738995767755, "grad_norm": 2.8387584686279297, "learning_rate": 9.960200740066442e-06, "loss": 4.6156, "step": 20300 }, { "epoch": 0.020047996973589843, "grad_norm": 2.749788284301758, "learning_rate": 9.96010222216968e-06, "loss": 4.648, "step": 20350 }, { "epoch": 0.02009725495141193, "grad_norm": 2.7358319759368896, "learning_rate": 9.96000370427292e-06, "loss": 4.591, "step": 20400 }, { "epoch": 0.020146512929234018, "grad_norm": 2.9210987091064453, "learning_rate": 9.959905186376157e-06, "loss": 4.6453, "step": 20450 }, { "epoch": 0.020195770907056106, "grad_norm": 2.8509223461151123, "learning_rate": 9.959806668479397e-06, "loss": 4.6533, "step": 20500 }, { "epoch": 0.020245028884878193, "grad_norm": 2.7001538276672363, "learning_rate": 9.959708150582635e-06, "loss": 4.5922, "step": 20550 }, { "epoch": 0.020294286862700284, "grad_norm": 3.0613067150115967, "learning_rate": 9.959609632685874e-06, "loss": 4.6449, "step": 20600 }, { "epoch": 0.020343544840522372, "grad_norm": 2.8858015537261963, "learning_rate": 9.959511114789114e-06, "loss": 4.6368, "step": 20650 }, { "epoch": 0.02039280281834446, "grad_norm": 2.850172281265259, "learning_rate": 9.959412596892352e-06, "loss": 4.5766, "step": 20700 }, { "epoch": 0.020442060796166547, "grad_norm": 4.303860187530518, "learning_rate": 9.959314078995592e-06, "loss": 4.6691, "step": 20750 }, { "epoch": 0.020491318773988635, "grad_norm": 3.1398849487304688, "learning_rate": 9.95921556109883e-06, "loss": 4.6266, "step": 20800 }, { "epoch": 0.020540576751810723, "grad_norm": 3.1535465717315674, "learning_rate": 9.95911704320207e-06, "loss": 4.6393, "step": 20850 }, { "epoch": 0.02058983472963281, "grad_norm": 2.909069299697876, "learning_rate": 9.959018525305307e-06, "loss": 4.6261, "step": 
20900 }, { "epoch": 0.020639092707454898, "grad_norm": 3.134897470474243, "learning_rate": 9.958920007408547e-06, "loss": 4.6733, "step": 20950 }, { "epoch": 0.02068835068527699, "grad_norm": 3.3675618171691895, "learning_rate": 9.958821489511785e-06, "loss": 4.6452, "step": 21000 }, { "epoch": 0.020737608663099077, "grad_norm": 3.3791799545288086, "learning_rate": 9.958722971615024e-06, "loss": 4.5812, "step": 21050 }, { "epoch": 0.020786866640921164, "grad_norm": 3.0720391273498535, "learning_rate": 9.958624453718262e-06, "loss": 4.5883, "step": 21100 }, { "epoch": 0.020836124618743252, "grad_norm": 3.6765329837799072, "learning_rate": 9.958525935821502e-06, "loss": 4.604, "step": 21150 }, { "epoch": 0.02088538259656534, "grad_norm": 2.7886910438537598, "learning_rate": 9.958427417924742e-06, "loss": 4.6039, "step": 21200 }, { "epoch": 0.020934640574387427, "grad_norm": 2.8640899658203125, "learning_rate": 9.958328900027981e-06, "loss": 4.6267, "step": 21250 }, { "epoch": 0.020983898552209515, "grad_norm": 2.540313243865967, "learning_rate": 9.958230382131219e-06, "loss": 4.5353, "step": 21300 }, { "epoch": 0.021033156530031603, "grad_norm": 2.679269790649414, "learning_rate": 9.958131864234457e-06, "loss": 4.6805, "step": 21350 }, { "epoch": 0.02108241450785369, "grad_norm": 2.53497314453125, "learning_rate": 9.958033346337697e-06, "loss": 4.6009, "step": 21400 }, { "epoch": 0.02113167248567578, "grad_norm": 2.8101539611816406, "learning_rate": 9.957934828440935e-06, "loss": 4.5344, "step": 21450 }, { "epoch": 0.02118093046349787, "grad_norm": 2.8896231651306152, "learning_rate": 9.957836310544174e-06, "loss": 4.6157, "step": 21500 }, { "epoch": 0.021230188441319957, "grad_norm": 2.7836410999298096, "learning_rate": 9.957737792647412e-06, "loss": 4.5424, "step": 21550 }, { "epoch": 0.021279446419142044, "grad_norm": 3.245649576187134, "learning_rate": 9.957639274750652e-06, "loss": 4.5937, "step": 21600 }, { "epoch": 0.021328704396964132, "grad_norm": 
3.2713537216186523, "learning_rate": 9.957540756853891e-06, "loss": 4.5765, "step": 21650 }, { "epoch": 0.02137796237478622, "grad_norm": 2.6175053119659424, "learning_rate": 9.95744223895713e-06, "loss": 4.5474, "step": 21700 }, { "epoch": 0.021427220352608307, "grad_norm": 2.647395372390747, "learning_rate": 9.957343721060369e-06, "loss": 4.6384, "step": 21750 }, { "epoch": 0.021476478330430395, "grad_norm": 2.63905930519104, "learning_rate": 9.957245203163609e-06, "loss": 4.6055, "step": 21800 }, { "epoch": 0.021525736308252486, "grad_norm": 3.3471617698669434, "learning_rate": 9.957146685266846e-06, "loss": 4.6197, "step": 21850 }, { "epoch": 0.021574994286074574, "grad_norm": 2.6453027725219727, "learning_rate": 9.957048167370084e-06, "loss": 4.5625, "step": 21900 }, { "epoch": 0.02162425226389666, "grad_norm": 2.775284767150879, "learning_rate": 9.956949649473324e-06, "loss": 4.5531, "step": 21950 }, { "epoch": 0.02167351024171875, "grad_norm": 2.6043055057525635, "learning_rate": 9.956851131576562e-06, "loss": 4.6084, "step": 22000 }, { "epoch": 0.021722768219540837, "grad_norm": 2.8373444080352783, "learning_rate": 9.956752613679802e-06, "loss": 4.617, "step": 22050 }, { "epoch": 0.021772026197362924, "grad_norm": 2.9421136379241943, "learning_rate": 9.956654095783041e-06, "loss": 4.5574, "step": 22100 }, { "epoch": 0.021821284175185012, "grad_norm": 2.6762993335723877, "learning_rate": 9.95655557788628e-06, "loss": 4.6044, "step": 22150 }, { "epoch": 0.0218705421530071, "grad_norm": 2.8368875980377197, "learning_rate": 9.956457059989519e-06, "loss": 4.5705, "step": 22200 }, { "epoch": 0.021919800130829187, "grad_norm": 2.7534544467926025, "learning_rate": 9.956358542092758e-06, "loss": 4.5357, "step": 22250 }, { "epoch": 0.02196905810865128, "grad_norm": 2.546405792236328, "learning_rate": 9.956260024195996e-06, "loss": 4.5745, "step": 22300 }, { "epoch": 0.022018316086473366, "grad_norm": 2.8086631298065186, "learning_rate": 9.956161506299236e-06, "loss": 
4.5397, "step": 22350 }, { "epoch": 0.022067574064295454, "grad_norm": 3.1287665367126465, "learning_rate": 9.956062988402474e-06, "loss": 4.6081, "step": 22400 }, { "epoch": 0.02211683204211754, "grad_norm": 2.7218003273010254, "learning_rate": 9.955964470505712e-06, "loss": 4.6214, "step": 22450 }, { "epoch": 0.02216609001993963, "grad_norm": 2.703472375869751, "learning_rate": 9.955865952608951e-06, "loss": 4.5923, "step": 22500 }, { "epoch": 0.022215347997761717, "grad_norm": 2.7269394397735596, "learning_rate": 9.95576743471219e-06, "loss": 4.5588, "step": 22550 }, { "epoch": 0.022264605975583805, "grad_norm": 2.916447877883911, "learning_rate": 9.955668916815429e-06, "loss": 4.5693, "step": 22600 }, { "epoch": 0.022313863953405892, "grad_norm": 2.8139445781707764, "learning_rate": 9.955570398918669e-06, "loss": 4.5775, "step": 22650 }, { "epoch": 0.022363121931227983, "grad_norm": 2.853302478790283, "learning_rate": 9.955471881021908e-06, "loss": 4.5705, "step": 22700 }, { "epoch": 0.02241237990905007, "grad_norm": 2.9814963340759277, "learning_rate": 9.955373363125146e-06, "loss": 4.5372, "step": 22750 }, { "epoch": 0.02246163788687216, "grad_norm": 3.0024280548095703, "learning_rate": 9.955274845228386e-06, "loss": 4.5645, "step": 22800 }, { "epoch": 0.022510895864694246, "grad_norm": 3.3001363277435303, "learning_rate": 9.955176327331624e-06, "loss": 4.5601, "step": 22850 }, { "epoch": 0.022560153842516334, "grad_norm": 2.813891887664795, "learning_rate": 9.955077809434863e-06, "loss": 4.548, "step": 22900 }, { "epoch": 0.02260941182033842, "grad_norm": 2.678762674331665, "learning_rate": 9.954979291538101e-06, "loss": 4.5673, "step": 22950 }, { "epoch": 0.02265866979816051, "grad_norm": 2.853184938430786, "learning_rate": 9.95488077364134e-06, "loss": 4.5617, "step": 23000 }, { "epoch": 0.022707927775982597, "grad_norm": 3.1897239685058594, "learning_rate": 9.954782255744579e-06, "loss": 4.5146, "step": 23050 }, { "epoch": 0.022757185753804685, 
"grad_norm": 2.755086898803711, "learning_rate": 9.954683737847819e-06, "loss": 4.5292, "step": 23100 }, { "epoch": 0.022806443731626776, "grad_norm": 2.6911087036132812, "learning_rate": 9.954585219951058e-06, "loss": 4.5316, "step": 23150 }, { "epoch": 0.022855701709448863, "grad_norm": 3.1968307495117188, "learning_rate": 9.954486702054296e-06, "loss": 4.5349, "step": 23200 }, { "epoch": 0.02290495968727095, "grad_norm": 3.1381263732910156, "learning_rate": 9.954388184157536e-06, "loss": 4.5069, "step": 23250 }, { "epoch": 0.02295421766509304, "grad_norm": 2.8112380504608154, "learning_rate": 9.954289666260774e-06, "loss": 4.4765, "step": 23300 }, { "epoch": 0.023003475642915126, "grad_norm": 2.9694907665252686, "learning_rate": 9.954191148364013e-06, "loss": 4.5732, "step": 23350 }, { "epoch": 0.023052733620737214, "grad_norm": 2.9523050785064697, "learning_rate": 9.954092630467251e-06, "loss": 4.4865, "step": 23400 }, { "epoch": 0.0231019915985593, "grad_norm": 3.0660581588745117, "learning_rate": 9.95399411257049e-06, "loss": 4.558, "step": 23450 }, { "epoch": 0.02315124957638139, "grad_norm": 2.924361228942871, "learning_rate": 9.953895594673729e-06, "loss": 4.5436, "step": 23500 }, { "epoch": 0.02320050755420348, "grad_norm": 2.704207181930542, "learning_rate": 9.953797076776968e-06, "loss": 4.5111, "step": 23550 }, { "epoch": 0.023249765532025568, "grad_norm": 2.8359827995300293, "learning_rate": 9.953698558880206e-06, "loss": 4.5091, "step": 23600 }, { "epoch": 0.023299023509847656, "grad_norm": 2.9842169284820557, "learning_rate": 9.953600040983446e-06, "loss": 4.5456, "step": 23650 }, { "epoch": 0.023348281487669743, "grad_norm": 3.054476499557495, "learning_rate": 9.953501523086686e-06, "loss": 4.5192, "step": 23700 }, { "epoch": 0.02339753946549183, "grad_norm": 3.146836280822754, "learning_rate": 9.953403005189923e-06, "loss": 4.538, "step": 23750 }, { "epoch": 0.02344679744331392, "grad_norm": 3.0552377700805664, "learning_rate": 
9.953304487293163e-06, "loss": 4.5192, "step": 23800 }, { "epoch": 0.023496055421136006, "grad_norm": 2.7679977416992188, "learning_rate": 9.953205969396401e-06, "loss": 4.5015, "step": 23850 }, { "epoch": 0.023545313398958094, "grad_norm": 2.6137256622314453, "learning_rate": 9.95310745149964e-06, "loss": 4.4609, "step": 23900 }, { "epoch": 0.02359457137678018, "grad_norm": 3.070784091949463, "learning_rate": 9.953008933602879e-06, "loss": 4.5464, "step": 23950 }, { "epoch": 0.023643829354602273, "grad_norm": 2.8737852573394775, "learning_rate": 9.952910415706118e-06, "loss": 4.5648, "step": 24000 }, { "epoch": 0.02369308733242436, "grad_norm": 2.717620372772217, "learning_rate": 9.952811897809356e-06, "loss": 4.5248, "step": 24050 }, { "epoch": 0.023742345310246448, "grad_norm": 2.733323097229004, "learning_rate": 9.952713379912596e-06, "loss": 4.5215, "step": 24100 }, { "epoch": 0.023791603288068536, "grad_norm": 2.8634002208709717, "learning_rate": 9.952614862015835e-06, "loss": 4.501, "step": 24150 }, { "epoch": 0.023840861265890623, "grad_norm": 3.025319814682007, "learning_rate": 9.952516344119073e-06, "loss": 4.5525, "step": 24200 }, { "epoch": 0.02389011924371271, "grad_norm": 2.849571466445923, "learning_rate": 9.952417826222313e-06, "loss": 4.5105, "step": 24250 }, { "epoch": 0.0239393772215348, "grad_norm": 2.958639621734619, "learning_rate": 9.952319308325551e-06, "loss": 4.4538, "step": 24300 }, { "epoch": 0.023988635199356886, "grad_norm": 3.1624443531036377, "learning_rate": 9.95222079042879e-06, "loss": 4.5027, "step": 24350 }, { "epoch": 0.024037893177178977, "grad_norm": 2.622002124786377, "learning_rate": 9.952122272532028e-06, "loss": 4.4158, "step": 24400 }, { "epoch": 0.024087151155001065, "grad_norm": 3.0104432106018066, "learning_rate": 9.952023754635268e-06, "loss": 4.4742, "step": 24450 }, { "epoch": 0.024136409132823153, "grad_norm": 2.6320149898529053, "learning_rate": 9.951925236738506e-06, "loss": 4.5087, "step": 24500 }, { "epoch": 
0.02418566711064524, "grad_norm": 2.9587395191192627, "learning_rate": 9.951826718841746e-06, "loss": 4.4056, "step": 24550 }, { "epoch": 0.024234925088467328, "grad_norm": 2.920125722885132, "learning_rate": 9.951728200944985e-06, "loss": 4.4541, "step": 24600 }, { "epoch": 0.024284183066289416, "grad_norm": 3.174835205078125, "learning_rate": 9.951629683048223e-06, "loss": 4.5304, "step": 24650 }, { "epoch": 0.024333441044111503, "grad_norm": 2.8356573581695557, "learning_rate": 9.951531165151463e-06, "loss": 4.4722, "step": 24700 }, { "epoch": 0.02438269902193359, "grad_norm": 2.919393301010132, "learning_rate": 9.9514326472547e-06, "loss": 4.505, "step": 24750 }, { "epoch": 0.02443195699975568, "grad_norm": 2.974968194961548, "learning_rate": 9.95133412935794e-06, "loss": 4.4704, "step": 24800 }, { "epoch": 0.02448121497757777, "grad_norm": 3.164583683013916, "learning_rate": 9.951235611461178e-06, "loss": 4.461, "step": 24850 }, { "epoch": 0.024530472955399858, "grad_norm": 2.5640127658843994, "learning_rate": 9.951137093564418e-06, "loss": 4.4747, "step": 24900 }, { "epoch": 0.024579730933221945, "grad_norm": 2.9221560955047607, "learning_rate": 9.951038575667656e-06, "loss": 4.4707, "step": 24950 }, { "epoch": 0.024628988911044033, "grad_norm": 2.738352060317993, "learning_rate": 9.950940057770896e-06, "loss": 4.4911, "step": 25000 }, { "epoch": 0.02467824688886612, "grad_norm": 2.7208471298217773, "learning_rate": 9.950841539874133e-06, "loss": 4.4562, "step": 25050 }, { "epoch": 0.024727504866688208, "grad_norm": 3.376863718032837, "learning_rate": 9.950743021977373e-06, "loss": 4.4455, "step": 25100 }, { "epoch": 0.024776762844510296, "grad_norm": 2.95296049118042, "learning_rate": 9.950644504080613e-06, "loss": 4.4928, "step": 25150 }, { "epoch": 0.024826020822332383, "grad_norm": 3.044597625732422, "learning_rate": 9.95054598618385e-06, "loss": 4.418, "step": 25200 }, { "epoch": 0.024875278800154475, "grad_norm": 3.21134614944458, "learning_rate": 
9.95044746828709e-06, "loss": 4.5426, "step": 25250 }, { "epoch": 0.024924536777976562, "grad_norm": 2.8330888748168945, "learning_rate": 9.950348950390328e-06, "loss": 4.4309, "step": 25300 }, { "epoch": 0.02497379475579865, "grad_norm": 2.7223148345947266, "learning_rate": 9.950250432493568e-06, "loss": 4.5098, "step": 25350 }, { "epoch": 0.025023052733620738, "grad_norm": 2.5765719413757324, "learning_rate": 9.950151914596806e-06, "loss": 4.4615, "step": 25400 }, { "epoch": 0.025072310711442825, "grad_norm": 2.6913654804229736, "learning_rate": 9.950053396700045e-06, "loss": 4.4401, "step": 25450 }, { "epoch": 0.025121568689264913, "grad_norm": 2.719801664352417, "learning_rate": 9.949954878803283e-06, "loss": 4.481, "step": 25500 }, { "epoch": 0.025170826667087, "grad_norm": 2.844313383102417, "learning_rate": 9.949856360906523e-06, "loss": 4.4655, "step": 25550 }, { "epoch": 0.025220084644909088, "grad_norm": 2.9843645095825195, "learning_rate": 9.949757843009763e-06, "loss": 4.462, "step": 25600 }, { "epoch": 0.025269342622731176, "grad_norm": 3.145681142807007, "learning_rate": 9.949659325113002e-06, "loss": 4.4722, "step": 25650 }, { "epoch": 0.025318600600553267, "grad_norm": 2.826963424682617, "learning_rate": 9.94956080721624e-06, "loss": 4.3509, "step": 25700 }, { "epoch": 0.025367858578375355, "grad_norm": 2.685471773147583, "learning_rate": 9.949462289319478e-06, "loss": 4.4905, "step": 25750 }, { "epoch": 0.025417116556197442, "grad_norm": 2.5837581157684326, "learning_rate": 9.949363771422718e-06, "loss": 4.4478, "step": 25800 }, { "epoch": 0.02546637453401953, "grad_norm": 2.8016183376312256, "learning_rate": 9.949265253525956e-06, "loss": 4.4359, "step": 25850 }, { "epoch": 0.025515632511841618, "grad_norm": 3.0903573036193848, "learning_rate": 9.949166735629195e-06, "loss": 4.4128, "step": 25900 }, { "epoch": 0.025564890489663705, "grad_norm": 2.566577434539795, "learning_rate": 9.949068217732433e-06, "loss": 4.4265, "step": 25950 }, { "epoch": 
0.025614148467485793, "grad_norm": 2.682568073272705, "learning_rate": 9.948969699835673e-06, "loss": 4.5022, "step": 26000 }, { "epoch": 0.02566340644530788, "grad_norm": 2.708730459213257, "learning_rate": 9.948871181938912e-06, "loss": 4.4241, "step": 26050 }, { "epoch": 0.02571266442312997, "grad_norm": 2.6141726970672607, "learning_rate": 9.94877266404215e-06, "loss": 4.4263, "step": 26100 }, { "epoch": 0.02576192240095206, "grad_norm": 2.7675726413726807, "learning_rate": 9.94867414614539e-06, "loss": 4.3811, "step": 26150 }, { "epoch": 0.025811180378774147, "grad_norm": 2.8282270431518555, "learning_rate": 9.94857562824863e-06, "loss": 4.4519, "step": 26200 }, { "epoch": 0.025860438356596235, "grad_norm": 2.713754892349243, "learning_rate": 9.948477110351868e-06, "loss": 4.3615, "step": 26250 }, { "epoch": 0.025909696334418322, "grad_norm": 2.759967088699341, "learning_rate": 9.948378592455105e-06, "loss": 4.3905, "step": 26300 }, { "epoch": 0.02595895431224041, "grad_norm": 3.0386972427368164, "learning_rate": 9.948280074558345e-06, "loss": 4.4516, "step": 26350 }, { "epoch": 0.026008212290062498, "grad_norm": 2.6274845600128174, "learning_rate": 9.948181556661583e-06, "loss": 4.3733, "step": 26400 }, { "epoch": 0.026057470267884585, "grad_norm": 2.746767044067383, "learning_rate": 9.948083038764823e-06, "loss": 4.3587, "step": 26450 }, { "epoch": 0.026106728245706673, "grad_norm": 2.960312843322754, "learning_rate": 9.94798452086806e-06, "loss": 4.4111, "step": 26500 }, { "epoch": 0.026155986223528764, "grad_norm": 2.8746421337127686, "learning_rate": 9.9478860029713e-06, "loss": 4.3949, "step": 26550 }, { "epoch": 0.02620524420135085, "grad_norm": 2.835115432739258, "learning_rate": 9.94778748507454e-06, "loss": 4.4458, "step": 26600 }, { "epoch": 0.02625450217917294, "grad_norm": 2.494474411010742, "learning_rate": 9.94768896717778e-06, "loss": 4.3916, "step": 26650 }, { "epoch": 0.026303760156995027, "grad_norm": 2.898473024368286, "learning_rate": 
9.947590449281017e-06, "loss": 4.4364, "step": 26700 }, { "epoch": 0.026353018134817115, "grad_norm": 2.974879503250122, "learning_rate": 9.947491931384257e-06, "loss": 4.4252, "step": 26750 }, { "epoch": 0.026402276112639202, "grad_norm": 3.0099704265594482, "learning_rate": 9.947393413487495e-06, "loss": 4.433, "step": 26800 }, { "epoch": 0.02645153409046129, "grad_norm": 2.701627254486084, "learning_rate": 9.947294895590733e-06, "loss": 4.427, "step": 26850 }, { "epoch": 0.026500792068283378, "grad_norm": 2.887014865875244, "learning_rate": 9.947196377693973e-06, "loss": 4.4071, "step": 26900 }, { "epoch": 0.02655005004610547, "grad_norm": 2.961686134338379, "learning_rate": 9.94709785979721e-06, "loss": 4.3608, "step": 26950 }, { "epoch": 0.026599308023927556, "grad_norm": 2.853254556655884, "learning_rate": 9.94699934190045e-06, "loss": 4.4153, "step": 27000 }, { "epoch": 0.026648566001749644, "grad_norm": 2.50703763961792, "learning_rate": 9.94690082400369e-06, "loss": 4.3889, "step": 27050 }, { "epoch": 0.026697823979571732, "grad_norm": 2.8789684772491455, "learning_rate": 9.94680230610693e-06, "loss": 4.3725, "step": 27100 }, { "epoch": 0.02674708195739382, "grad_norm": 2.7770514488220215, "learning_rate": 9.946703788210167e-06, "loss": 4.3919, "step": 27150 }, { "epoch": 0.026796339935215907, "grad_norm": 3.4222967624664307, "learning_rate": 9.946605270313407e-06, "loss": 4.3957, "step": 27200 }, { "epoch": 0.026845597913037995, "grad_norm": 2.6575770378112793, "learning_rate": 9.946506752416645e-06, "loss": 4.3981, "step": 27250 }, { "epoch": 0.026894855890860082, "grad_norm": 2.8452625274658203, "learning_rate": 9.946408234519884e-06, "loss": 4.3767, "step": 27300 }, { "epoch": 0.02694411386868217, "grad_norm": 3.266728639602661, "learning_rate": 9.946309716623122e-06, "loss": 4.436, "step": 27350 }, { "epoch": 0.02699337184650426, "grad_norm": 2.686967134475708, "learning_rate": 9.94621119872636e-06, "loss": 4.3606, "step": 27400 }, { "epoch": 
0.02704262982432635, "grad_norm": 3.0076820850372314, "learning_rate": 9.9461126808296e-06, "loss": 4.4213, "step": 27450 }, { "epoch": 0.027091887802148436, "grad_norm": 3.0195279121398926, "learning_rate": 9.94601416293284e-06, "loss": 4.374, "step": 27500 }, { "epoch": 0.027141145779970524, "grad_norm": 3.0727953910827637, "learning_rate": 9.945915645036077e-06, "loss": 4.325, "step": 27550 }, { "epoch": 0.027190403757792612, "grad_norm": 2.797142505645752, "learning_rate": 9.945817127139317e-06, "loss": 4.3611, "step": 27600 }, { "epoch": 0.0272396617356147, "grad_norm": 2.734865665435791, "learning_rate": 9.945718609242557e-06, "loss": 4.3325, "step": 27650 }, { "epoch": 0.027288919713436787, "grad_norm": 2.6199657917022705, "learning_rate": 9.945620091345795e-06, "loss": 4.4078, "step": 27700 }, { "epoch": 0.027338177691258875, "grad_norm": 2.6894443035125732, "learning_rate": 9.945521573449034e-06, "loss": 4.3877, "step": 27750 }, { "epoch": 0.027387435669080966, "grad_norm": 2.8690712451934814, "learning_rate": 9.945423055552272e-06, "loss": 4.3637, "step": 27800 }, { "epoch": 0.027436693646903054, "grad_norm": 2.703543186187744, "learning_rate": 9.945324537655512e-06, "loss": 4.3674, "step": 27850 }, { "epoch": 0.02748595162472514, "grad_norm": 3.5251989364624023, "learning_rate": 9.94522601975875e-06, "loss": 4.392, "step": 27900 }, { "epoch": 0.02753520960254723, "grad_norm": 2.43642520904541, "learning_rate": 9.94512750186199e-06, "loss": 4.3497, "step": 27950 }, { "epoch": 0.027584467580369317, "grad_norm": 3.0210397243499756, "learning_rate": 9.945028983965227e-06, "loss": 4.325, "step": 28000 }, { "epoch": 0.027633725558191404, "grad_norm": 2.75996470451355, "learning_rate": 9.944930466068467e-06, "loss": 4.3611, "step": 28050 }, { "epoch": 0.027682983536013492, "grad_norm": 2.9363393783569336, "learning_rate": 9.944831948171707e-06, "loss": 4.3001, "step": 28100 }, { "epoch": 0.02773224151383558, "grad_norm": 3.142714738845825, "learning_rate": 
9.944733430274945e-06, "loss": 4.3954, "step": 28150 }, { "epoch": 0.02778149949165767, "grad_norm": 2.8299307823181152, "learning_rate": 9.944634912378184e-06, "loss": 4.2651, "step": 28200 }, { "epoch": 0.027830757469479758, "grad_norm": 2.5900282859802246, "learning_rate": 9.944536394481422e-06, "loss": 4.3741, "step": 28250 }, { "epoch": 0.027880015447301846, "grad_norm": 3.699406623840332, "learning_rate": 9.944437876584662e-06, "loss": 4.3389, "step": 28300 }, { "epoch": 0.027929273425123934, "grad_norm": 3.636477470397949, "learning_rate": 9.9443393586879e-06, "loss": 4.3641, "step": 28350 }, { "epoch": 0.02797853140294602, "grad_norm": 4.3456130027771, "learning_rate": 9.944240840791138e-06, "loss": 4.4359, "step": 28400 }, { "epoch": 0.02802778938076811, "grad_norm": 2.626979351043701, "learning_rate": 9.944142322894377e-06, "loss": 4.2957, "step": 28450 }, { "epoch": 0.028077047358590197, "grad_norm": 2.863654375076294, "learning_rate": 9.944043804997617e-06, "loss": 4.3753, "step": 28500 }, { "epoch": 0.028126305336412284, "grad_norm": 3.0978944301605225, "learning_rate": 9.943945287100856e-06, "loss": 4.3368, "step": 28550 }, { "epoch": 0.028175563314234372, "grad_norm": 2.9103078842163086, "learning_rate": 9.943846769204094e-06, "loss": 4.3345, "step": 28600 }, { "epoch": 0.028224821292056463, "grad_norm": 2.957070827484131, "learning_rate": 9.943748251307334e-06, "loss": 4.2989, "step": 28650 }, { "epoch": 0.02827407926987855, "grad_norm": 2.9758403301239014, "learning_rate": 9.943649733410572e-06, "loss": 4.3536, "step": 28700 }, { "epoch": 0.02832333724770064, "grad_norm": 2.878831624984741, "learning_rate": 9.943551215513812e-06, "loss": 4.3593, "step": 28750 }, { "epoch": 0.028372595225522726, "grad_norm": 2.9945123195648193, "learning_rate": 9.94345269761705e-06, "loss": 4.3802, "step": 28800 }, { "epoch": 0.028421853203344814, "grad_norm": 2.517561197280884, "learning_rate": 9.943354179720289e-06, "loss": 4.292, "step": 28850 }, { "epoch": 
0.0284711111811669, "grad_norm": 3.021172285079956, "learning_rate": 9.943255661823527e-06, "loss": 4.3913, "step": 28900 }, { "epoch": 0.02852036915898899, "grad_norm": 2.8447377681732178, "learning_rate": 9.943157143926767e-06, "loss": 4.3048, "step": 28950 }, { "epoch": 0.028569627136811077, "grad_norm": 2.4886667728424072, "learning_rate": 9.943058626030005e-06, "loss": 4.3455, "step": 29000 }, { "epoch": 0.028618885114633168, "grad_norm": 2.483572483062744, "learning_rate": 9.942960108133244e-06, "loss": 4.2838, "step": 29050 }, { "epoch": 0.028668143092455255, "grad_norm": 2.782280206680298, "learning_rate": 9.942861590236484e-06, "loss": 4.3446, "step": 29100 }, { "epoch": 0.028717401070277343, "grad_norm": 2.9190547466278076, "learning_rate": 9.942763072339722e-06, "loss": 4.3156, "step": 29150 }, { "epoch": 0.02876665904809943, "grad_norm": 3.011664390563965, "learning_rate": 9.942664554442961e-06, "loss": 4.315, "step": 29200 }, { "epoch": 0.02881591702592152, "grad_norm": 3.0116662979125977, "learning_rate": 9.9425660365462e-06, "loss": 4.3606, "step": 29250 }, { "epoch": 0.028865175003743606, "grad_norm": 2.777803659439087, "learning_rate": 9.942467518649439e-06, "loss": 4.3271, "step": 29300 }, { "epoch": 0.028914432981565694, "grad_norm": 2.8867990970611572, "learning_rate": 9.942369000752677e-06, "loss": 4.305, "step": 29350 }, { "epoch": 0.02896369095938778, "grad_norm": 2.7769391536712646, "learning_rate": 9.942270482855917e-06, "loss": 4.3109, "step": 29400 }, { "epoch": 0.02901294893720987, "grad_norm": 2.857863187789917, "learning_rate": 9.942171964959155e-06, "loss": 4.3277, "step": 29450 }, { "epoch": 0.02906220691503196, "grad_norm": 2.756167411804199, "learning_rate": 9.942073447062394e-06, "loss": 4.3733, "step": 29500 }, { "epoch": 0.029111464892854048, "grad_norm": 3.189030647277832, "learning_rate": 9.941974929165634e-06, "loss": 4.3219, "step": 29550 }, { "epoch": 0.029160722870676135, "grad_norm": 2.9630534648895264, "learning_rate": 
9.941876411268872e-06, "loss": 4.267, "step": 29600 }, { "epoch": 0.029209980848498223, "grad_norm": 2.941082239151001, "learning_rate": 9.941777893372111e-06, "loss": 4.2504, "step": 29650 }, { "epoch": 0.02925923882632031, "grad_norm": 2.5006723403930664, "learning_rate": 9.94167937547535e-06, "loss": 4.3189, "step": 29700 }, { "epoch": 0.0293084968041424, "grad_norm": 3.473111152648926, "learning_rate": 9.941580857578589e-06, "loss": 4.29, "step": 29750 }, { "epoch": 0.029357754781964486, "grad_norm": 3.0181171894073486, "learning_rate": 9.941482339681827e-06, "loss": 4.3411, "step": 29800 }, { "epoch": 0.029407012759786574, "grad_norm": 2.606924057006836, "learning_rate": 9.941383821785066e-06, "loss": 4.3287, "step": 29850 }, { "epoch": 0.029456270737608665, "grad_norm": 2.7728309631347656, "learning_rate": 9.941285303888304e-06, "loss": 4.2773, "step": 29900 }, { "epoch": 0.029505528715430752, "grad_norm": 3.063476085662842, "learning_rate": 9.941186785991544e-06, "loss": 4.3055, "step": 29950 }, { "epoch": 0.02955478669325284, "grad_norm": 2.701085090637207, "learning_rate": 9.941088268094784e-06, "loss": 4.3183, "step": 30000 }, { "epoch": 0.029604044671074928, "grad_norm": 2.775357723236084, "learning_rate": 9.940989750198022e-06, "loss": 4.3379, "step": 30050 }, { "epoch": 0.029653302648897015, "grad_norm": 2.993272304534912, "learning_rate": 9.940891232301261e-06, "loss": 4.2457, "step": 30100 }, { "epoch": 0.029702560626719103, "grad_norm": 3.1036784648895264, "learning_rate": 9.940792714404499e-06, "loss": 4.3579, "step": 30150 }, { "epoch": 0.02975181860454119, "grad_norm": 3.0962276458740234, "learning_rate": 9.940694196507739e-06, "loss": 4.3211, "step": 30200 }, { "epoch": 0.02980107658236328, "grad_norm": 3.196259021759033, "learning_rate": 9.940595678610977e-06, "loss": 4.2959, "step": 30250 }, { "epoch": 0.029850334560185366, "grad_norm": 2.890075445175171, "learning_rate": 9.940497160714216e-06, "loss": 4.2774, "step": 30300 }, { "epoch": 
0.029899592538007457, "grad_norm": 2.652261972427368, "learning_rate": 9.940398642817454e-06, "loss": 4.2812, "step": 30350 }, { "epoch": 0.029948850515829545, "grad_norm": 2.8092494010925293, "learning_rate": 9.940300124920694e-06, "loss": 4.3164, "step": 30400 }, { "epoch": 0.029998108493651632, "grad_norm": 2.6249241828918457, "learning_rate": 9.940201607023933e-06, "loss": 4.324, "step": 30450 }, { "epoch": 0.03004736647147372, "grad_norm": 2.8325743675231934, "learning_rate": 9.940103089127171e-06, "loss": 4.2736, "step": 30500 }, { "epoch": 0.030096624449295808, "grad_norm": 2.9740049839019775, "learning_rate": 9.940004571230411e-06, "loss": 4.2682, "step": 30550 }, { "epoch": 0.030145882427117895, "grad_norm": 2.6152381896972656, "learning_rate": 9.939906053333649e-06, "loss": 4.3285, "step": 30600 }, { "epoch": 0.030195140404939983, "grad_norm": 3.0052435398101807, "learning_rate": 9.939807535436889e-06, "loss": 4.163, "step": 30650 }, { "epoch": 0.03024439838276207, "grad_norm": 2.832188606262207, "learning_rate": 9.939709017540127e-06, "loss": 4.2838, "step": 30700 }, { "epoch": 0.030293656360584162, "grad_norm": 3.2174715995788574, "learning_rate": 9.939610499643366e-06, "loss": 4.2741, "step": 30750 }, { "epoch": 0.03034291433840625, "grad_norm": 3.045895576477051, "learning_rate": 9.939511981746604e-06, "loss": 4.3186, "step": 30800 }, { "epoch": 0.030392172316228337, "grad_norm": 2.9008102416992188, "learning_rate": 9.939413463849844e-06, "loss": 4.2653, "step": 30850 }, { "epoch": 0.030441430294050425, "grad_norm": 3.228961706161499, "learning_rate": 9.939314945953082e-06, "loss": 4.263, "step": 30900 }, { "epoch": 0.030490688271872513, "grad_norm": 2.837714195251465, "learning_rate": 9.939216428056321e-06, "loss": 4.2651, "step": 30950 }, { "epoch": 0.0305399462496946, "grad_norm": 2.987471580505371, "learning_rate": 9.939117910159561e-06, "loss": 4.2813, "step": 31000 }, { "epoch": 0.030589204227516688, "grad_norm": 2.773286819458008, 
"learning_rate": 9.9390193922628e-06, "loss": 4.2788, "step": 31050 }, { "epoch": 0.030638462205338775, "grad_norm": 5.4688825607299805, "learning_rate": 9.938920874366038e-06, "loss": 4.2912, "step": 31100 }, { "epoch": 0.030687720183160863, "grad_norm": 2.573085308074951, "learning_rate": 9.938822356469276e-06, "loss": 4.2504, "step": 31150 }, { "epoch": 0.030736978160982954, "grad_norm": 3.1248066425323486, "learning_rate": 9.938723838572516e-06, "loss": 4.279, "step": 31200 }, { "epoch": 0.030786236138805042, "grad_norm": 2.7262656688690186, "learning_rate": 9.938625320675754e-06, "loss": 4.2661, "step": 31250 }, { "epoch": 0.03083549411662713, "grad_norm": 2.6642582416534424, "learning_rate": 9.938526802778994e-06, "loss": 4.2884, "step": 31300 }, { "epoch": 0.030884752094449217, "grad_norm": 2.5909652709960938, "learning_rate": 9.938428284882232e-06, "loss": 4.2218, "step": 31350 }, { "epoch": 0.030934010072271305, "grad_norm": 3.19968843460083, "learning_rate": 9.938329766985471e-06, "loss": 4.2665, "step": 31400 }, { "epoch": 0.030983268050093393, "grad_norm": 2.6201486587524414, "learning_rate": 9.93823124908871e-06, "loss": 4.2467, "step": 31450 }, { "epoch": 0.03103252602791548, "grad_norm": 2.6821377277374268, "learning_rate": 9.93813273119195e-06, "loss": 4.2395, "step": 31500 }, { "epoch": 0.031081784005737568, "grad_norm": 2.692659616470337, "learning_rate": 9.938034213295188e-06, "loss": 4.27, "step": 31550 }, { "epoch": 0.03113104198355966, "grad_norm": 2.770172357559204, "learning_rate": 9.937935695398428e-06, "loss": 4.2525, "step": 31600 }, { "epoch": 0.031180299961381747, "grad_norm": 2.8454737663269043, "learning_rate": 9.937837177501666e-06, "loss": 4.3315, "step": 31650 }, { "epoch": 0.031229557939203834, "grad_norm": 2.8132808208465576, "learning_rate": 9.937738659604904e-06, "loss": 4.2667, "step": 31700 }, { "epoch": 0.03127881591702592, "grad_norm": 3.2848119735717773, "learning_rate": 9.937640141708143e-06, "loss": 4.2363, "step": 31750 
}, { "epoch": 0.031328073894848006, "grad_norm": 2.7837979793548584, "learning_rate": 9.937541623811381e-06, "loss": 4.2621, "step": 31800 }, { "epoch": 0.0313773318726701, "grad_norm": 3.278162956237793, "learning_rate": 9.937443105914621e-06, "loss": 4.2258, "step": 31850 }, { "epoch": 0.03142658985049219, "grad_norm": 3.696763038635254, "learning_rate": 9.93734458801786e-06, "loss": 4.2227, "step": 31900 }, { "epoch": 0.03147584782831427, "grad_norm": 3.0131752490997314, "learning_rate": 9.937246070121099e-06, "loss": 4.1841, "step": 31950 }, { "epoch": 0.031525105806136364, "grad_norm": 2.6339046955108643, "learning_rate": 9.937147552224338e-06, "loss": 4.2495, "step": 32000 }, { "epoch": 0.03157436378395845, "grad_norm": 2.831568956375122, "learning_rate": 9.937049034327578e-06, "loss": 4.2901, "step": 32050 }, { "epoch": 0.03162362176178054, "grad_norm": 3.0567262172698975, "learning_rate": 9.936950516430816e-06, "loss": 4.2202, "step": 32100 }, { "epoch": 0.03167287973960262, "grad_norm": 2.884934663772583, "learning_rate": 9.936851998534055e-06, "loss": 4.2218, "step": 32150 }, { "epoch": 0.031722137717424714, "grad_norm": 2.675477981567383, "learning_rate": 9.936753480637293e-06, "loss": 4.2183, "step": 32200 }, { "epoch": 0.031771395695246805, "grad_norm": 2.8141536712646484, "learning_rate": 9.936654962740531e-06, "loss": 4.2555, "step": 32250 }, { "epoch": 0.03182065367306889, "grad_norm": 2.88698148727417, "learning_rate": 9.936556444843771e-06, "loss": 4.2358, "step": 32300 }, { "epoch": 0.03186991165089098, "grad_norm": 2.5037083625793457, "learning_rate": 9.936457926947009e-06, "loss": 4.266, "step": 32350 }, { "epoch": 0.031919169628713065, "grad_norm": 2.6572444438934326, "learning_rate": 9.936359409050248e-06, "loss": 4.2571, "step": 32400 }, { "epoch": 0.031968427606535156, "grad_norm": 2.7781388759613037, "learning_rate": 9.936260891153488e-06, "loss": 4.2409, "step": 32450 }, { "epoch": 0.03201768558435724, "grad_norm": 3.4566988945007324, 
"learning_rate": 9.936162373256728e-06, "loss": 4.2466, "step": 32500 }, { "epoch": 0.03206694356217933, "grad_norm": 3.0501184463500977, "learning_rate": 9.936063855359966e-06, "loss": 4.2242, "step": 32550 }, { "epoch": 0.032116201540001416, "grad_norm": 2.704448699951172, "learning_rate": 9.935965337463205e-06, "loss": 4.263, "step": 32600 }, { "epoch": 0.03216545951782351, "grad_norm": 2.9793906211853027, "learning_rate": 9.935866819566443e-06, "loss": 4.2366, "step": 32650 }, { "epoch": 0.0322147174956456, "grad_norm": 2.798672676086426, "learning_rate": 9.935768301669683e-06, "loss": 4.1261, "step": 32700 }, { "epoch": 0.03226397547346768, "grad_norm": 2.760988235473633, "learning_rate": 9.93566978377292e-06, "loss": 4.2116, "step": 32750 }, { "epoch": 0.03231323345128977, "grad_norm": 2.995474338531494, "learning_rate": 9.935571265876159e-06, "loss": 4.2328, "step": 32800 }, { "epoch": 0.03236249142911186, "grad_norm": 2.649573802947998, "learning_rate": 9.935472747979398e-06, "loss": 4.2374, "step": 32850 }, { "epoch": 0.03241174940693395, "grad_norm": 3.1019508838653564, "learning_rate": 9.935374230082638e-06, "loss": 4.1998, "step": 32900 }, { "epoch": 0.03246100738475603, "grad_norm": 2.9016926288604736, "learning_rate": 9.935275712185878e-06, "loss": 4.2147, "step": 32950 }, { "epoch": 0.032510265362578124, "grad_norm": 2.883260488510132, "learning_rate": 9.935177194289115e-06, "loss": 4.2134, "step": 33000 }, { "epoch": 0.03255952334040021, "grad_norm": 3.6488003730773926, "learning_rate": 9.935078676392355e-06, "loss": 4.1728, "step": 33050 }, { "epoch": 0.0326087813182223, "grad_norm": 2.7432384490966797, "learning_rate": 9.934980158495593e-06, "loss": 4.2468, "step": 33100 }, { "epoch": 0.03265803929604439, "grad_norm": 2.9957826137542725, "learning_rate": 9.934881640598833e-06, "loss": 4.2498, "step": 33150 }, { "epoch": 0.032707297273866474, "grad_norm": 2.6980412006378174, "learning_rate": 9.93478312270207e-06, "loss": 4.1658, "step": 33200 }, { 
"epoch": 0.032756555251688566, "grad_norm": 2.9208524227142334, "learning_rate": 9.93468460480531e-06, "loss": 4.204, "step": 33250 }, { "epoch": 0.03280581322951065, "grad_norm": 2.606506586074829, "learning_rate": 9.934586086908548e-06, "loss": 4.169, "step": 33300 }, { "epoch": 0.03285507120733274, "grad_norm": 2.5529887676239014, "learning_rate": 9.934487569011788e-06, "loss": 4.2361, "step": 33350 }, { "epoch": 0.032904329185154825, "grad_norm": 3.1340713500976562, "learning_rate": 9.934389051115026e-06, "loss": 4.1706, "step": 33400 }, { "epoch": 0.032953587162976916, "grad_norm": 2.99807071685791, "learning_rate": 9.934290533218265e-06, "loss": 4.2163, "step": 33450 }, { "epoch": 0.03300284514079901, "grad_norm": 3.128819465637207, "learning_rate": 9.934192015321505e-06, "loss": 4.1475, "step": 33500 }, { "epoch": 0.03305210311862109, "grad_norm": 2.877211570739746, "learning_rate": 9.934093497424743e-06, "loss": 4.2145, "step": 33550 }, { "epoch": 0.03310136109644318, "grad_norm": 2.9433438777923584, "learning_rate": 9.933994979527983e-06, "loss": 4.1927, "step": 33600 }, { "epoch": 0.03315061907426527, "grad_norm": 3.1519973278045654, "learning_rate": 9.93389646163122e-06, "loss": 4.2248, "step": 33650 }, { "epoch": 0.03319987705208736, "grad_norm": 3.2136318683624268, "learning_rate": 9.93379794373446e-06, "loss": 4.1946, "step": 33700 }, { "epoch": 0.03324913502990944, "grad_norm": 2.62773060798645, "learning_rate": 9.933699425837698e-06, "loss": 4.212, "step": 33750 }, { "epoch": 0.03329839300773153, "grad_norm": 2.5957818031311035, "learning_rate": 9.933600907940938e-06, "loss": 4.2211, "step": 33800 }, { "epoch": 0.03334765098555362, "grad_norm": 2.8922276496887207, "learning_rate": 9.933502390044176e-06, "loss": 4.1547, "step": 33850 }, { "epoch": 0.03339690896337571, "grad_norm": 2.91373348236084, "learning_rate": 9.933403872147415e-06, "loss": 4.128, "step": 33900 }, { "epoch": 0.0334461669411978, "grad_norm": 3.1384732723236084, "learning_rate": 
9.933305354250655e-06, "loss": 4.1551, "step": 33950 }, { "epoch": 0.033495424919019884, "grad_norm": 3.0681543350219727, "learning_rate": 9.933206836353893e-06, "loss": 4.1817, "step": 34000 }, { "epoch": 0.033544682896841975, "grad_norm": 2.733565092086792, "learning_rate": 9.933108318457132e-06, "loss": 4.188, "step": 34050 }, { "epoch": 0.03359394087466406, "grad_norm": 2.9783501625061035, "learning_rate": 9.93300980056037e-06, "loss": 4.1784, "step": 34100 }, { "epoch": 0.03364319885248615, "grad_norm": 3.0292632579803467, "learning_rate": 9.93291128266361e-06, "loss": 4.2044, "step": 34150 }, { "epoch": 0.033692456830308234, "grad_norm": 2.689316749572754, "learning_rate": 9.932812764766848e-06, "loss": 4.1782, "step": 34200 }, { "epoch": 0.033741714808130326, "grad_norm": 2.7311851978302, "learning_rate": 9.932714246870087e-06, "loss": 4.212, "step": 34250 }, { "epoch": 0.03379097278595241, "grad_norm": 3.307145595550537, "learning_rate": 9.932615728973325e-06, "loss": 4.2233, "step": 34300 }, { "epoch": 0.0338402307637745, "grad_norm": 2.899125576019287, "learning_rate": 9.932517211076565e-06, "loss": 4.1904, "step": 34350 }, { "epoch": 0.03388948874159659, "grad_norm": 2.9214344024658203, "learning_rate": 9.932418693179805e-06, "loss": 4.1458, "step": 34400 }, { "epoch": 0.033938746719418676, "grad_norm": 3.1509246826171875, "learning_rate": 9.932320175283043e-06, "loss": 4.2167, "step": 34450 }, { "epoch": 0.03398800469724077, "grad_norm": 2.7806036472320557, "learning_rate": 9.932221657386282e-06, "loss": 4.1926, "step": 34500 }, { "epoch": 0.03403726267506285, "grad_norm": 2.930236339569092, "learning_rate": 9.93212313948952e-06, "loss": 4.111, "step": 34550 }, { "epoch": 0.03408652065288494, "grad_norm": 3.0260918140411377, "learning_rate": 9.93202462159276e-06, "loss": 4.1488, "step": 34600 }, { "epoch": 0.03413577863070703, "grad_norm": 2.7561604976654053, "learning_rate": 9.931926103695998e-06, "loss": 4.2183, "step": 34650 }, { "epoch": 
0.03418503660852912, "grad_norm": 2.8858392238616943, "learning_rate": 9.931827585799237e-06, "loss": 4.2273, "step": 34700 }, { "epoch": 0.0342342945863512, "grad_norm": 2.719752550125122, "learning_rate": 9.931729067902475e-06, "loss": 4.1792, "step": 34750 }, { "epoch": 0.03428355256417329, "grad_norm": 2.6967825889587402, "learning_rate": 9.931630550005715e-06, "loss": 4.1824, "step": 34800 }, { "epoch": 0.034332810541995384, "grad_norm": 2.947338342666626, "learning_rate": 9.931532032108953e-06, "loss": 4.1162, "step": 34850 }, { "epoch": 0.03438206851981747, "grad_norm": 3.062485933303833, "learning_rate": 9.931433514212192e-06, "loss": 4.2112, "step": 34900 }, { "epoch": 0.03443132649763956, "grad_norm": 2.715425968170166, "learning_rate": 9.931334996315432e-06, "loss": 4.1605, "step": 34950 }, { "epoch": 0.034480584475461644, "grad_norm": 2.655729293823242, "learning_rate": 9.93123647841867e-06, "loss": 4.1399, "step": 35000 }, { "epoch": 0.034529842453283735, "grad_norm": 2.7333297729492188, "learning_rate": 9.93113796052191e-06, "loss": 4.1808, "step": 35050 }, { "epoch": 0.03457910043110582, "grad_norm": 2.8066625595092773, "learning_rate": 9.931039442625148e-06, "loss": 4.2149, "step": 35100 }, { "epoch": 0.03462835840892791, "grad_norm": 2.4432833194732666, "learning_rate": 9.930940924728387e-06, "loss": 4.1546, "step": 35150 }, { "epoch": 0.03467761638675, "grad_norm": 2.8933513164520264, "learning_rate": 9.930842406831625e-06, "loss": 4.1255, "step": 35200 }, { "epoch": 0.034726874364572086, "grad_norm": 2.8086421489715576, "learning_rate": 9.930743888934865e-06, "loss": 4.163, "step": 35250 }, { "epoch": 0.03477613234239418, "grad_norm": 3.1362359523773193, "learning_rate": 9.930645371038103e-06, "loss": 4.1517, "step": 35300 }, { "epoch": 0.03482539032021626, "grad_norm": 2.80601167678833, "learning_rate": 9.930546853141342e-06, "loss": 4.1405, "step": 35350 }, { "epoch": 0.03487464829803835, "grad_norm": 2.7374587059020996, "learning_rate": 
9.930448335244582e-06, "loss": 4.1281, "step": 35400 }, { "epoch": 0.034923906275860436, "grad_norm": 2.8682756423950195, "learning_rate": 9.93034981734782e-06, "loss": 4.1333, "step": 35450 }, { "epoch": 0.03497316425368253, "grad_norm": 3.0086607933044434, "learning_rate": 9.93025129945106e-06, "loss": 4.1201, "step": 35500 }, { "epoch": 0.03502242223150461, "grad_norm": 2.7637526988983154, "learning_rate": 9.930152781554297e-06, "loss": 4.1463, "step": 35550 }, { "epoch": 0.0350716802093267, "grad_norm": 3.0745182037353516, "learning_rate": 9.930054263657537e-06, "loss": 4.1661, "step": 35600 }, { "epoch": 0.035120938187148794, "grad_norm": 2.8573169708251953, "learning_rate": 9.929955745760775e-06, "loss": 4.1086, "step": 35650 }, { "epoch": 0.03517019616497088, "grad_norm": 2.6791625022888184, "learning_rate": 9.929857227864015e-06, "loss": 4.1214, "step": 35700 }, { "epoch": 0.03521945414279297, "grad_norm": 3.7227554321289062, "learning_rate": 9.929758709967253e-06, "loss": 4.111, "step": 35750 }, { "epoch": 0.03526871212061505, "grad_norm": 2.7368383407592773, "learning_rate": 9.929660192070492e-06, "loss": 4.103, "step": 35800 }, { "epoch": 0.035317970098437144, "grad_norm": 3.2191810607910156, "learning_rate": 9.929561674173732e-06, "loss": 4.0781, "step": 35850 }, { "epoch": 0.03536722807625923, "grad_norm": 2.909776449203491, "learning_rate": 9.92946315627697e-06, "loss": 4.0883, "step": 35900 }, { "epoch": 0.03541648605408132, "grad_norm": 2.6579201221466064, "learning_rate": 9.92936463838021e-06, "loss": 4.1208, "step": 35950 }, { "epoch": 0.035465744031903404, "grad_norm": 2.800654172897339, "learning_rate": 9.929266120483447e-06, "loss": 4.1171, "step": 36000 }, { "epoch": 0.035515002009725495, "grad_norm": 2.6417629718780518, "learning_rate": 9.929167602586687e-06, "loss": 4.1728, "step": 36050 }, { "epoch": 0.035564259987547586, "grad_norm": 2.5177643299102783, "learning_rate": 9.929069084689925e-06, "loss": 4.1347, "step": 36100 }, { "epoch": 
0.03561351796536967, "grad_norm": 2.960146903991699, "learning_rate": 9.928970566793165e-06, "loss": 4.1211, "step": 36150 }, { "epoch": 0.03566277594319176, "grad_norm": 2.9792191982269287, "learning_rate": 9.928872048896402e-06, "loss": 4.2129, "step": 36200 }, { "epoch": 0.035712033921013846, "grad_norm": 2.6205337047576904, "learning_rate": 9.928773530999642e-06, "loss": 4.1066, "step": 36250 }, { "epoch": 0.03576129189883594, "grad_norm": 2.703212261199951, "learning_rate": 9.928675013102882e-06, "loss": 4.1596, "step": 36300 }, { "epoch": 0.03581054987665802, "grad_norm": 2.9238173961639404, "learning_rate": 9.92857649520612e-06, "loss": 4.1132, "step": 36350 }, { "epoch": 0.03585980785448011, "grad_norm": 2.7000010013580322, "learning_rate": 9.92847797730936e-06, "loss": 4.1297, "step": 36400 }, { "epoch": 0.035909065832302196, "grad_norm": 3.70847749710083, "learning_rate": 9.928379459412599e-06, "loss": 4.1398, "step": 36450 }, { "epoch": 0.03595832381012429, "grad_norm": 3.3755671977996826, "learning_rate": 9.928280941515837e-06, "loss": 4.0694, "step": 36500 }, { "epoch": 0.03600758178794638, "grad_norm": 3.498547077178955, "learning_rate": 9.928182423619075e-06, "loss": 4.1035, "step": 36550 }, { "epoch": 0.03605683976576846, "grad_norm": 2.8539059162139893, "learning_rate": 9.928083905722314e-06, "loss": 4.2017, "step": 36600 }, { "epoch": 0.036106097743590554, "grad_norm": 2.741032123565674, "learning_rate": 9.927985387825552e-06, "loss": 4.1154, "step": 36650 }, { "epoch": 0.03615535572141264, "grad_norm": 2.9323079586029053, "learning_rate": 9.927886869928792e-06, "loss": 4.1154, "step": 36700 }, { "epoch": 0.03620461369923473, "grad_norm": 2.8776886463165283, "learning_rate": 9.92778835203203e-06, "loss": 4.0792, "step": 36750 }, { "epoch": 0.03625387167705681, "grad_norm": 2.8153183460235596, "learning_rate": 9.92768983413527e-06, "loss": 4.1233, "step": 36800 }, { "epoch": 0.036303129654878905, "grad_norm": 2.682251214981079, "learning_rate": 
9.927591316238509e-06, "loss": 4.1254, "step": 36850 }, { "epoch": 0.036352387632700996, "grad_norm": 3.0005993843078613, "learning_rate": 9.998702870716563e-06, "loss": 4.0933, "step": 36900 }, { "epoch": 0.03640164561052308, "grad_norm": 2.998979330062866, "learning_rate": 9.998699343571177e-06, "loss": 4.0736, "step": 36950 }, { "epoch": 0.03645090358834517, "grad_norm": 3.270879030227661, "learning_rate": 9.998695811637427e-06, "loss": 4.0595, "step": 37000 }, { "epoch": 0.036500161566167255, "grad_norm": 2.8358864784240723, "learning_rate": 9.998692274915318e-06, "loss": 4.0795, "step": 37050 }, { "epoch": 0.036549419543989346, "grad_norm": 2.8862786293029785, "learning_rate": 9.998688733404855e-06, "loss": 4.0613, "step": 37100 }, { "epoch": 0.03659867752181143, "grad_norm": 2.930955648422241, "learning_rate": 9.998685187106038e-06, "loss": 4.0911, "step": 37150 }, { "epoch": 0.03664793549963352, "grad_norm": 2.6923890113830566, "learning_rate": 9.998681636018872e-06, "loss": 4.1247, "step": 37200 }, { "epoch": 0.036697193477455606, "grad_norm": 2.752511501312256, "learning_rate": 9.99867808014336e-06, "loss": 4.0601, "step": 37250 }, { "epoch": 0.0367464514552777, "grad_norm": 2.8299922943115234, "learning_rate": 9.998674519479508e-06, "loss": 4.0436, "step": 37300 }, { "epoch": 0.03679570943309979, "grad_norm": 2.979118585586548, "learning_rate": 9.998670954027316e-06, "loss": 4.152, "step": 37350 }, { "epoch": 0.03684496741092187, "grad_norm": 2.846278190612793, "learning_rate": 9.998667383786789e-06, "loss": 4.0301, "step": 37400 }, { "epoch": 0.03689422538874396, "grad_norm": 2.6260616779327393, "learning_rate": 9.998663808757929e-06, "loss": 4.0717, "step": 37450 }, { "epoch": 0.03694348336656605, "grad_norm": 2.721881628036499, "learning_rate": 9.998660228940741e-06, "loss": 4.0508, "step": 37500 }, { "epoch": 0.03699274134438814, "grad_norm": 2.9743435382843018, "learning_rate": 9.998656644335228e-06, "loss": 4.0843, "step": 37550 }, { "epoch": 
0.03704199932221022, "grad_norm": 2.772188901901245, "learning_rate": 9.998653054941392e-06, "loss": 4.1388, "step": 37600 }, { "epoch": 0.037091257300032314, "grad_norm": 2.910201072692871, "learning_rate": 9.99864946075924e-06, "loss": 4.1406, "step": 37650 }, { "epoch": 0.0371405152778544, "grad_norm": 2.79813551902771, "learning_rate": 9.998645861788772e-06, "loss": 4.0817, "step": 37700 }, { "epoch": 0.03718977325567649, "grad_norm": 2.7184360027313232, "learning_rate": 9.998642258029994e-06, "loss": 4.1197, "step": 37750 }, { "epoch": 0.03723903123349858, "grad_norm": 2.999150037765503, "learning_rate": 9.998638649482907e-06, "loss": 4.1145, "step": 37800 }, { "epoch": 0.037288289211320665, "grad_norm": 2.551774024963379, "learning_rate": 9.998635036147516e-06, "loss": 4.0886, "step": 37850 }, { "epoch": 0.037337547189142756, "grad_norm": 2.7964155673980713, "learning_rate": 9.998631418023823e-06, "loss": 4.0857, "step": 37900 }, { "epoch": 0.03738680516696484, "grad_norm": 2.9535751342773438, "learning_rate": 9.998627795111835e-06, "loss": 4.1155, "step": 37950 }, { "epoch": 0.03743606314478693, "grad_norm": 3.12491512298584, "learning_rate": 9.99862416741155e-06, "loss": 4.1098, "step": 38000 }, { "epoch": 0.037485321122609015, "grad_norm": 2.8504207134246826, "learning_rate": 9.998620534922976e-06, "loss": 4.101, "step": 38050 }, { "epoch": 0.037534579100431106, "grad_norm": 2.9374349117279053, "learning_rate": 9.998616897646115e-06, "loss": 4.027, "step": 38100 }, { "epoch": 0.03758383707825319, "grad_norm": 2.603107213973999, "learning_rate": 9.99861325558097e-06, "loss": 4.1027, "step": 38150 }, { "epoch": 0.03763309505607528, "grad_norm": 2.8215765953063965, "learning_rate": 9.998609608727545e-06, "loss": 4.1212, "step": 38200 }, { "epoch": 0.03768235303389737, "grad_norm": 2.7805447578430176, "learning_rate": 9.998605957085846e-06, "loss": 4.0814, "step": 38250 }, { "epoch": 0.03773161101171946, "grad_norm": 2.9574592113494873, "learning_rate": 
9.998602300655871e-06, "loss": 4.0814, "step": 38300 }, { "epoch": 0.03778086898954155, "grad_norm": 2.7715060710906982, "learning_rate": 9.998598639437627e-06, "loss": 4.1118, "step": 38350 }, { "epoch": 0.03783012696736363, "grad_norm": 2.58461332321167, "learning_rate": 9.998594973431117e-06, "loss": 4.1173, "step": 38400 }, { "epoch": 0.03787938494518572, "grad_norm": 2.791839361190796, "learning_rate": 9.998591302636344e-06, "loss": 4.0453, "step": 38450 }, { "epoch": 0.03792864292300781, "grad_norm": 2.861480712890625, "learning_rate": 9.998587627053311e-06, "loss": 4.0709, "step": 38500 }, { "epoch": 0.0379779009008299, "grad_norm": 2.8120946884155273, "learning_rate": 9.998583946682024e-06, "loss": 4.0649, "step": 38550 }, { "epoch": 0.03802715887865199, "grad_norm": 2.9638471603393555, "learning_rate": 9.998580261522486e-06, "loss": 4.0673, "step": 38600 }, { "epoch": 0.038076416856474074, "grad_norm": 2.6576764583587646, "learning_rate": 9.998576571574698e-06, "loss": 4.1153, "step": 38650 }, { "epoch": 0.038125674834296165, "grad_norm": 2.9432432651519775, "learning_rate": 9.998572876838664e-06, "loss": 4.066, "step": 38700 }, { "epoch": 0.03817493281211825, "grad_norm": 3.0570549964904785, "learning_rate": 9.99856917731439e-06, "loss": 4.0585, "step": 38750 }, { "epoch": 0.03822419078994034, "grad_norm": 3.075031042098999, "learning_rate": 9.998565473001879e-06, "loss": 4.1043, "step": 38800 }, { "epoch": 0.038273448767762425, "grad_norm": 2.788705825805664, "learning_rate": 9.998561763901134e-06, "loss": 4.0385, "step": 38850 }, { "epoch": 0.038322706745584516, "grad_norm": 2.8631489276885986, "learning_rate": 9.998558050012155e-06, "loss": 4.0498, "step": 38900 }, { "epoch": 0.0383719647234066, "grad_norm": 2.827277898788452, "learning_rate": 9.99855433133495e-06, "loss": 4.0671, "step": 38950 }, { "epoch": 0.03842122270122869, "grad_norm": 2.749055862426758, "learning_rate": 9.998550607869525e-06, "loss": 4.0899, "step": 39000 }, { "epoch": 
0.03847048067905078, "grad_norm": 2.762758731842041, "learning_rate": 9.998546879615876e-06, "loss": 4.0084, "step": 39050 }, { "epoch": 0.038519738656872866, "grad_norm": 2.815336227416992, "learning_rate": 9.998543146574012e-06, "loss": 4.1087, "step": 39100 }, { "epoch": 0.03856899663469496, "grad_norm": 2.723120927810669, "learning_rate": 9.998539408743935e-06, "loss": 4.0246, "step": 39150 }, { "epoch": 0.03861825461251704, "grad_norm": 3.1131174564361572, "learning_rate": 9.998535666125648e-06, "loss": 4.0957, "step": 39200 }, { "epoch": 0.03866751259033913, "grad_norm": 2.836998224258423, "learning_rate": 9.998531918719157e-06, "loss": 4.0328, "step": 39250 }, { "epoch": 0.03871677056816122, "grad_norm": 2.7810873985290527, "learning_rate": 9.99852816652446e-06, "loss": 4.0695, "step": 39300 }, { "epoch": 0.03876602854598331, "grad_norm": 2.840212345123291, "learning_rate": 9.998524409541568e-06, "loss": 4.0878, "step": 39350 }, { "epoch": 0.03881528652380539, "grad_norm": 2.7455148696899414, "learning_rate": 9.99852064777048e-06, "loss": 4.0332, "step": 39400 }, { "epoch": 0.038864544501627483, "grad_norm": 2.9797582626342773, "learning_rate": 9.9985168812112e-06, "loss": 4.0475, "step": 39450 }, { "epoch": 0.038913802479449575, "grad_norm": 3.239074945449829, "learning_rate": 9.998513109863734e-06, "loss": 4.0432, "step": 39500 }, { "epoch": 0.03896306045727166, "grad_norm": 3.2542123794555664, "learning_rate": 9.998509333728083e-06, "loss": 4.079, "step": 39550 }, { "epoch": 0.03901231843509375, "grad_norm": 2.9895012378692627, "learning_rate": 9.99850555280425e-06, "loss": 4.0879, "step": 39600 }, { "epoch": 0.039061576412915834, "grad_norm": 2.6337368488311768, "learning_rate": 9.998501767092241e-06, "loss": 4.0793, "step": 39650 }, { "epoch": 0.039110834390737925, "grad_norm": 2.9258251190185547, "learning_rate": 9.99849797659206e-06, "loss": 3.9796, "step": 39700 }, { "epoch": 0.03916009236856001, "grad_norm": 2.780320882797241, "learning_rate": 
9.998494181303709e-06, "loss": 4.118, "step": 39750 }, { "epoch": 0.0392093503463821, "grad_norm": 2.6904499530792236, "learning_rate": 9.99849038122719e-06, "loss": 4.0857, "step": 39800 }, { "epoch": 0.039258608324204185, "grad_norm": 2.6314303874969482, "learning_rate": 9.998486576362511e-06, "loss": 4.0408, "step": 39850 }, { "epoch": 0.039307866302026276, "grad_norm": 2.7986602783203125, "learning_rate": 9.998482766709672e-06, "loss": 4.0728, "step": 39900 }, { "epoch": 0.03935712427984837, "grad_norm": 2.933122396469116, "learning_rate": 9.998478952268677e-06, "loss": 4.0494, "step": 39950 }, { "epoch": 0.03940638225767045, "grad_norm": 2.844691753387451, "learning_rate": 9.998475133039534e-06, "loss": 4.0467, "step": 40000 }, { "epoch": 0.03945564023549254, "grad_norm": 2.870483160018921, "learning_rate": 9.998471309022239e-06, "loss": 4.1035, "step": 40050 }, { "epoch": 0.039504898213314626, "grad_norm": 2.829791307449341, "learning_rate": 9.998467480216802e-06, "loss": 4.0624, "step": 40100 }, { "epoch": 0.03955415619113672, "grad_norm": 2.665922164916992, "learning_rate": 9.998463646623225e-06, "loss": 4.0059, "step": 40150 }, { "epoch": 0.0396034141689588, "grad_norm": 2.625274181365967, "learning_rate": 9.998459808241512e-06, "loss": 4.0042, "step": 40200 }, { "epoch": 0.03965267214678089, "grad_norm": 2.7577829360961914, "learning_rate": 9.998455965071664e-06, "loss": 4.0405, "step": 40250 }, { "epoch": 0.039701930124602984, "grad_norm": 3.1029510498046875, "learning_rate": 9.998452117113687e-06, "loss": 4.0802, "step": 40300 }, { "epoch": 0.03975118810242507, "grad_norm": 2.890066385269165, "learning_rate": 9.998448264367584e-06, "loss": 3.9937, "step": 40350 }, { "epoch": 0.03980044608024716, "grad_norm": 2.9326283931732178, "learning_rate": 9.998444406833361e-06, "loss": 4.0449, "step": 40400 }, { "epoch": 0.039849704058069244, "grad_norm": 2.78877329826355, "learning_rate": 9.998440544511017e-06, "loss": 3.9658, "step": 40450 }, { "epoch": 
0.039898962035891335, "grad_norm": 2.731938362121582, "learning_rate": 9.99843667740056e-06, "loss": 4.0374, "step": 40500 }, { "epoch": 0.03994822001371342, "grad_norm": 2.751181125640869, "learning_rate": 9.998432805501992e-06, "loss": 4.0805, "step": 40550 }, { "epoch": 0.03999747799153551, "grad_norm": 2.676797389984131, "learning_rate": 9.998428928815316e-06, "loss": 3.9896, "step": 40600 }, { "epoch": 0.040046735969357594, "grad_norm": 2.81471586227417, "learning_rate": 9.998425047340537e-06, "loss": 3.9935, "step": 40650 }, { "epoch": 0.040095993947179685, "grad_norm": 2.654538631439209, "learning_rate": 9.998421161077658e-06, "loss": 4.021, "step": 40700 }, { "epoch": 0.040145251925001776, "grad_norm": 3.0738894939422607, "learning_rate": 9.998417270026684e-06, "loss": 4.0458, "step": 40750 }, { "epoch": 0.04019450990282386, "grad_norm": 2.681791305541992, "learning_rate": 9.998413374187616e-06, "loss": 4.0371, "step": 40800 }, { "epoch": 0.04024376788064595, "grad_norm": 2.538512706756592, "learning_rate": 9.99840947356046e-06, "loss": 4.0911, "step": 40850 }, { "epoch": 0.040293025858468036, "grad_norm": 3.414949417114258, "learning_rate": 9.99840556814522e-06, "loss": 4.0768, "step": 40900 }, { "epoch": 0.04034228383629013, "grad_norm": 2.793264150619507, "learning_rate": 9.998401657941897e-06, "loss": 3.9975, "step": 40950 }, { "epoch": 0.04039154181411221, "grad_norm": 2.9819114208221436, "learning_rate": 9.998397742950498e-06, "loss": 4.0483, "step": 41000 }, { "epoch": 0.0404407997919343, "grad_norm": 3.319779634475708, "learning_rate": 9.998393823171024e-06, "loss": 3.9783, "step": 41050 }, { "epoch": 0.04049005776975639, "grad_norm": 3.429198741912842, "learning_rate": 9.998389898603482e-06, "loss": 4.0345, "step": 41100 }, { "epoch": 0.04053931574757848, "grad_norm": 2.9804835319519043, "learning_rate": 9.998385969247872e-06, "loss": 3.9826, "step": 41150 }, { "epoch": 0.04058857372540057, "grad_norm": 2.816620111465454, "learning_rate": 
9.998382035104201e-06, "loss": 4.007, "step": 41200 }, { "epoch": 0.04063783170322265, "grad_norm": 3.1493327617645264, "learning_rate": 9.998378096172472e-06, "loss": 4.0055, "step": 41250 }, { "epoch": 0.040687089681044744, "grad_norm": 2.6561801433563232, "learning_rate": 9.998374152452688e-06, "loss": 4.041, "step": 41300 }, { "epoch": 0.04073634765886683, "grad_norm": 2.9765233993530273, "learning_rate": 9.998370203944851e-06, "loss": 4.0281, "step": 41350 }, { "epoch": 0.04078560563668892, "grad_norm": 2.6032211780548096, "learning_rate": 9.998366250648968e-06, "loss": 4.0591, "step": 41400 }, { "epoch": 0.040834863614511004, "grad_norm": 3.0853097438812256, "learning_rate": 9.998362292565043e-06, "loss": 4.0403, "step": 41450 }, { "epoch": 0.040884121592333095, "grad_norm": 2.9708056449890137, "learning_rate": 9.998358329693076e-06, "loss": 3.972, "step": 41500 }, { "epoch": 0.04093337957015518, "grad_norm": 2.916759967803955, "learning_rate": 9.998354362033074e-06, "loss": 4.0309, "step": 41550 }, { "epoch": 0.04098263754797727, "grad_norm": 2.687000036239624, "learning_rate": 9.99835038958504e-06, "loss": 4.0237, "step": 41600 }, { "epoch": 0.04103189552579936, "grad_norm": 2.680605173110962, "learning_rate": 9.998346412348977e-06, "loss": 4.0151, "step": 41650 }, { "epoch": 0.041081153503621445, "grad_norm": 2.9819421768188477, "learning_rate": 9.998342430324893e-06, "loss": 4.0057, "step": 41700 }, { "epoch": 0.041130411481443536, "grad_norm": 3.719841957092285, "learning_rate": 9.998338443512785e-06, "loss": 4.0621, "step": 41750 }, { "epoch": 0.04117966945926562, "grad_norm": 2.829129934310913, "learning_rate": 9.998334451912659e-06, "loss": 4.077, "step": 41800 }, { "epoch": 0.04122892743708771, "grad_norm": 2.880561351776123, "learning_rate": 9.998330455524523e-06, "loss": 3.9817, "step": 41850 }, { "epoch": 0.041278185414909796, "grad_norm": 2.7640485763549805, "learning_rate": 9.998326454348376e-06, "loss": 4.0404, "step": 41900 }, { "epoch": 
0.04132744339273189, "grad_norm": 2.9133119583129883, "learning_rate": 9.998322448384225e-06, "loss": 4.0292, "step": 41950 }, { "epoch": 0.04137670137055398, "grad_norm": 2.625391721725464, "learning_rate": 9.99831843763207e-06, "loss": 4.0255, "step": 42000 }, { "epoch": 0.04142595934837606, "grad_norm": 2.824221134185791, "learning_rate": 9.99831442209192e-06, "loss": 4.08, "step": 42050 }, { "epoch": 0.041475217326198154, "grad_norm": 2.952834129333496, "learning_rate": 9.998310401763776e-06, "loss": 4.0051, "step": 42100 }, { "epoch": 0.04152447530402024, "grad_norm": 2.7279679775238037, "learning_rate": 9.998306376647642e-06, "loss": 4.0249, "step": 42150 }, { "epoch": 0.04157373328184233, "grad_norm": 2.78761625289917, "learning_rate": 9.99830234674352e-06, "loss": 4.0253, "step": 42200 }, { "epoch": 0.04162299125966441, "grad_norm": 3.137406349182129, "learning_rate": 9.998298312051417e-06, "loss": 3.8607, "step": 42250 }, { "epoch": 0.041672249237486504, "grad_norm": 2.8308167457580566, "learning_rate": 9.998294272571336e-06, "loss": 3.9479, "step": 42300 }, { "epoch": 0.04172150721530859, "grad_norm": 4.4350056648254395, "learning_rate": 9.99829022830328e-06, "loss": 3.9547, "step": 42350 }, { "epoch": 0.04177076519313068, "grad_norm": 2.8853492736816406, "learning_rate": 9.998286179247253e-06, "loss": 3.9796, "step": 42400 }, { "epoch": 0.04182002317095277, "grad_norm": 3.0448482036590576, "learning_rate": 9.99828212540326e-06, "loss": 4.0104, "step": 42450 }, { "epoch": 0.041869281148774855, "grad_norm": 2.9808459281921387, "learning_rate": 9.998278066771303e-06, "loss": 4.0083, "step": 42500 }, { "epoch": 0.041918539126596946, "grad_norm": 3.439976692199707, "learning_rate": 9.998274003351389e-06, "loss": 4.0077, "step": 42550 }, { "epoch": 0.04196779710441903, "grad_norm": 3.0069916248321533, "learning_rate": 9.998269935143519e-06, "loss": 3.9632, "step": 42600 }, { "epoch": 0.04201705508224112, "grad_norm": 2.7382593154907227, "learning_rate": 
9.998265862147698e-06, "loss": 4.0104, "step": 42650 }, { "epoch": 0.042066313060063205, "grad_norm": 2.9899399280548096, "learning_rate": 9.998261784363928e-06, "loss": 4.0595, "step": 42700 }, { "epoch": 0.0421155710378853, "grad_norm": 2.828990936279297, "learning_rate": 9.998257701792216e-06, "loss": 3.9882, "step": 42750 }, { "epoch": 0.04216482901570738, "grad_norm": 2.753329038619995, "learning_rate": 9.998253614432565e-06, "loss": 3.9874, "step": 42800 }, { "epoch": 0.04221408699352947, "grad_norm": 2.9647271633148193, "learning_rate": 9.998249522284977e-06, "loss": 4.0674, "step": 42850 }, { "epoch": 0.04226334497135156, "grad_norm": 2.7331337928771973, "learning_rate": 9.998245425349458e-06, "loss": 3.9639, "step": 42900 }, { "epoch": 0.04231260294917365, "grad_norm": 2.717965841293335, "learning_rate": 9.998241323626011e-06, "loss": 3.9842, "step": 42950 }, { "epoch": 0.04236186092699574, "grad_norm": 2.9853599071502686, "learning_rate": 9.998237217114641e-06, "loss": 3.9829, "step": 43000 }, { "epoch": 0.04241111890481782, "grad_norm": 3.0525457859039307, "learning_rate": 9.998233105815351e-06, "loss": 3.9696, "step": 43050 }, { "epoch": 0.042460376882639914, "grad_norm": 2.831200361251831, "learning_rate": 9.998228989728145e-06, "loss": 4.0234, "step": 43100 }, { "epoch": 0.042509634860462, "grad_norm": 2.6662328243255615, "learning_rate": 9.998224868853025e-06, "loss": 3.9821, "step": 43150 }, { "epoch": 0.04255889283828409, "grad_norm": 3.6581227779388428, "learning_rate": 9.99822074319e-06, "loss": 4.0025, "step": 43200 }, { "epoch": 0.04260815081610617, "grad_norm": 3.1624743938446045, "learning_rate": 9.99821661273907e-06, "loss": 3.9493, "step": 43250 }, { "epoch": 0.042657408793928264, "grad_norm": 2.837441921234131, "learning_rate": 9.99821247750024e-06, "loss": 3.9152, "step": 43300 }, { "epoch": 0.042706666771750355, "grad_norm": 2.699232339859009, "learning_rate": 9.998208337473512e-06, "loss": 3.9054, "step": 43350 }, { "epoch": 
0.04275592474957244, "grad_norm": 3.163109064102173, "learning_rate": 9.998204192658894e-06, "loss": 4.0368, "step": 43400 }, { "epoch": 0.04280518272739453, "grad_norm": 2.7115020751953125, "learning_rate": 9.998200043056388e-06, "loss": 3.9917, "step": 43450 }, { "epoch": 0.042854440705216615, "grad_norm": 2.9153828620910645, "learning_rate": 9.998195888665995e-06, "loss": 4.0745, "step": 43500 }, { "epoch": 0.042903698683038706, "grad_norm": 2.712707281112671, "learning_rate": 9.998191729487724e-06, "loss": 3.9769, "step": 43550 }, { "epoch": 0.04295295666086079, "grad_norm": 2.638631820678711, "learning_rate": 9.998187565521576e-06, "loss": 3.9965, "step": 43600 }, { "epoch": 0.04300221463868288, "grad_norm": 2.7745962142944336, "learning_rate": 9.998183396767556e-06, "loss": 4.023, "step": 43650 }, { "epoch": 0.04305147261650497, "grad_norm": 2.624850273132324, "learning_rate": 9.998179223225668e-06, "loss": 4.0351, "step": 43700 }, { "epoch": 0.04310073059432706, "grad_norm": 2.922191858291626, "learning_rate": 9.998175044895916e-06, "loss": 4.0039, "step": 43750 }, { "epoch": 0.04314998857214915, "grad_norm": 2.808725357055664, "learning_rate": 9.998170861778302e-06, "loss": 3.9714, "step": 43800 }, { "epoch": 0.04319924654997123, "grad_norm": 2.8285109996795654, "learning_rate": 9.998166673872834e-06, "loss": 3.9834, "step": 43850 }, { "epoch": 0.04324850452779332, "grad_norm": 2.8687593936920166, "learning_rate": 9.998162481179512e-06, "loss": 3.9885, "step": 43900 }, { "epoch": 0.04329776250561541, "grad_norm": 3.2354822158813477, "learning_rate": 9.998158283698343e-06, "loss": 3.9727, "step": 43950 }, { "epoch": 0.0433470204834375, "grad_norm": 3.126126289367676, "learning_rate": 9.998154081429327e-06, "loss": 3.9721, "step": 44000 }, { "epoch": 0.04339627846125958, "grad_norm": 2.744157075881958, "learning_rate": 9.998149874372473e-06, "loss": 3.9549, "step": 44050 }, { "epoch": 0.043445536439081674, "grad_norm": 2.799764394760132, "learning_rate": 
9.998145662527784e-06, "loss": 3.9611, "step": 44100 }, { "epoch": 0.043494794416903765, "grad_norm": 2.676302433013916, "learning_rate": 9.998141445895259e-06, "loss": 3.9342, "step": 44150 }, { "epoch": 0.04354405239472585, "grad_norm": 2.9476280212402344, "learning_rate": 9.99813722447491e-06, "loss": 4.0128, "step": 44200 }, { "epoch": 0.04359331037254794, "grad_norm": 2.8259193897247314, "learning_rate": 9.998132998266734e-06, "loss": 4.0099, "step": 44250 }, { "epoch": 0.043642568350370024, "grad_norm": 2.9133212566375732, "learning_rate": 9.998128767270741e-06, "loss": 3.945, "step": 44300 }, { "epoch": 0.043691826328192115, "grad_norm": 2.631702423095703, "learning_rate": 9.998124531486929e-06, "loss": 3.9934, "step": 44350 }, { "epoch": 0.0437410843060142, "grad_norm": 3.502730369567871, "learning_rate": 9.998120290915307e-06, "loss": 3.9934, "step": 44400 }, { "epoch": 0.04379034228383629, "grad_norm": 2.8433287143707275, "learning_rate": 9.998116045555875e-06, "loss": 4.0292, "step": 44450 }, { "epoch": 0.043839600261658375, "grad_norm": 2.8420639038085938, "learning_rate": 9.998111795408642e-06, "loss": 3.9876, "step": 44500 }, { "epoch": 0.043888858239480466, "grad_norm": 5.383781433105469, "learning_rate": 9.998107540473606e-06, "loss": 3.9193, "step": 44550 }, { "epoch": 0.04393811621730256, "grad_norm": 3.1629178524017334, "learning_rate": 9.998103280750778e-06, "loss": 3.9782, "step": 44600 }, { "epoch": 0.04398737419512464, "grad_norm": 2.86757755279541, "learning_rate": 9.998099016240157e-06, "loss": 3.9504, "step": 44650 }, { "epoch": 0.04403663217294673, "grad_norm": 3.108323812484741, "learning_rate": 9.998094746941748e-06, "loss": 4.0086, "step": 44700 }, { "epoch": 0.04408589015076882, "grad_norm": 3.0131120681762695, "learning_rate": 9.998090472855554e-06, "loss": 4.002, "step": 44750 }, { "epoch": 0.04413514812859091, "grad_norm": 2.864246368408203, "learning_rate": 9.998086193981584e-06, "loss": 3.9502, "step": 44800 }, { "epoch": 
0.04418440610641299, "grad_norm": 3.1510934829711914, "learning_rate": 9.998081910319837e-06, "loss": 3.9966, "step": 44850 }, { "epoch": 0.04423366408423508, "grad_norm": 3.1508381366729736, "learning_rate": 9.99807762187032e-06, "loss": 3.9853, "step": 44900 }, { "epoch": 0.044282922062057174, "grad_norm": 2.776519775390625, "learning_rate": 9.998073328633035e-06, "loss": 3.9902, "step": 44950 }, { "epoch": 0.04433218003987926, "grad_norm": 2.839163303375244, "learning_rate": 9.998069030607988e-06, "loss": 3.8926, "step": 45000 }, { "epoch": 0.04438143801770135, "grad_norm": 2.8651914596557617, "learning_rate": 9.99806472779518e-06, "loss": 3.8854, "step": 45050 }, { "epoch": 0.044430695995523434, "grad_norm": 2.934861183166504, "learning_rate": 9.99806042019462e-06, "loss": 3.9618, "step": 45100 }, { "epoch": 0.044479953973345525, "grad_norm": 2.8418431282043457, "learning_rate": 9.99805610780631e-06, "loss": 3.9133, "step": 45150 }, { "epoch": 0.04452921195116761, "grad_norm": 2.765542984008789, "learning_rate": 9.998051790630249e-06, "loss": 3.9646, "step": 45200 }, { "epoch": 0.0445784699289897, "grad_norm": 2.6040894985198975, "learning_rate": 9.99804746866645e-06, "loss": 3.9463, "step": 45250 }, { "epoch": 0.044627727906811784, "grad_norm": 3.2018420696258545, "learning_rate": 9.998043141914913e-06, "loss": 3.9415, "step": 45300 }, { "epoch": 0.044676985884633875, "grad_norm": 2.7809054851531982, "learning_rate": 9.99803881037564e-06, "loss": 3.9434, "step": 45350 }, { "epoch": 0.04472624386245597, "grad_norm": 2.8362269401550293, "learning_rate": 9.99803447404864e-06, "loss": 3.9273, "step": 45400 }, { "epoch": 0.04477550184027805, "grad_norm": 2.647585153579712, "learning_rate": 9.998030132933912e-06, "loss": 3.9475, "step": 45450 }, { "epoch": 0.04482475981810014, "grad_norm": 2.858994722366333, "learning_rate": 9.998025787031462e-06, "loss": 3.9677, "step": 45500 }, { "epoch": 0.044874017795922226, "grad_norm": 2.8087899684906006, "learning_rate": 
9.998021436341295e-06, "loss": 3.9695, "step": 45550 }, { "epoch": 0.04492327577374432, "grad_norm": 2.6973493099212646, "learning_rate": 9.998017080863416e-06, "loss": 3.8895, "step": 45600 }, { "epoch": 0.0449725337515664, "grad_norm": 3.176442861557007, "learning_rate": 9.998012720597828e-06, "loss": 3.9548, "step": 45650 }, { "epoch": 0.04502179172938849, "grad_norm": 3.080256223678589, "learning_rate": 9.998008355544534e-06, "loss": 3.9139, "step": 45700 }, { "epoch": 0.04507104970721058, "grad_norm": 2.806225061416626, "learning_rate": 9.99800398570354e-06, "loss": 3.9353, "step": 45750 }, { "epoch": 0.04512030768503267, "grad_norm": 2.7489073276519775, "learning_rate": 9.99799961107485e-06, "loss": 3.9306, "step": 45800 }, { "epoch": 0.04516956566285476, "grad_norm": 3.1545615196228027, "learning_rate": 9.997995231658467e-06, "loss": 3.9437, "step": 45850 }, { "epoch": 0.04521882364067684, "grad_norm": 3.233289957046509, "learning_rate": 9.997990847454396e-06, "loss": 3.9478, "step": 45900 }, { "epoch": 0.045268081618498934, "grad_norm": 2.925522804260254, "learning_rate": 9.997986458462642e-06, "loss": 3.9102, "step": 45950 }, { "epoch": 0.04531733959632102, "grad_norm": 2.9344165325164795, "learning_rate": 9.997982064683207e-06, "loss": 3.9114, "step": 46000 }, { "epoch": 0.04536659757414311, "grad_norm": 2.7420713901519775, "learning_rate": 9.997977666116098e-06, "loss": 3.9843, "step": 46050 }, { "epoch": 0.045415855551965194, "grad_norm": 2.9668960571289062, "learning_rate": 9.997973262761319e-06, "loss": 3.9546, "step": 46100 }, { "epoch": 0.045465113529787285, "grad_norm": 3.048109531402588, "learning_rate": 9.99796885461887e-06, "loss": 3.9249, "step": 46150 }, { "epoch": 0.04551437150760937, "grad_norm": 2.8706977367401123, "learning_rate": 9.997964441688759e-06, "loss": 3.9465, "step": 46200 }, { "epoch": 0.04556362948543146, "grad_norm": 2.6603498458862305, "learning_rate": 9.997960023970989e-06, "loss": 3.9442, "step": 46250 }, { "epoch": 
0.04561288746325355, "grad_norm": 3.519437789916992, "learning_rate": 9.997955601465565e-06, "loss": 3.8905, "step": 46300 }, { "epoch": 0.045662145441075636, "grad_norm": 2.6617069244384766, "learning_rate": 9.997951174172492e-06, "loss": 3.9277, "step": 46350 }, { "epoch": 0.04571140341889773, "grad_norm": 2.7618274688720703, "learning_rate": 9.997946742091773e-06, "loss": 3.9431, "step": 46400 }, { "epoch": 0.04576066139671981, "grad_norm": 2.935378313064575, "learning_rate": 9.997942305223411e-06, "loss": 3.9216, "step": 46450 }, { "epoch": 0.0458099193745419, "grad_norm": 2.6798624992370605, "learning_rate": 9.997937863567412e-06, "loss": 3.9104, "step": 46500 }, { "epoch": 0.045859177352363986, "grad_norm": 2.7616279125213623, "learning_rate": 9.997933417123782e-06, "loss": 3.8805, "step": 46550 }, { "epoch": 0.04590843533018608, "grad_norm": 3.3764054775238037, "learning_rate": 9.99792896589252e-06, "loss": 3.9348, "step": 46600 }, { "epoch": 0.04595769330800817, "grad_norm": 2.72837233543396, "learning_rate": 9.997924509873636e-06, "loss": 3.9121, "step": 46650 }, { "epoch": 0.04600695128583025, "grad_norm": 2.9798216819763184, "learning_rate": 9.99792004906713e-06, "loss": 3.9433, "step": 46700 }, { "epoch": 0.046056209263652344, "grad_norm": 2.8937313556671143, "learning_rate": 9.99791558347301e-06, "loss": 3.9411, "step": 46750 }, { "epoch": 0.04610546724147443, "grad_norm": 2.4357857704162598, "learning_rate": 9.997911113091277e-06, "loss": 3.9535, "step": 46800 }, { "epoch": 0.04615472521929652, "grad_norm": 2.5775749683380127, "learning_rate": 9.997906637921935e-06, "loss": 3.9757, "step": 46850 }, { "epoch": 0.0462039831971186, "grad_norm": 3.0139052867889404, "learning_rate": 9.997902157964992e-06, "loss": 3.8996, "step": 46900 }, { "epoch": 0.046253241174940694, "grad_norm": 2.7500407695770264, "learning_rate": 9.99789767322045e-06, "loss": 3.8929, "step": 46950 }, { "epoch": 0.04630249915276278, "grad_norm": 3.091683864593506, "learning_rate": 
9.997893183688312e-06, "loss": 3.8896, "step": 47000 }, { "epoch": 0.04635175713058487, "grad_norm": 3.2092885971069336, "learning_rate": 9.997888689368584e-06, "loss": 3.9843, "step": 47050 }, { "epoch": 0.04640101510840696, "grad_norm": 6.170140266418457, "learning_rate": 9.997884190261272e-06, "loss": 3.8261, "step": 47100 }, { "epoch": 0.046450273086229045, "grad_norm": 2.982158899307251, "learning_rate": 9.997879686366376e-06, "loss": 3.982, "step": 47150 }, { "epoch": 0.046499531064051136, "grad_norm": 2.87933349609375, "learning_rate": 9.997875177683902e-06, "loss": 3.9332, "step": 47200 }, { "epoch": 0.04654878904187322, "grad_norm": 3.083808183670044, "learning_rate": 9.997870664213858e-06, "loss": 3.9288, "step": 47250 }, { "epoch": 0.04659804701969531, "grad_norm": 2.714901924133301, "learning_rate": 9.997866145956243e-06, "loss": 3.9272, "step": 47300 }, { "epoch": 0.046647304997517396, "grad_norm": 2.779989242553711, "learning_rate": 9.997861622911063e-06, "loss": 3.9487, "step": 47350 }, { "epoch": 0.04669656297533949, "grad_norm": 2.8960483074188232, "learning_rate": 9.997857095078325e-06, "loss": 3.9274, "step": 47400 }, { "epoch": 0.04674582095316157, "grad_norm": 3.3968732357025146, "learning_rate": 9.99785256245803e-06, "loss": 3.844, "step": 47450 }, { "epoch": 0.04679507893098366, "grad_norm": 2.9894254207611084, "learning_rate": 9.997848025050184e-06, "loss": 3.9598, "step": 47500 }, { "epoch": 0.04684433690880575, "grad_norm": 2.85400128364563, "learning_rate": 9.997843482854793e-06, "loss": 3.8948, "step": 47550 }, { "epoch": 0.04689359488662784, "grad_norm": 3.113671064376831, "learning_rate": 9.997838935871855e-06, "loss": 3.9034, "step": 47600 }, { "epoch": 0.04694285286444993, "grad_norm": 2.5373196601867676, "learning_rate": 9.997834384101382e-06, "loss": 3.9098, "step": 47650 }, { "epoch": 0.04699211084227201, "grad_norm": 2.9561591148376465, "learning_rate": 9.997829827543373e-06, "loss": 3.8714, "step": 47700 }, { "epoch": 
0.047041368820094104, "grad_norm": 3.1219136714935303, "learning_rate": 9.997825266197837e-06, "loss": 3.8969, "step": 47750 }, { "epoch": 0.04709062679791619, "grad_norm": 2.982424020767212, "learning_rate": 9.997820700064773e-06, "loss": 3.936, "step": 47800 }, { "epoch": 0.04713988477573828, "grad_norm": 2.849760055541992, "learning_rate": 9.99781612914419e-06, "loss": 3.9427, "step": 47850 }, { "epoch": 0.04718914275356036, "grad_norm": 2.9320459365844727, "learning_rate": 9.997811553436089e-06, "loss": 3.9398, "step": 47900 }, { "epoch": 0.047238400731382454, "grad_norm": 2.6908745765686035, "learning_rate": 9.997806972940476e-06, "loss": 3.8829, "step": 47950 }, { "epoch": 0.047287658709204546, "grad_norm": 2.7936155796051025, "learning_rate": 9.997802387657356e-06, "loss": 3.8968, "step": 48000 }, { "epoch": 0.04733691668702663, "grad_norm": 2.6581978797912598, "learning_rate": 9.997797797586733e-06, "loss": 3.8878, "step": 48050 }, { "epoch": 0.04738617466484872, "grad_norm": 2.5780277252197266, "learning_rate": 9.99779320272861e-06, "loss": 3.9156, "step": 48100 }, { "epoch": 0.047435432642670805, "grad_norm": 3.2265820503234863, "learning_rate": 9.997788603082994e-06, "loss": 3.9044, "step": 48150 }, { "epoch": 0.047484690620492896, "grad_norm": 2.7583863735198975, "learning_rate": 9.997783998649886e-06, "loss": 3.9311, "step": 48200 }, { "epoch": 0.04753394859831498, "grad_norm": 2.8111581802368164, "learning_rate": 9.997779389429295e-06, "loss": 3.9372, "step": 48250 }, { "epoch": 0.04758320657613707, "grad_norm": 2.8359758853912354, "learning_rate": 9.997774775421221e-06, "loss": 3.9307, "step": 48300 }, { "epoch": 0.04763246455395916, "grad_norm": 2.854013204574585, "learning_rate": 9.99777015662567e-06, "loss": 3.8741, "step": 48350 }, { "epoch": 0.04768172253178125, "grad_norm": 2.9451024532318115, "learning_rate": 9.997765533042645e-06, "loss": 3.8618, "step": 48400 }, { "epoch": 0.04773098050960334, "grad_norm": 2.8137495517730713, 
"learning_rate": 9.997760904672154e-06, "loss": 4.0054, "step": 48450 }, { "epoch": 0.04778023848742542, "grad_norm": 3.0297200679779053, "learning_rate": 9.997756271514198e-06, "loss": 3.9282, "step": 48500 }, { "epoch": 0.04782949646524751, "grad_norm": 3.200852632522583, "learning_rate": 9.997751633568785e-06, "loss": 3.8727, "step": 48550 }, { "epoch": 0.0478787544430696, "grad_norm": 2.7026126384735107, "learning_rate": 9.997746990835918e-06, "loss": 3.958, "step": 48600 }, { "epoch": 0.04792801242089169, "grad_norm": 2.683682441711426, "learning_rate": 9.997742343315597e-06, "loss": 3.9041, "step": 48650 }, { "epoch": 0.04797727039871377, "grad_norm": 2.732779026031494, "learning_rate": 9.997737691007832e-06, "loss": 3.9321, "step": 48700 }, { "epoch": 0.048026528376535864, "grad_norm": 2.772275686264038, "learning_rate": 9.997733033912627e-06, "loss": 3.9673, "step": 48750 }, { "epoch": 0.048075786354357955, "grad_norm": 3.05098557472229, "learning_rate": 9.997728372029982e-06, "loss": 3.8978, "step": 48800 }, { "epoch": 0.04812504433218004, "grad_norm": 2.4665896892547607, "learning_rate": 9.997723705359908e-06, "loss": 3.9772, "step": 48850 }, { "epoch": 0.04817430231000213, "grad_norm": 2.8390681743621826, "learning_rate": 9.997719033902405e-06, "loss": 3.9259, "step": 48900 }, { "epoch": 0.048223560287824214, "grad_norm": 2.7310116291046143, "learning_rate": 9.997714357657477e-06, "loss": 3.8332, "step": 48950 }, { "epoch": 0.048272818265646306, "grad_norm": 2.7073545455932617, "learning_rate": 9.997709676625131e-06, "loss": 3.8932, "step": 49000 }, { "epoch": 0.04832207624346839, "grad_norm": 2.6311309337615967, "learning_rate": 9.99770499080537e-06, "loss": 3.866, "step": 49050 }, { "epoch": 0.04837133422129048, "grad_norm": 3.015855312347412, "learning_rate": 9.9977003001982e-06, "loss": 3.8292, "step": 49100 }, { "epoch": 0.048420592199112565, "grad_norm": 2.935037136077881, "learning_rate": 9.997695604803624e-06, "loss": 3.8886, "step": 49150 }, { 
"epoch": 0.048469850176934656, "grad_norm": 3.531795024871826, "learning_rate": 9.997690904621647e-06, "loss": 3.9088, "step": 49200 }, { "epoch": 0.04851910815475675, "grad_norm": 2.874725580215454, "learning_rate": 9.997686199652273e-06, "loss": 3.8848, "step": 49250 }, { "epoch": 0.04856836613257883, "grad_norm": 2.7801880836486816, "learning_rate": 9.997681489895507e-06, "loss": 3.9368, "step": 49300 }, { "epoch": 0.04861762411040092, "grad_norm": 2.7321619987487793, "learning_rate": 9.997676775351354e-06, "loss": 3.8599, "step": 49350 }, { "epoch": 0.04866688208822301, "grad_norm": 2.704500436782837, "learning_rate": 9.997672056019817e-06, "loss": 3.8923, "step": 49400 }, { "epoch": 0.0487161400660451, "grad_norm": 2.747087001800537, "learning_rate": 9.997667331900903e-06, "loss": 3.9252, "step": 49450 }, { "epoch": 0.04876539804386718, "grad_norm": 2.991956949234009, "learning_rate": 9.997662602994615e-06, "loss": 3.8412, "step": 49500 }, { "epoch": 0.04881465602168927, "grad_norm": 2.8603739738464355, "learning_rate": 9.997657869300957e-06, "loss": 3.8522, "step": 49550 }, { "epoch": 0.04886391399951136, "grad_norm": 2.782435417175293, "learning_rate": 9.997653130819933e-06, "loss": 3.907, "step": 49600 }, { "epoch": 0.04891317197733345, "grad_norm": 3.0265748500823975, "learning_rate": 9.99764838755155e-06, "loss": 3.9365, "step": 49650 }, { "epoch": 0.04896242995515554, "grad_norm": 3.169058322906494, "learning_rate": 9.997643639495811e-06, "loss": 3.9001, "step": 49700 }, { "epoch": 0.049011687932977624, "grad_norm": 2.8705403804779053, "learning_rate": 9.997638886652722e-06, "loss": 3.8899, "step": 49750 }, { "epoch": 0.049060945910799715, "grad_norm": 2.956397771835327, "learning_rate": 9.997634129022285e-06, "loss": 3.7742, "step": 49800 }, { "epoch": 0.0491102038886218, "grad_norm": 3.173853635787964, "learning_rate": 9.997629366604504e-06, "loss": 3.8971, "step": 49850 }, { "epoch": 0.04915946186644389, "grad_norm": 3.153721570968628, 
"learning_rate": 9.997624599399389e-06, "loss": 3.8229, "step": 49900 }, { "epoch": 0.049208719844265975, "grad_norm": 2.917570114135742, "learning_rate": 9.997619827406937e-06, "loss": 3.9339, "step": 49950 }, { "epoch": 0.049257977822088066, "grad_norm": 2.6499991416931152, "learning_rate": 9.997615050627161e-06, "loss": 3.861, "step": 50000 }, { "epoch": 0.04930723579991016, "grad_norm": 2.5623226165771484, "learning_rate": 9.997610269060058e-06, "loss": 3.8489, "step": 50050 }, { "epoch": 0.04935649377773224, "grad_norm": 2.7566964626312256, "learning_rate": 9.997605482705636e-06, "loss": 3.9035, "step": 50100 }, { "epoch": 0.04940575175555433, "grad_norm": 2.7969563007354736, "learning_rate": 9.9976006915639e-06, "loss": 3.9001, "step": 50150 }, { "epoch": 0.049455009733376416, "grad_norm": 2.8503568172454834, "learning_rate": 9.997595895634854e-06, "loss": 3.9836, "step": 50200 }, { "epoch": 0.04950426771119851, "grad_norm": 2.833604335784912, "learning_rate": 9.997591094918501e-06, "loss": 3.8187, "step": 50250 }, { "epoch": 0.04955352568902059, "grad_norm": 2.880197763442993, "learning_rate": 9.997586289414848e-06, "loss": 3.8982, "step": 50300 }, { "epoch": 0.04960278366684268, "grad_norm": 3.1043663024902344, "learning_rate": 9.9975814791239e-06, "loss": 3.8918, "step": 50350 }, { "epoch": 0.04965204164466477, "grad_norm": 2.8828938007354736, "learning_rate": 9.99757666404566e-06, "loss": 3.8425, "step": 50400 }, { "epoch": 0.04970129962248686, "grad_norm": 2.9447174072265625, "learning_rate": 9.997571844180131e-06, "loss": 3.9095, "step": 50450 }, { "epoch": 0.04975055760030895, "grad_norm": 2.6926653385162354, "learning_rate": 9.997567019527322e-06, "loss": 3.8409, "step": 50500 }, { "epoch": 0.04979981557813103, "grad_norm": 2.8311376571655273, "learning_rate": 9.997562190087233e-06, "loss": 3.8578, "step": 50550 }, { "epoch": 0.049849073555953125, "grad_norm": 2.9068715572357178, "learning_rate": 9.997557355859872e-06, "loss": 3.8501, "step": 50600 }, 
{ "epoch": 0.04989833153377521, "grad_norm": 2.7963788509368896, "learning_rate": 9.997552516845241e-06, "loss": 3.873, "step": 50650 }, { "epoch": 0.0499475895115973, "grad_norm": 2.8737502098083496, "learning_rate": 9.997547673043348e-06, "loss": 3.8366, "step": 50700 }, { "epoch": 0.049996847489419384, "grad_norm": 2.575472116470337, "learning_rate": 9.997542824454195e-06, "loss": 3.8675, "step": 50750 }, { "epoch": 0.050046105467241475, "grad_norm": 3.202331781387329, "learning_rate": 9.997537971077787e-06, "loss": 3.9172, "step": 50800 }, { "epoch": 0.05009536344506356, "grad_norm": 3.1182198524475098, "learning_rate": 9.99753311291413e-06, "loss": 3.815, "step": 50850 }, { "epoch": 0.05014462142288565, "grad_norm": 2.7427101135253906, "learning_rate": 9.997528249963225e-06, "loss": 3.864, "step": 50900 }, { "epoch": 0.05019387940070774, "grad_norm": 2.807387113571167, "learning_rate": 9.997523382225081e-06, "loss": 3.8465, "step": 50950 }, { "epoch": 0.050243137378529826, "grad_norm": 2.960515022277832, "learning_rate": 9.997518509699702e-06, "loss": 3.8985, "step": 51000 }, { "epoch": 0.05029239535635192, "grad_norm": 3.088902235031128, "learning_rate": 9.99751363238709e-06, "loss": 3.8436, "step": 51050 }, { "epoch": 0.050341653334174, "grad_norm": 2.700357675552368, "learning_rate": 9.997508750287252e-06, "loss": 3.8459, "step": 51100 }, { "epoch": 0.05039091131199609, "grad_norm": 2.8286375999450684, "learning_rate": 9.99750386340019e-06, "loss": 3.8428, "step": 51150 }, { "epoch": 0.050440169289818176, "grad_norm": 2.547328472137451, "learning_rate": 9.997498971725912e-06, "loss": 3.9117, "step": 51200 }, { "epoch": 0.05048942726764027, "grad_norm": 2.795450210571289, "learning_rate": 9.997494075264423e-06, "loss": 3.8858, "step": 51250 }, { "epoch": 0.05053868524546235, "grad_norm": 3.2690069675445557, "learning_rate": 9.997489174015725e-06, "loss": 3.8855, "step": 51300 }, { "epoch": 0.05058794322328444, "grad_norm": 2.7202744483947754, 
"learning_rate": 9.997484267979822e-06, "loss": 3.8259, "step": 51350 }, { "epoch": 0.050637201201106534, "grad_norm": 2.6972599029541016, "learning_rate": 9.997479357156722e-06, "loss": 3.841, "step": 51400 }, { "epoch": 0.05068645917892862, "grad_norm": 3.017165422439575, "learning_rate": 9.997474441546428e-06, "loss": 3.7995, "step": 51450 }, { "epoch": 0.05073571715675071, "grad_norm": 2.9616281986236572, "learning_rate": 9.997469521148944e-06, "loss": 3.8514, "step": 51500 }, { "epoch": 0.05078497513457279, "grad_norm": 2.6211206912994385, "learning_rate": 9.997464595964278e-06, "loss": 3.8974, "step": 51550 }, { "epoch": 0.050834233112394885, "grad_norm": 2.8088982105255127, "learning_rate": 9.99745966599243e-06, "loss": 3.8445, "step": 51600 }, { "epoch": 0.05088349109021697, "grad_norm": 2.513404369354248, "learning_rate": 9.997454731233407e-06, "loss": 3.8675, "step": 51650 }, { "epoch": 0.05093274906803906, "grad_norm": 2.5563173294067383, "learning_rate": 9.997449791687214e-06, "loss": 3.8445, "step": 51700 }, { "epoch": 0.05098200704586115, "grad_norm": 2.7279183864593506, "learning_rate": 9.997444847353855e-06, "loss": 3.8824, "step": 51750 }, { "epoch": 0.051031265023683235, "grad_norm": 2.6972129344940186, "learning_rate": 9.997439898233336e-06, "loss": 3.9101, "step": 51800 }, { "epoch": 0.051080523001505326, "grad_norm": 2.407561779022217, "learning_rate": 9.997434944325662e-06, "loss": 3.861, "step": 51850 }, { "epoch": 0.05112978097932741, "grad_norm": 2.736022472381592, "learning_rate": 9.997429985630834e-06, "loss": 3.8725, "step": 51900 }, { "epoch": 0.0511790389571495, "grad_norm": 2.6911637783050537, "learning_rate": 9.997425022148862e-06, "loss": 3.8678, "step": 51950 }, { "epoch": 0.051228296934971586, "grad_norm": 2.6286942958831787, "learning_rate": 9.997420053879745e-06, "loss": 3.8596, "step": 52000 }, { "epoch": 0.05127755491279368, "grad_norm": 2.5649330615997314, "learning_rate": 9.997415080823494e-06, "loss": 3.9091, "step": 52050 
}, { "epoch": 0.05132681289061576, "grad_norm": 2.964287519454956, "learning_rate": 9.99741010298011e-06, "loss": 3.8209, "step": 52100 }, { "epoch": 0.05137607086843785, "grad_norm": 2.688326358795166, "learning_rate": 9.997405120349598e-06, "loss": 3.8308, "step": 52150 }, { "epoch": 0.05142532884625994, "grad_norm": 3.0511207580566406, "learning_rate": 9.997400132931962e-06, "loss": 3.8628, "step": 52200 }, { "epoch": 0.05147458682408203, "grad_norm": 2.576521158218384, "learning_rate": 9.997395140727209e-06, "loss": 3.8031, "step": 52250 }, { "epoch": 0.05152384480190412, "grad_norm": 2.754725694656372, "learning_rate": 9.997390143735345e-06, "loss": 3.8878, "step": 52300 }, { "epoch": 0.0515731027797262, "grad_norm": 2.7631216049194336, "learning_rate": 9.997385141956368e-06, "loss": 3.7406, "step": 52350 }, { "epoch": 0.051622360757548294, "grad_norm": 2.752963066101074, "learning_rate": 9.99738013539029e-06, "loss": 3.7946, "step": 52400 }, { "epoch": 0.05167161873537038, "grad_norm": 2.733215093612671, "learning_rate": 9.997375124037113e-06, "loss": 3.8472, "step": 52450 }, { "epoch": 0.05172087671319247, "grad_norm": 3.5411031246185303, "learning_rate": 9.997370107896842e-06, "loss": 3.7764, "step": 52500 }, { "epoch": 0.051770134691014554, "grad_norm": 2.967059373855591, "learning_rate": 9.997365086969482e-06, "loss": 3.8453, "step": 52550 }, { "epoch": 0.051819392668836645, "grad_norm": 2.7348504066467285, "learning_rate": 9.997360061255037e-06, "loss": 3.849, "step": 52600 }, { "epoch": 0.051868650646658736, "grad_norm": 2.618776559829712, "learning_rate": 9.997355030753512e-06, "loss": 3.8766, "step": 52650 }, { "epoch": 0.05191790862448082, "grad_norm": 2.9408116340637207, "learning_rate": 9.997349995464913e-06, "loss": 3.8434, "step": 52700 }, { "epoch": 0.05196716660230291, "grad_norm": 2.8255062103271484, "learning_rate": 9.997344955389246e-06, "loss": 3.7812, "step": 52750 }, { "epoch": 0.052016424580124995, "grad_norm": 2.517199754714966, 
"learning_rate": 9.997339910526512e-06, "loss": 3.8116, "step": 52800 }, { "epoch": 0.052065682557947086, "grad_norm": 3.239884853363037, "learning_rate": 9.997334860876716e-06, "loss": 3.8507, "step": 52850 }, { "epoch": 0.05211494053576917, "grad_norm": 3.4149253368377686, "learning_rate": 9.997329806439867e-06, "loss": 3.8211, "step": 52900 }, { "epoch": 0.05216419851359126, "grad_norm": 2.7143008708953857, "learning_rate": 9.997324747215965e-06, "loss": 3.8318, "step": 52950 }, { "epoch": 0.052213456491413346, "grad_norm": 3.0660479068756104, "learning_rate": 9.997319683205019e-06, "loss": 3.8197, "step": 53000 }, { "epoch": 0.05226271446923544, "grad_norm": 2.659954786300659, "learning_rate": 9.997314614407032e-06, "loss": 3.8348, "step": 53050 }, { "epoch": 0.05231197244705753, "grad_norm": 2.7625796794891357, "learning_rate": 9.997309540822008e-06, "loss": 3.8442, "step": 53100 }, { "epoch": 0.05236123042487961, "grad_norm": 2.7676100730895996, "learning_rate": 9.997304462449954e-06, "loss": 3.8337, "step": 53150 }, { "epoch": 0.0524104884027017, "grad_norm": 2.8207457065582275, "learning_rate": 9.997299379290873e-06, "loss": 3.7716, "step": 53200 }, { "epoch": 0.05245974638052379, "grad_norm": 3.1832785606384277, "learning_rate": 9.99729429134477e-06, "loss": 3.8644, "step": 53250 }, { "epoch": 0.05250900435834588, "grad_norm": 2.90838360786438, "learning_rate": 9.99728919861165e-06, "loss": 3.8319, "step": 53300 }, { "epoch": 0.05255826233616796, "grad_norm": 3.9564828872680664, "learning_rate": 9.99728410109152e-06, "loss": 3.8446, "step": 53350 }, { "epoch": 0.052607520313990054, "grad_norm": 2.7111916542053223, "learning_rate": 9.997278998784383e-06, "loss": 3.836, "step": 53400 }, { "epoch": 0.052656778291812145, "grad_norm": 2.811368465423584, "learning_rate": 9.997273891690243e-06, "loss": 3.8567, "step": 53450 }, { "epoch": 0.05270603626963423, "grad_norm": 2.774480104446411, "learning_rate": 9.997268779809107e-06, "loss": 3.8137, "step": 53500 }, { 
"epoch": 0.05275529424745632, "grad_norm": 2.6644179821014404, "learning_rate": 9.997263663140977e-06, "loss": 3.8439, "step": 53550 }, { "epoch": 0.052804552225278405, "grad_norm": 2.5807442665100098, "learning_rate": 9.99725854168586e-06, "loss": 3.8733, "step": 53600 }, { "epoch": 0.052853810203100496, "grad_norm": 2.9254322052001953, "learning_rate": 9.997253415443763e-06, "loss": 3.7994, "step": 53650 }, { "epoch": 0.05290306818092258, "grad_norm": 2.7968738079071045, "learning_rate": 9.997248284414686e-06, "loss": 3.8004, "step": 53700 }, { "epoch": 0.05295232615874467, "grad_norm": 2.5174827575683594, "learning_rate": 9.997243148598638e-06, "loss": 3.8081, "step": 53750 }, { "epoch": 0.053001584136566755, "grad_norm": 3.047924280166626, "learning_rate": 9.997238007995622e-06, "loss": 3.7518, "step": 53800 }, { "epoch": 0.053050842114388846, "grad_norm": 2.904470682144165, "learning_rate": 9.997232862605644e-06, "loss": 3.7937, "step": 53850 }, { "epoch": 0.05310010009221094, "grad_norm": 2.952720880508423, "learning_rate": 9.997227712428706e-06, "loss": 3.8555, "step": 53900 }, { "epoch": 0.05314935807003302, "grad_norm": 2.68854022026062, "learning_rate": 9.997222557464817e-06, "loss": 3.8202, "step": 53950 }, { "epoch": 0.05319861604785511, "grad_norm": 2.819537878036499, "learning_rate": 9.99721739771398e-06, "loss": 3.8291, "step": 54000 }, { "epoch": 0.0532478740256772, "grad_norm": 2.603846788406372, "learning_rate": 9.9972122331762e-06, "loss": 3.8093, "step": 54050 }, { "epoch": 0.05329713200349929, "grad_norm": 2.9189209938049316, "learning_rate": 9.997207063851482e-06, "loss": 3.7898, "step": 54100 }, { "epoch": 0.05334638998132137, "grad_norm": 2.524442672729492, "learning_rate": 9.99720188973983e-06, "loss": 3.7895, "step": 54150 }, { "epoch": 0.053395647959143464, "grad_norm": 2.7834534645080566, "learning_rate": 9.997196710841251e-06, "loss": 3.7529, "step": 54200 }, { "epoch": 0.05344490593696555, "grad_norm": 2.882136821746826, 
"learning_rate": 9.997191527155747e-06, "loss": 3.839, "step": 54250 }, { "epoch": 0.05349416391478764, "grad_norm": 2.5844919681549072, "learning_rate": 9.997186338683327e-06, "loss": 3.7814, "step": 54300 }, { "epoch": 0.05354342189260973, "grad_norm": 2.851780891418457, "learning_rate": 9.997181145423993e-06, "loss": 3.8179, "step": 54350 }, { "epoch": 0.053592679870431814, "grad_norm": 2.8690907955169678, "learning_rate": 9.99717594737775e-06, "loss": 3.7966, "step": 54400 }, { "epoch": 0.053641937848253905, "grad_norm": 2.7862234115600586, "learning_rate": 9.997170744544606e-06, "loss": 3.8176, "step": 54450 }, { "epoch": 0.05369119582607599, "grad_norm": 2.701183319091797, "learning_rate": 9.997165536924562e-06, "loss": 3.7946, "step": 54500 }, { "epoch": 0.05374045380389808, "grad_norm": 2.8167624473571777, "learning_rate": 9.997160324517625e-06, "loss": 3.7749, "step": 54550 }, { "epoch": 0.053789711781720165, "grad_norm": 2.747720718383789, "learning_rate": 9.9971551073238e-06, "loss": 3.7775, "step": 54600 }, { "epoch": 0.053838969759542256, "grad_norm": 2.782846212387085, "learning_rate": 9.99714988534309e-06, "loss": 3.8121, "step": 54650 }, { "epoch": 0.05388822773736434, "grad_norm": 2.709993839263916, "learning_rate": 9.997144658575504e-06, "loss": 3.789, "step": 54700 }, { "epoch": 0.05393748571518643, "grad_norm": 2.776766538619995, "learning_rate": 9.997139427021045e-06, "loss": 3.7715, "step": 54750 }, { "epoch": 0.05398674369300852, "grad_norm": 2.906484842300415, "learning_rate": 9.997134190679716e-06, "loss": 3.8935, "step": 54800 }, { "epoch": 0.054036001670830607, "grad_norm": 2.7916693687438965, "learning_rate": 9.997128949551525e-06, "loss": 3.7786, "step": 54850 }, { "epoch": 0.0540852596486527, "grad_norm": 2.716115951538086, "learning_rate": 9.997123703636475e-06, "loss": 3.8842, "step": 54900 }, { "epoch": 0.05413451762647478, "grad_norm": 3.2908172607421875, "learning_rate": 9.997118452934572e-06, "loss": 3.7798, "step": 54950 }, { 
"epoch": 0.05418377560429687, "grad_norm": 2.5399866104125977, "learning_rate": 9.99711319744582e-06, "loss": 3.8102, "step": 55000 }, { "epoch": 0.05423303358211896, "grad_norm": 2.7346315383911133, "learning_rate": 9.997107937170228e-06, "loss": 3.8097, "step": 55050 }, { "epoch": 0.05428229155994105, "grad_norm": 2.875230312347412, "learning_rate": 9.997102672107794e-06, "loss": 3.8519, "step": 55100 }, { "epoch": 0.05433154953776314, "grad_norm": 2.673006296157837, "learning_rate": 9.997097402258528e-06, "loss": 3.7879, "step": 55150 }, { "epoch": 0.054380807515585224, "grad_norm": 2.6999597549438477, "learning_rate": 9.997092127622435e-06, "loss": 3.8414, "step": 55200 }, { "epoch": 0.054430065493407315, "grad_norm": 2.8893816471099854, "learning_rate": 9.997086848199518e-06, "loss": 3.7944, "step": 55250 }, { "epoch": 0.0544793234712294, "grad_norm": 2.879107713699341, "learning_rate": 9.997081563989784e-06, "loss": 3.792, "step": 55300 }, { "epoch": 0.05452858144905149, "grad_norm": 2.9256958961486816, "learning_rate": 9.997076274993238e-06, "loss": 3.8202, "step": 55350 }, { "epoch": 0.054577839426873574, "grad_norm": 2.9391939640045166, "learning_rate": 9.997070981209882e-06, "loss": 3.8124, "step": 55400 }, { "epoch": 0.054627097404695665, "grad_norm": 3.245795488357544, "learning_rate": 9.997065682639724e-06, "loss": 3.8092, "step": 55450 }, { "epoch": 0.05467635538251775, "grad_norm": 2.8257791996002197, "learning_rate": 9.99706037928277e-06, "loss": 3.8236, "step": 55500 }, { "epoch": 0.05472561336033984, "grad_norm": 2.634209156036377, "learning_rate": 9.99705507113902e-06, "loss": 3.7884, "step": 55550 }, { "epoch": 0.05477487133816193, "grad_norm": 2.9316630363464355, "learning_rate": 9.997049758208487e-06, "loss": 3.8354, "step": 55600 }, { "epoch": 0.054824129315984016, "grad_norm": 2.789832830429077, "learning_rate": 9.99704444049117e-06, "loss": 3.8054, "step": 55650 }, { "epoch": 0.05487338729380611, "grad_norm": 2.7652952671051025, 
"learning_rate": 9.997039117987074e-06, "loss": 3.8282, "step": 55700 }, { "epoch": 0.05492264527162819, "grad_norm": 3.1302547454833984, "learning_rate": 9.997033790696206e-06, "loss": 3.801, "step": 55750 }, { "epoch": 0.05497190324945028, "grad_norm": 2.7514939308166504, "learning_rate": 9.997028458618574e-06, "loss": 3.765, "step": 55800 }, { "epoch": 0.05502116122727237, "grad_norm": 2.4372856616973877, "learning_rate": 9.997023121754177e-06, "loss": 3.789, "step": 55850 }, { "epoch": 0.05507041920509446, "grad_norm": 2.518200159072876, "learning_rate": 9.997017780103024e-06, "loss": 3.7629, "step": 55900 }, { "epoch": 0.05511967718291654, "grad_norm": 2.8256750106811523, "learning_rate": 9.99701243366512e-06, "loss": 3.8043, "step": 55950 }, { "epoch": 0.05516893516073863, "grad_norm": 3.1529719829559326, "learning_rate": 9.997007082440468e-06, "loss": 3.8362, "step": 56000 }, { "epoch": 0.055218193138560724, "grad_norm": 2.9714698791503906, "learning_rate": 9.997001726429075e-06, "loss": 3.8361, "step": 56050 }, { "epoch": 0.05526745111638281, "grad_norm": 2.7263238430023193, "learning_rate": 9.996996365630947e-06, "loss": 3.7947, "step": 56100 }, { "epoch": 0.0553167090942049, "grad_norm": 2.9313087463378906, "learning_rate": 9.996991000046085e-06, "loss": 3.7969, "step": 56150 }, { "epoch": 0.055365967072026984, "grad_norm": 3.372980833053589, "learning_rate": 9.996985629674501e-06, "loss": 3.8201, "step": 56200 }, { "epoch": 0.055415225049849075, "grad_norm": 2.9852516651153564, "learning_rate": 9.996980254516194e-06, "loss": 3.8138, "step": 56250 }, { "epoch": 0.05546448302767116, "grad_norm": 2.5766215324401855, "learning_rate": 9.996974874571169e-06, "loss": 3.7994, "step": 56300 }, { "epoch": 0.05551374100549325, "grad_norm": 2.7387025356292725, "learning_rate": 9.996969489839436e-06, "loss": 3.7767, "step": 56350 }, { "epoch": 0.05556299898331534, "grad_norm": 2.615220785140991, "learning_rate": 9.996964100320996e-06, "loss": 3.8271, "step": 56400 }, 
{ "epoch": 0.055612256961137425, "grad_norm": 2.8956141471862793, "learning_rate": 9.996958706015857e-06, "loss": 3.8913, "step": 56450 }, { "epoch": 0.055661514938959517, "grad_norm": 2.5934765338897705, "learning_rate": 9.996953306924022e-06, "loss": 3.8133, "step": 56500 }, { "epoch": 0.0557107729167816, "grad_norm": 2.5480055809020996, "learning_rate": 9.996947903045497e-06, "loss": 3.76, "step": 56550 }, { "epoch": 0.05576003089460369, "grad_norm": 2.7125661373138428, "learning_rate": 9.996942494380289e-06, "loss": 3.8328, "step": 56600 }, { "epoch": 0.055809288872425776, "grad_norm": 2.738781452178955, "learning_rate": 9.9969370809284e-06, "loss": 3.8444, "step": 56650 }, { "epoch": 0.05585854685024787, "grad_norm": 2.7964601516723633, "learning_rate": 9.996931662689835e-06, "loss": 3.8459, "step": 56700 }, { "epoch": 0.05590780482806995, "grad_norm": 2.6525187492370605, "learning_rate": 9.996926239664603e-06, "loss": 3.8073, "step": 56750 }, { "epoch": 0.05595706280589204, "grad_norm": 2.7038049697875977, "learning_rate": 9.996920811852707e-06, "loss": 3.7618, "step": 56800 }, { "epoch": 0.056006320783714134, "grad_norm": 3.1239099502563477, "learning_rate": 9.99691537925415e-06, "loss": 3.8331, "step": 56850 }, { "epoch": 0.05605557876153622, "grad_norm": 2.738351821899414, "learning_rate": 9.99690994186894e-06, "loss": 3.8287, "step": 56900 }, { "epoch": 0.05610483673935831, "grad_norm": 2.715238094329834, "learning_rate": 9.996904499697084e-06, "loss": 3.7652, "step": 56950 }, { "epoch": 0.05615409471718039, "grad_norm": 2.8009490966796875, "learning_rate": 9.996899052738582e-06, "loss": 3.8043, "step": 57000 }, { "epoch": 0.056203352695002484, "grad_norm": 2.6868886947631836, "learning_rate": 9.996893600993442e-06, "loss": 3.7626, "step": 57050 }, { "epoch": 0.05625261067282457, "grad_norm": 2.589235305786133, "learning_rate": 9.996888144461672e-06, "loss": 3.7993, "step": 57100 }, { "epoch": 0.05630186865064666, "grad_norm": 3.101914882659912, 
"learning_rate": 9.996882683143271e-06, "loss": 3.7456, "step": 57150 }, { "epoch": 0.056351126628468744, "grad_norm": 2.593919515609741, "learning_rate": 9.99687721703825e-06, "loss": 3.8419, "step": 57200 }, { "epoch": 0.056400384606290835, "grad_norm": 2.862473487854004, "learning_rate": 9.996871746146612e-06, "loss": 3.8377, "step": 57250 }, { "epoch": 0.056449642584112926, "grad_norm": 2.76829195022583, "learning_rate": 9.996866270468362e-06, "loss": 3.7794, "step": 57300 }, { "epoch": 0.05649890056193501, "grad_norm": 2.933870792388916, "learning_rate": 9.996860790003504e-06, "loss": 3.7912, "step": 57350 }, { "epoch": 0.0565481585397571, "grad_norm": 2.7253201007843018, "learning_rate": 9.996855304752045e-06, "loss": 3.8249, "step": 57400 }, { "epoch": 0.056597416517579185, "grad_norm": 2.575054883956909, "learning_rate": 9.996849814713991e-06, "loss": 3.7387, "step": 57450 }, { "epoch": 0.05664667449540128, "grad_norm": 2.791195869445801, "learning_rate": 9.996844319889346e-06, "loss": 3.7993, "step": 57500 }, { "epoch": 0.05669593247322336, "grad_norm": 2.7499754428863525, "learning_rate": 9.996838820278113e-06, "loss": 3.7296, "step": 57550 }, { "epoch": 0.05674519045104545, "grad_norm": 2.7251698970794678, "learning_rate": 9.996833315880303e-06, "loss": 3.8121, "step": 57600 }, { "epoch": 0.056794448428867536, "grad_norm": 3.0628533363342285, "learning_rate": 9.996827806695916e-06, "loss": 3.8075, "step": 57650 }, { "epoch": 0.05684370640668963, "grad_norm": 3.40836238861084, "learning_rate": 9.996822292724962e-06, "loss": 3.7851, "step": 57700 }, { "epoch": 0.05689296438451172, "grad_norm": 2.9646127223968506, "learning_rate": 9.99681677396744e-06, "loss": 3.7653, "step": 57750 }, { "epoch": 0.0569422223623338, "grad_norm": 2.5266225337982178, "learning_rate": 9.996811250423362e-06, "loss": 3.7511, "step": 57800 }, { "epoch": 0.056991480340155894, "grad_norm": 2.5867815017700195, "learning_rate": 9.996805722092727e-06, "loss": 3.8135, "step": 57850 }, { 
"epoch": 0.05704073831797798, "grad_norm": 2.687371253967285, "learning_rate": 9.996800188975545e-06, "loss": 3.7901, "step": 57900 }, { "epoch": 0.05708999629580007, "grad_norm": 2.585845947265625, "learning_rate": 9.99679465107182e-06, "loss": 3.7911, "step": 57950 }, { "epoch": 0.05713925427362215, "grad_norm": 2.6444971561431885, "learning_rate": 9.996789108381556e-06, "loss": 3.7977, "step": 58000 }, { "epoch": 0.057188512251444244, "grad_norm": 3.2465310096740723, "learning_rate": 9.99678356090476e-06, "loss": 3.7994, "step": 58050 }, { "epoch": 0.057237770229266335, "grad_norm": 2.729257822036743, "learning_rate": 9.996778008641437e-06, "loss": 3.767, "step": 58100 }, { "epoch": 0.05728702820708842, "grad_norm": 2.985215425491333, "learning_rate": 9.99677245159159e-06, "loss": 3.7526, "step": 58150 }, { "epoch": 0.05733628618491051, "grad_norm": 2.9066357612609863, "learning_rate": 9.996766889755228e-06, "loss": 3.7897, "step": 58200 }, { "epoch": 0.057385544162732595, "grad_norm": 2.7879366874694824, "learning_rate": 9.996761323132355e-06, "loss": 3.7648, "step": 58250 }, { "epoch": 0.057434802140554686, "grad_norm": 2.8415310382843018, "learning_rate": 9.996755751722975e-06, "loss": 3.8156, "step": 58300 }, { "epoch": 0.05748406011837677, "grad_norm": 2.735386371612549, "learning_rate": 9.996750175527094e-06, "loss": 3.7303, "step": 58350 }, { "epoch": 0.05753331809619886, "grad_norm": 2.484065055847168, "learning_rate": 9.996744594544719e-06, "loss": 3.7211, "step": 58400 }, { "epoch": 0.057582576074020946, "grad_norm": 2.826388120651245, "learning_rate": 9.996739008775851e-06, "loss": 3.7656, "step": 58450 }, { "epoch": 0.05763183405184304, "grad_norm": 2.840162754058838, "learning_rate": 9.9967334182205e-06, "loss": 3.7816, "step": 58500 }, { "epoch": 0.05768109202966513, "grad_norm": 3.03169322013855, "learning_rate": 9.996727822878672e-06, "loss": 3.7509, "step": 58550 }, { "epoch": 0.05773035000748721, "grad_norm": 2.7932119369506836, 
"learning_rate": 9.996722222750369e-06, "loss": 3.7482, "step": 58600 }, { "epoch": 0.0577796079853093, "grad_norm": 2.6114325523376465, "learning_rate": 9.996716617835595e-06, "loss": 3.826, "step": 58650 }, { "epoch": 0.05782886596313139, "grad_norm": 2.7125561237335205, "learning_rate": 9.99671100813436e-06, "loss": 3.7955, "step": 58700 }, { "epoch": 0.05787812394095348, "grad_norm": 2.8667004108428955, "learning_rate": 9.996705393646666e-06, "loss": 3.6536, "step": 58750 }, { "epoch": 0.05792738191877556, "grad_norm": 2.8250913619995117, "learning_rate": 9.996699774372519e-06, "loss": 3.8288, "step": 58800 }, { "epoch": 0.057976639896597654, "grad_norm": 2.67215633392334, "learning_rate": 9.996694150311925e-06, "loss": 3.7684, "step": 58850 }, { "epoch": 0.05802589787441974, "grad_norm": 2.6591062545776367, "learning_rate": 9.99668852146489e-06, "loss": 3.8017, "step": 58900 }, { "epoch": 0.05807515585224183, "grad_norm": 2.645397424697876, "learning_rate": 9.996682887831419e-06, "loss": 3.8416, "step": 58950 }, { "epoch": 0.05812441383006392, "grad_norm": 3.108062982559204, "learning_rate": 9.996677249411517e-06, "loss": 3.7391, "step": 59000 }, { "epoch": 0.058173671807886004, "grad_norm": 3.0808019638061523, "learning_rate": 9.99667160620519e-06, "loss": 3.7458, "step": 59050 }, { "epoch": 0.058222929785708095, "grad_norm": 2.9304213523864746, "learning_rate": 9.996665958212442e-06, "loss": 3.7824, "step": 59100 }, { "epoch": 0.05827218776353018, "grad_norm": 2.5487077236175537, "learning_rate": 9.996660305433278e-06, "loss": 3.8018, "step": 59150 }, { "epoch": 0.05832144574135227, "grad_norm": 2.507504940032959, "learning_rate": 9.996654647867708e-06, "loss": 3.7701, "step": 59200 }, { "epoch": 0.058370703719174355, "grad_norm": 2.793212413787842, "learning_rate": 9.99664898551573e-06, "loss": 3.7715, "step": 59250 }, { "epoch": 0.058419961696996446, "grad_norm": 2.6383514404296875, "learning_rate": 9.996643318377357e-06, "loss": 3.829, "step": 59300 }, { 
"epoch": 0.05846921967481853, "grad_norm": 3.466677665710449, "learning_rate": 9.996637646452589e-06, "loss": 3.7341, "step": 59350 }, { "epoch": 0.05851847765264062, "grad_norm": 2.646310567855835, "learning_rate": 9.996631969741435e-06, "loss": 3.7494, "step": 59400 }, { "epoch": 0.05856773563046271, "grad_norm": 2.6264052391052246, "learning_rate": 9.996626288243897e-06, "loss": 3.7765, "step": 59450 }, { "epoch": 0.0586169936082848, "grad_norm": 2.5519556999206543, "learning_rate": 9.996620601959983e-06, "loss": 3.8204, "step": 59500 }, { "epoch": 0.05866625158610689, "grad_norm": 2.620962619781494, "learning_rate": 9.996614910889697e-06, "loss": 3.8232, "step": 59550 }, { "epoch": 0.05871550956392897, "grad_norm": 2.7627553939819336, "learning_rate": 9.996609215033047e-06, "loss": 3.79, "step": 59600 }, { "epoch": 0.05876476754175106, "grad_norm": 2.370274066925049, "learning_rate": 9.996603514390036e-06, "loss": 3.7167, "step": 59650 }, { "epoch": 0.05881402551957315, "grad_norm": 2.7622532844543457, "learning_rate": 9.99659780896067e-06, "loss": 3.7203, "step": 59700 }, { "epoch": 0.05886328349739524, "grad_norm": 2.633330821990967, "learning_rate": 9.996592098744955e-06, "loss": 3.7304, "step": 59750 }, { "epoch": 0.05891254147521733, "grad_norm": 3.5819284915924072, "learning_rate": 9.996586383742895e-06, "loss": 3.7869, "step": 59800 }, { "epoch": 0.058961799453039414, "grad_norm": 3.0921247005462646, "learning_rate": 9.996580663954496e-06, "loss": 3.7221, "step": 59850 }, { "epoch": 0.059011057430861505, "grad_norm": 2.8721766471862793, "learning_rate": 9.996574939379766e-06, "loss": 3.7516, "step": 59900 }, { "epoch": 0.05906031540868359, "grad_norm": 2.776221990585327, "learning_rate": 9.996569210018708e-06, "loss": 3.7547, "step": 59950 }, { "epoch": 0.05910957338650568, "grad_norm": 2.741987466812134, "learning_rate": 9.996563475871327e-06, "loss": 3.7508, "step": 60000 }, { "epoch": 0.059158831364327764, "grad_norm": 2.4582152366638184, 
"learning_rate": 9.996557736937628e-06, "loss": 3.7539, "step": 60050 }, { "epoch": 0.059208089342149856, "grad_norm": 2.9588818550109863, "learning_rate": 9.99655199321762e-06, "loss": 3.8448, "step": 60100 }, { "epoch": 0.05925734731997194, "grad_norm": 2.7777903079986572, "learning_rate": 9.996546244711307e-06, "loss": 3.7646, "step": 60150 }, { "epoch": 0.05930660529779403, "grad_norm": 2.8564724922180176, "learning_rate": 9.996540491418693e-06, "loss": 3.7748, "step": 60200 }, { "epoch": 0.05935586327561612, "grad_norm": 2.779534101486206, "learning_rate": 9.996534733339785e-06, "loss": 3.79, "step": 60250 }, { "epoch": 0.059405121253438206, "grad_norm": 2.674382448196411, "learning_rate": 9.996528970474589e-06, "loss": 3.7342, "step": 60300 }, { "epoch": 0.0594543792312603, "grad_norm": 2.8330423831939697, "learning_rate": 9.996523202823108e-06, "loss": 3.7874, "step": 60350 }, { "epoch": 0.05950363720908238, "grad_norm": 2.768350601196289, "learning_rate": 9.996517430385348e-06, "loss": 3.7221, "step": 60400 }, { "epoch": 0.05955289518690447, "grad_norm": 2.5900590419769287, "learning_rate": 9.996511653161315e-06, "loss": 3.7502, "step": 60450 }, { "epoch": 0.05960215316472656, "grad_norm": 2.826695680618286, "learning_rate": 9.996505871151018e-06, "loss": 3.6925, "step": 60500 }, { "epoch": 0.05965141114254865, "grad_norm": 2.7383453845977783, "learning_rate": 9.99650008435446e-06, "loss": 3.7626, "step": 60550 }, { "epoch": 0.05970066912037073, "grad_norm": 3.161367177963257, "learning_rate": 9.996494292771643e-06, "loss": 3.7084, "step": 60600 }, { "epoch": 0.05974992709819282, "grad_norm": 3.196807384490967, "learning_rate": 9.996488496402578e-06, "loss": 3.7395, "step": 60650 }, { "epoch": 0.059799185076014914, "grad_norm": 2.693335771560669, "learning_rate": 9.996482695247268e-06, "loss": 3.7552, "step": 60700 }, { "epoch": 0.059848443053837, "grad_norm": 2.7385921478271484, "learning_rate": 9.996476889305718e-06, "loss": 3.7376, "step": 60750 }, { 
"epoch": 0.05989770103165909, "grad_norm": 2.898216962814331, "learning_rate": 9.996471078577936e-06, "loss": 3.7204, "step": 60800 }, { "epoch": 0.059946959009481174, "grad_norm": 3.002398729324341, "learning_rate": 9.996465263063923e-06, "loss": 3.7399, "step": 60850 }, { "epoch": 0.059996216987303265, "grad_norm": 2.7590394020080566, "learning_rate": 9.99645944276369e-06, "loss": 3.7711, "step": 60900 }, { "epoch": 0.06004547496512535, "grad_norm": 2.7998249530792236, "learning_rate": 9.99645361767724e-06, "loss": 3.7286, "step": 60950 }, { "epoch": 0.06009473294294744, "grad_norm": 2.8708784580230713, "learning_rate": 9.996447787804577e-06, "loss": 3.751, "step": 61000 }, { "epoch": 0.060143990920769524, "grad_norm": 2.7529208660125732, "learning_rate": 9.99644195314571e-06, "loss": 3.6778, "step": 61050 }, { "epoch": 0.060193248898591616, "grad_norm": 2.7081685066223145, "learning_rate": 9.996436113700643e-06, "loss": 3.7249, "step": 61100 }, { "epoch": 0.06024250687641371, "grad_norm": 3.028101921081543, "learning_rate": 9.99643026946938e-06, "loss": 3.6785, "step": 61150 }, { "epoch": 0.06029176485423579, "grad_norm": 2.640751838684082, "learning_rate": 9.996424420451929e-06, "loss": 3.7471, "step": 61200 }, { "epoch": 0.06034102283205788, "grad_norm": 2.8480353355407715, "learning_rate": 9.996418566648295e-06, "loss": 3.711, "step": 61250 }, { "epoch": 0.060390280809879966, "grad_norm": 2.6508543491363525, "learning_rate": 9.996412708058482e-06, "loss": 3.6734, "step": 61300 }, { "epoch": 0.06043953878770206, "grad_norm": 2.7539050579071045, "learning_rate": 9.996406844682498e-06, "loss": 3.7094, "step": 61350 }, { "epoch": 0.06048879676552414, "grad_norm": 2.7134745121002197, "learning_rate": 9.996400976520347e-06, "loss": 3.713, "step": 61400 }, { "epoch": 0.06053805474334623, "grad_norm": 2.97367262840271, "learning_rate": 9.996395103572035e-06, "loss": 3.7753, "step": 61450 }, { "epoch": 0.060587312721168324, "grad_norm": 2.785409688949585, 
"learning_rate": 9.996389225837569e-06, "loss": 3.7404, "step": 61500 }, { "epoch": 0.06063657069899041, "grad_norm": 2.693065881729126, "learning_rate": 9.996383343316951e-06, "loss": 3.7307, "step": 61550 }, { "epoch": 0.0606858286768125, "grad_norm": 2.6083180904388428, "learning_rate": 9.99637745601019e-06, "loss": 3.7253, "step": 61600 }, { "epoch": 0.06073508665463458, "grad_norm": 2.5235724449157715, "learning_rate": 9.996371563917293e-06, "loss": 3.7284, "step": 61650 }, { "epoch": 0.060784344632456674, "grad_norm": 2.820072889328003, "learning_rate": 9.99636566703826e-06, "loss": 3.7328, "step": 61700 }, { "epoch": 0.06083360261027876, "grad_norm": 2.5571258068084717, "learning_rate": 9.996359765373102e-06, "loss": 3.6691, "step": 61750 }, { "epoch": 0.06088286058810085, "grad_norm": 2.911647081375122, "learning_rate": 9.996353858921822e-06, "loss": 3.6961, "step": 61800 }, { "epoch": 0.060932118565922934, "grad_norm": 2.669754981994629, "learning_rate": 9.996347947684425e-06, "loss": 3.6748, "step": 61850 }, { "epoch": 0.060981376543745025, "grad_norm": 2.7714967727661133, "learning_rate": 9.996342031660919e-06, "loss": 3.7938, "step": 61900 }, { "epoch": 0.061030634521567116, "grad_norm": 2.675990104675293, "learning_rate": 9.99633611085131e-06, "loss": 3.7755, "step": 61950 }, { "epoch": 0.0610798924993892, "grad_norm": 2.9145185947418213, "learning_rate": 9.9963301852556e-06, "loss": 3.7465, "step": 62000 }, { "epoch": 0.06112915047721129, "grad_norm": 2.7616021633148193, "learning_rate": 9.996324254873798e-06, "loss": 3.7126, "step": 62050 }, { "epoch": 0.061178408455033376, "grad_norm": 2.7643511295318604, "learning_rate": 9.996318319705908e-06, "loss": 3.7685, "step": 62100 }, { "epoch": 0.06122766643285547, "grad_norm": 3.007451295852661, "learning_rate": 9.996312379751937e-06, "loss": 3.7003, "step": 62150 }, { "epoch": 0.06127692441067755, "grad_norm": 2.5740201473236084, "learning_rate": 9.996306435011889e-06, "loss": 3.7065, "step": 62200 }, { 
"epoch": 0.06132618238849964, "grad_norm": 2.876936912536621, "learning_rate": 9.996300485485773e-06, "loss": 3.7407, "step": 62250 }, { "epoch": 0.061375440366321726, "grad_norm": 2.756286859512329, "learning_rate": 9.996294531173588e-06, "loss": 3.696, "step": 62300 }, { "epoch": 0.06142469834414382, "grad_norm": 2.903721570968628, "learning_rate": 9.996288572075348e-06, "loss": 3.678, "step": 62350 }, { "epoch": 0.06147395632196591, "grad_norm": 2.936685085296631, "learning_rate": 9.996282608191054e-06, "loss": 3.7227, "step": 62400 }, { "epoch": 0.06152321429978799, "grad_norm": 2.588590145111084, "learning_rate": 9.996276639520713e-06, "loss": 3.7181, "step": 62450 }, { "epoch": 0.061572472277610084, "grad_norm": 2.789505958557129, "learning_rate": 9.996270666064328e-06, "loss": 3.6941, "step": 62500 }, { "epoch": 0.06162173025543217, "grad_norm": 2.6898155212402344, "learning_rate": 9.996264687821908e-06, "loss": 3.7163, "step": 62550 }, { "epoch": 0.06167098823325426, "grad_norm": 2.727890968322754, "learning_rate": 9.996258704793459e-06, "loss": 3.6795, "step": 62600 }, { "epoch": 0.06172024621107634, "grad_norm": 2.646885633468628, "learning_rate": 9.996252716978983e-06, "loss": 3.6687, "step": 62650 }, { "epoch": 0.061769504188898434, "grad_norm": 2.4528701305389404, "learning_rate": 9.99624672437849e-06, "loss": 3.7218, "step": 62700 }, { "epoch": 0.06181876216672052, "grad_norm": 2.754321813583374, "learning_rate": 9.996240726991984e-06, "loss": 3.7014, "step": 62750 }, { "epoch": 0.06186802014454261, "grad_norm": 2.4906885623931885, "learning_rate": 9.99623472481947e-06, "loss": 3.7453, "step": 62800 }, { "epoch": 0.0619172781223647, "grad_norm": 2.6868820190429688, "learning_rate": 9.996228717860954e-06, "loss": 3.7176, "step": 62850 }, { "epoch": 0.061966536100186785, "grad_norm": 2.894132614135742, "learning_rate": 9.996222706116443e-06, "loss": 3.732, "step": 62900 }, { "epoch": 0.062015794078008876, "grad_norm": 2.8179545402526855, 
"learning_rate": 9.996216689585941e-06, "loss": 3.7555, "step": 62950 }, { "epoch": 0.06206505205583096, "grad_norm": 3.151822805404663, "learning_rate": 9.996210668269454e-06, "loss": 3.6918, "step": 63000 }, { "epoch": 0.06211431003365305, "grad_norm": 2.6101737022399902, "learning_rate": 9.99620464216699e-06, "loss": 3.6697, "step": 63050 }, { "epoch": 0.062163568011475136, "grad_norm": 2.518585443496704, "learning_rate": 9.996198611278552e-06, "loss": 3.7089, "step": 63100 }, { "epoch": 0.06221282598929723, "grad_norm": 2.7241904735565186, "learning_rate": 9.996192575604147e-06, "loss": 3.6672, "step": 63150 }, { "epoch": 0.06226208396711932, "grad_norm": 3.6932260990142822, "learning_rate": 9.996186535143782e-06, "loss": 3.7307, "step": 63200 }, { "epoch": 0.0623113419449414, "grad_norm": 2.692938804626465, "learning_rate": 9.996180489897461e-06, "loss": 3.7466, "step": 63250 }, { "epoch": 0.06236059992276349, "grad_norm": 2.5920846462249756, "learning_rate": 9.99617443986519e-06, "loss": 3.7592, "step": 63300 }, { "epoch": 0.06240985790058558, "grad_norm": 3.367356777191162, "learning_rate": 9.996168385046977e-06, "loss": 3.7785, "step": 63350 }, { "epoch": 0.06245911587840767, "grad_norm": 2.6570591926574707, "learning_rate": 9.996162325442823e-06, "loss": 3.7467, "step": 63400 }, { "epoch": 0.06250837385622976, "grad_norm": 2.741964101791382, "learning_rate": 9.996156261052738e-06, "loss": 3.7198, "step": 63450 }, { "epoch": 0.06255763183405184, "grad_norm": 2.8563601970672607, "learning_rate": 9.996150191876726e-06, "loss": 3.6756, "step": 63500 }, { "epoch": 0.06260688981187393, "grad_norm": 2.790907621383667, "learning_rate": 9.996144117914794e-06, "loss": 3.7218, "step": 63550 }, { "epoch": 0.06265614778969601, "grad_norm": 2.941011428833008, "learning_rate": 9.996138039166947e-06, "loss": 3.7259, "step": 63600 }, { "epoch": 0.06270540576751811, "grad_norm": 2.8726089000701904, "learning_rate": 9.99613195563319e-06, "loss": 3.6857, "step": 63650 }, { 
"epoch": 0.0627546637453402, "grad_norm": 2.582637310028076, "learning_rate": 9.99612586731353e-06, "loss": 3.7596, "step": 63700 }, { "epoch": 0.06280392172316228, "grad_norm": 2.6552093029022217, "learning_rate": 9.996119774207974e-06, "loss": 3.6819, "step": 63750 }, { "epoch": 0.06285317970098438, "grad_norm": 2.7356131076812744, "learning_rate": 9.996113676316526e-06, "loss": 3.7675, "step": 63800 }, { "epoch": 0.06290243767880646, "grad_norm": 2.638216257095337, "learning_rate": 9.996107573639194e-06, "loss": 3.7673, "step": 63850 }, { "epoch": 0.06295169565662855, "grad_norm": 2.760004997253418, "learning_rate": 9.99610146617598e-06, "loss": 3.6771, "step": 63900 }, { "epoch": 0.06300095363445063, "grad_norm": 4.588542938232422, "learning_rate": 9.996095353926892e-06, "loss": 3.7097, "step": 63950 }, { "epoch": 0.06305021161227273, "grad_norm": 2.6513986587524414, "learning_rate": 9.996089236891938e-06, "loss": 3.7472, "step": 64000 }, { "epoch": 0.06309946959009481, "grad_norm": 2.725025177001953, "learning_rate": 9.996083115071117e-06, "loss": 3.7337, "step": 64050 }, { "epoch": 0.0631487275679169, "grad_norm": 2.6722593307495117, "learning_rate": 9.996076988464444e-06, "loss": 3.732, "step": 64100 }, { "epoch": 0.063197985545739, "grad_norm": 2.9061412811279297, "learning_rate": 9.99607085707192e-06, "loss": 3.696, "step": 64150 }, { "epoch": 0.06324724352356108, "grad_norm": 2.9670908451080322, "learning_rate": 9.99606472089355e-06, "loss": 3.7158, "step": 64200 }, { "epoch": 0.06329650150138316, "grad_norm": 3.5179617404937744, "learning_rate": 9.996058579929341e-06, "loss": 3.6542, "step": 64250 }, { "epoch": 0.06334575947920525, "grad_norm": 2.726682186126709, "learning_rate": 9.9960524341793e-06, "loss": 3.7415, "step": 64300 }, { "epoch": 0.06339501745702734, "grad_norm": 2.5933315753936768, "learning_rate": 9.996046283643432e-06, "loss": 3.6843, "step": 64350 }, { "epoch": 0.06344427543484943, "grad_norm": 2.6134650707244873, "learning_rate": 
9.996040128321742e-06, "loss": 3.7027, "step": 64400 }, { "epoch": 0.06349353341267151, "grad_norm": 2.868830442428589, "learning_rate": 9.996033968214238e-06, "loss": 3.7166, "step": 64450 }, { "epoch": 0.06354279139049361, "grad_norm": 3.2262396812438965, "learning_rate": 9.996027803320925e-06, "loss": 3.7137, "step": 64500 }, { "epoch": 0.0635920493683157, "grad_norm": 2.581850528717041, "learning_rate": 9.996021633641807e-06, "loss": 3.7132, "step": 64550 }, { "epoch": 0.06364130734613778, "grad_norm": 2.637319564819336, "learning_rate": 9.996015459176893e-06, "loss": 3.6475, "step": 64600 }, { "epoch": 0.06369056532395986, "grad_norm": 2.994279384613037, "learning_rate": 9.996009279926185e-06, "loss": 3.7911, "step": 64650 }, { "epoch": 0.06373982330178196, "grad_norm": 3.0654690265655518, "learning_rate": 9.996003095889693e-06, "loss": 3.6912, "step": 64700 }, { "epoch": 0.06378908127960405, "grad_norm": 2.7070562839508057, "learning_rate": 9.995996907067422e-06, "loss": 3.7103, "step": 64750 }, { "epoch": 0.06383833925742613, "grad_norm": 2.4365599155426025, "learning_rate": 9.995990713459377e-06, "loss": 3.7167, "step": 64800 }, { "epoch": 0.06388759723524821, "grad_norm": 3.452657461166382, "learning_rate": 9.995984515065562e-06, "loss": 3.6756, "step": 64850 }, { "epoch": 0.06393685521307031, "grad_norm": 2.942868709564209, "learning_rate": 9.995978311885988e-06, "loss": 3.6684, "step": 64900 }, { "epoch": 0.0639861131908924, "grad_norm": 2.9266514778137207, "learning_rate": 9.995972103920655e-06, "loss": 3.7284, "step": 64950 }, { "epoch": 0.06403537116871448, "grad_norm": 2.5801539421081543, "learning_rate": 9.995965891169574e-06, "loss": 3.7141, "step": 65000 }, { "epoch": 0.06408462914653658, "grad_norm": 2.8556201457977295, "learning_rate": 9.99595967363275e-06, "loss": 3.6621, "step": 65050 }, { "epoch": 0.06413388712435866, "grad_norm": 2.5468080043792725, "learning_rate": 9.995953451310185e-06, "loss": 3.6969, "step": 65100 }, { "epoch": 
0.06418314510218075, "grad_norm": 2.6413984298706055, "learning_rate": 9.99594722420189e-06, "loss": 3.7128, "step": 65150 }, { "epoch": 0.06423240308000283, "grad_norm": 2.5951850414276123, "learning_rate": 9.995940992307866e-06, "loss": 3.7228, "step": 65200 }, { "epoch": 0.06428166105782493, "grad_norm": 2.5401663780212402, "learning_rate": 9.995934755628124e-06, "loss": 3.7299, "step": 65250 }, { "epoch": 0.06433091903564701, "grad_norm": 2.6954691410064697, "learning_rate": 9.995928514162667e-06, "loss": 3.7235, "step": 65300 }, { "epoch": 0.0643801770134691, "grad_norm": 2.728203773498535, "learning_rate": 9.995922267911502e-06, "loss": 3.7012, "step": 65350 }, { "epoch": 0.0644294349912912, "grad_norm": 2.9748966693878174, "learning_rate": 9.995916016874635e-06, "loss": 3.7108, "step": 65400 }, { "epoch": 0.06447869296911328, "grad_norm": 2.5941824913024902, "learning_rate": 9.995909761052071e-06, "loss": 3.6936, "step": 65450 }, { "epoch": 0.06452795094693536, "grad_norm": 2.7038962841033936, "learning_rate": 9.995903500443818e-06, "loss": 3.7402, "step": 65500 }, { "epoch": 0.06457720892475745, "grad_norm": 2.6934900283813477, "learning_rate": 9.995897235049878e-06, "loss": 3.6949, "step": 65550 }, { "epoch": 0.06462646690257955, "grad_norm": 2.6275341510772705, "learning_rate": 9.995890964870263e-06, "loss": 3.6344, "step": 65600 }, { "epoch": 0.06467572488040163, "grad_norm": 3.036489248275757, "learning_rate": 9.995884689904973e-06, "loss": 3.7363, "step": 65650 }, { "epoch": 0.06472498285822371, "grad_norm": 2.6166820526123047, "learning_rate": 9.995878410154019e-06, "loss": 3.6889, "step": 65700 }, { "epoch": 0.06477424083604581, "grad_norm": 2.964157819747925, "learning_rate": 9.995872125617402e-06, "loss": 3.6786, "step": 65750 }, { "epoch": 0.0648234988138679, "grad_norm": 2.7862625122070312, "learning_rate": 9.995865836295132e-06, "loss": 3.681, "step": 65800 }, { "epoch": 0.06487275679168998, "grad_norm": 2.5488617420196533, "learning_rate": 
9.995859542187214e-06, "loss": 3.7056, "step": 65850 }, { "epoch": 0.06492201476951207, "grad_norm": 2.583716630935669, "learning_rate": 9.995853243293655e-06, "loss": 3.7904, "step": 65900 }, { "epoch": 0.06497127274733416, "grad_norm": 2.800046920776367, "learning_rate": 9.995846939614457e-06, "loss": 3.7301, "step": 65950 }, { "epoch": 0.06502053072515625, "grad_norm": 2.9489681720733643, "learning_rate": 9.99584063114963e-06, "loss": 3.6694, "step": 66000 }, { "epoch": 0.06506978870297833, "grad_norm": 2.8252675533294678, "learning_rate": 9.99583431789918e-06, "loss": 3.6542, "step": 66050 }, { "epoch": 0.06511904668080042, "grad_norm": 2.988348960876465, "learning_rate": 9.995827999863111e-06, "loss": 3.6568, "step": 66100 }, { "epoch": 0.06516830465862251, "grad_norm": 3.0104172229766846, "learning_rate": 9.99582167704143e-06, "loss": 3.6298, "step": 66150 }, { "epoch": 0.0652175626364446, "grad_norm": 2.784242630004883, "learning_rate": 9.995815349434143e-06, "loss": 3.6332, "step": 66200 }, { "epoch": 0.06526682061426668, "grad_norm": 2.846703052520752, "learning_rate": 9.995809017041256e-06, "loss": 3.6847, "step": 66250 }, { "epoch": 0.06531607859208878, "grad_norm": 2.8544907569885254, "learning_rate": 9.995802679862774e-06, "loss": 3.6677, "step": 66300 }, { "epoch": 0.06536533656991086, "grad_norm": 2.620543956756592, "learning_rate": 9.995796337898706e-06, "loss": 3.7168, "step": 66350 }, { "epoch": 0.06541459454773295, "grad_norm": 2.8716800212860107, "learning_rate": 9.995789991149056e-06, "loss": 3.6452, "step": 66400 }, { "epoch": 0.06546385252555503, "grad_norm": 2.7985305786132812, "learning_rate": 9.99578363961383e-06, "loss": 3.6687, "step": 66450 }, { "epoch": 0.06551311050337713, "grad_norm": 2.7281441688537598, "learning_rate": 9.995777283293036e-06, "loss": 3.6909, "step": 66500 }, { "epoch": 0.06556236848119922, "grad_norm": 2.6076512336730957, "learning_rate": 9.995770922186675e-06, "loss": 3.6988, "step": 66550 }, { "epoch": 
0.0656116264590213, "grad_norm": 2.85115909576416, "learning_rate": 9.99576455629476e-06, "loss": 3.7141, "step": 66600 }, { "epoch": 0.0656608844368434, "grad_norm": 2.996894598007202, "learning_rate": 9.995758185617293e-06, "loss": 3.6868, "step": 66650 }, { "epoch": 0.06571014241466548, "grad_norm": 2.679750919342041, "learning_rate": 9.99575181015428e-06, "loss": 3.721, "step": 66700 }, { "epoch": 0.06575940039248757, "grad_norm": 2.784324884414673, "learning_rate": 9.99574542990573e-06, "loss": 3.6911, "step": 66750 }, { "epoch": 0.06580865837030965, "grad_norm": 2.7431397438049316, "learning_rate": 9.995739044871645e-06, "loss": 3.6608, "step": 66800 }, { "epoch": 0.06585791634813175, "grad_norm": 2.8210859298706055, "learning_rate": 9.995732655052033e-06, "loss": 3.6894, "step": 66850 }, { "epoch": 0.06590717432595383, "grad_norm": 2.549862861633301, "learning_rate": 9.995726260446901e-06, "loss": 3.6452, "step": 66900 }, { "epoch": 0.06595643230377592, "grad_norm": 2.721755266189575, "learning_rate": 9.995719861056254e-06, "loss": 3.6709, "step": 66950 }, { "epoch": 0.06600569028159801, "grad_norm": 2.8079075813293457, "learning_rate": 9.9957134568801e-06, "loss": 3.6714, "step": 67000 }, { "epoch": 0.0660549482594201, "grad_norm": 2.9801430702209473, "learning_rate": 9.995707047918443e-06, "loss": 3.7196, "step": 67050 }, { "epoch": 0.06610420623724218, "grad_norm": 2.6767866611480713, "learning_rate": 9.99570063417129e-06, "loss": 3.7199, "step": 67100 }, { "epoch": 0.06615346421506427, "grad_norm": 2.5830349922180176, "learning_rate": 9.995694215638647e-06, "loss": 3.7052, "step": 67150 }, { "epoch": 0.06620272219288637, "grad_norm": 2.797785997390747, "learning_rate": 9.995687792320519e-06, "loss": 3.7105, "step": 67200 }, { "epoch": 0.06625198017070845, "grad_norm": 2.8043534755706787, "learning_rate": 9.995681364216913e-06, "loss": 3.6795, "step": 67250 }, { "epoch": 0.06630123814853053, "grad_norm": 2.6308836936950684, "learning_rate": 
9.995674931327839e-06, "loss": 3.6974, "step": 67300 }, { "epoch": 0.06635049612635262, "grad_norm": 2.7820475101470947, "learning_rate": 9.995668493653296e-06, "loss": 3.6913, "step": 67350 }, { "epoch": 0.06639975410417472, "grad_norm": 2.9104535579681396, "learning_rate": 9.995662051193295e-06, "loss": 3.6462, "step": 67400 }, { "epoch": 0.0664490120819968, "grad_norm": 2.507720708847046, "learning_rate": 9.99565560394784e-06, "loss": 3.6808, "step": 67450 }, { "epoch": 0.06649827005981888, "grad_norm": 2.683095932006836, "learning_rate": 9.99564915191694e-06, "loss": 3.6738, "step": 67500 }, { "epoch": 0.06654752803764098, "grad_norm": 2.4717819690704346, "learning_rate": 9.995642695100598e-06, "loss": 3.6557, "step": 67550 }, { "epoch": 0.06659678601546307, "grad_norm": 2.506570339202881, "learning_rate": 9.995636233498823e-06, "loss": 3.6892, "step": 67600 }, { "epoch": 0.06664604399328515, "grad_norm": 2.6206767559051514, "learning_rate": 9.995629767111618e-06, "loss": 3.6387, "step": 67650 }, { "epoch": 0.06669530197110723, "grad_norm": 2.8186750411987305, "learning_rate": 9.995623295938991e-06, "loss": 3.6422, "step": 67700 }, { "epoch": 0.06674455994892933, "grad_norm": 2.849534034729004, "learning_rate": 9.99561681998095e-06, "loss": 3.6753, "step": 67750 }, { "epoch": 0.06679381792675142, "grad_norm": 2.7502522468566895, "learning_rate": 9.995610339237495e-06, "loss": 3.6918, "step": 67800 }, { "epoch": 0.0668430759045735, "grad_norm": 2.680155038833618, "learning_rate": 9.99560385370864e-06, "loss": 3.6419, "step": 67850 }, { "epoch": 0.0668923338823956, "grad_norm": 2.67852783203125, "learning_rate": 9.995597363394387e-06, "loss": 3.6259, "step": 67900 }, { "epoch": 0.06694159186021768, "grad_norm": 2.801994562149048, "learning_rate": 9.995590868294742e-06, "loss": 3.637, "step": 67950 }, { "epoch": 0.06699084983803977, "grad_norm": 2.822499990463257, "learning_rate": 9.995584368409714e-06, "loss": 3.7181, "step": 68000 }, { "epoch": 
0.06704010781586185, "grad_norm": 2.5790913105010986, "learning_rate": 9.995577863739305e-06, "loss": 3.6916, "step": 68050 }, { "epoch": 0.06708936579368395, "grad_norm": 2.928649663925171, "learning_rate": 9.995571354283526e-06, "loss": 3.7781, "step": 68100 }, { "epoch": 0.06713862377150603, "grad_norm": 2.8456549644470215, "learning_rate": 9.99556484004238e-06, "loss": 3.7117, "step": 68150 }, { "epoch": 0.06718788174932812, "grad_norm": 2.619476795196533, "learning_rate": 9.995558321015873e-06, "loss": 3.6417, "step": 68200 }, { "epoch": 0.0672371397271502, "grad_norm": 2.623859405517578, "learning_rate": 9.995551797204012e-06, "loss": 3.7017, "step": 68250 }, { "epoch": 0.0672863977049723, "grad_norm": 2.7679502964019775, "learning_rate": 9.995545268606805e-06, "loss": 3.6567, "step": 68300 }, { "epoch": 0.06733565568279438, "grad_norm": 2.713867425918579, "learning_rate": 9.995538735224258e-06, "loss": 3.667, "step": 68350 }, { "epoch": 0.06738491366061647, "grad_norm": 2.7669084072113037, "learning_rate": 9.995532197056374e-06, "loss": 3.6816, "step": 68400 }, { "epoch": 0.06743417163843857, "grad_norm": 2.79721999168396, "learning_rate": 9.995525654103161e-06, "loss": 3.6512, "step": 68450 }, { "epoch": 0.06748342961626065, "grad_norm": 2.477349281311035, "learning_rate": 9.995519106364627e-06, "loss": 3.7428, "step": 68500 }, { "epoch": 0.06753268759408274, "grad_norm": 2.895503282546997, "learning_rate": 9.995512553840775e-06, "loss": 3.6677, "step": 68550 }, { "epoch": 0.06758194557190482, "grad_norm": 2.718409299850464, "learning_rate": 9.995505996531616e-06, "loss": 3.6753, "step": 68600 }, { "epoch": 0.06763120354972692, "grad_norm": 2.7651071548461914, "learning_rate": 9.995499434437153e-06, "loss": 3.7204, "step": 68650 }, { "epoch": 0.067680461527549, "grad_norm": 2.6033096313476562, "learning_rate": 9.99549286755739e-06, "loss": 3.6773, "step": 68700 }, { "epoch": 0.06772971950537109, "grad_norm": 2.903674840927124, "learning_rate": 
9.995486295892338e-06, "loss": 3.6904, "step": 68750 }, { "epoch": 0.06777897748319318, "grad_norm": 2.7972679138183594, "learning_rate": 9.995479719442001e-06, "loss": 3.6332, "step": 68800 }, { "epoch": 0.06782823546101527, "grad_norm": 2.6775546073913574, "learning_rate": 9.995473138206386e-06, "loss": 3.6784, "step": 68850 }, { "epoch": 0.06787749343883735, "grad_norm": 3.021813154220581, "learning_rate": 9.9954665521855e-06, "loss": 3.6889, "step": 68900 }, { "epoch": 0.06792675141665944, "grad_norm": 2.641707420349121, "learning_rate": 9.995459961379346e-06, "loss": 3.6664, "step": 68950 }, { "epoch": 0.06797600939448153, "grad_norm": 2.8372721672058105, "learning_rate": 9.995453365787933e-06, "loss": 3.7011, "step": 69000 }, { "epoch": 0.06802526737230362, "grad_norm": 3.4274492263793945, "learning_rate": 9.995446765411267e-06, "loss": 3.6451, "step": 69050 }, { "epoch": 0.0680745253501257, "grad_norm": 2.832369089126587, "learning_rate": 9.995440160249354e-06, "loss": 3.6733, "step": 69100 }, { "epoch": 0.0681237833279478, "grad_norm": 2.603482246398926, "learning_rate": 9.995433550302201e-06, "loss": 3.6992, "step": 69150 }, { "epoch": 0.06817304130576989, "grad_norm": 2.6224849224090576, "learning_rate": 9.995426935569813e-06, "loss": 3.7153, "step": 69200 }, { "epoch": 0.06822229928359197, "grad_norm": 2.580613136291504, "learning_rate": 9.995420316052198e-06, "loss": 3.7002, "step": 69250 }, { "epoch": 0.06827155726141405, "grad_norm": 2.3886590003967285, "learning_rate": 9.995413691749361e-06, "loss": 3.6898, "step": 69300 }, { "epoch": 0.06832081523923615, "grad_norm": 2.630833625793457, "learning_rate": 9.99540706266131e-06, "loss": 3.7163, "step": 69350 }, { "epoch": 0.06837007321705824, "grad_norm": 3.051788091659546, "learning_rate": 9.995400428788048e-06, "loss": 3.701, "step": 69400 }, { "epoch": 0.06841933119488032, "grad_norm": 2.7844479084014893, "learning_rate": 9.995393790129586e-06, "loss": 3.6846, "step": 69450 }, { "epoch": 
0.0684685891727024, "grad_norm": 2.484066963195801, "learning_rate": 9.995387146685927e-06, "loss": 3.6316, "step": 69500 }, { "epoch": 0.0685178471505245, "grad_norm": 2.7856152057647705, "learning_rate": 9.99538049845708e-06, "loss": 3.6809, "step": 69550 }, { "epoch": 0.06856710512834659, "grad_norm": 2.687910318374634, "learning_rate": 9.995373845443046e-06, "loss": 3.6079, "step": 69600 }, { "epoch": 0.06861636310616867, "grad_norm": 2.677640199661255, "learning_rate": 9.995367187643836e-06, "loss": 3.687, "step": 69650 }, { "epoch": 0.06866562108399077, "grad_norm": 2.5716683864593506, "learning_rate": 9.995360525059457e-06, "loss": 3.6982, "step": 69700 }, { "epoch": 0.06871487906181285, "grad_norm": 2.392432451248169, "learning_rate": 9.995353857689913e-06, "loss": 3.7175, "step": 69750 }, { "epoch": 0.06876413703963494, "grad_norm": 2.8337388038635254, "learning_rate": 9.99534718553521e-06, "loss": 3.6757, "step": 69800 }, { "epoch": 0.06881339501745702, "grad_norm": 2.436741352081299, "learning_rate": 9.995340508595358e-06, "loss": 3.6196, "step": 69850 }, { "epoch": 0.06886265299527912, "grad_norm": 2.593412160873413, "learning_rate": 9.995333826870359e-06, "loss": 3.6514, "step": 69900 }, { "epoch": 0.0689119109731012, "grad_norm": 2.501225471496582, "learning_rate": 9.995327140360223e-06, "loss": 3.6725, "step": 69950 }, { "epoch": 0.06896116895092329, "grad_norm": 2.9814841747283936, "learning_rate": 9.995320449064953e-06, "loss": 3.6984, "step": 70000 }, { "epoch": 0.06901042692874539, "grad_norm": 2.6169559955596924, "learning_rate": 9.99531375298456e-06, "loss": 3.6585, "step": 70050 }, { "epoch": 0.06905968490656747, "grad_norm": 2.7606382369995117, "learning_rate": 9.995307052119045e-06, "loss": 3.6963, "step": 70100 }, { "epoch": 0.06910894288438955, "grad_norm": 2.7260844707489014, "learning_rate": 9.995300346468418e-06, "loss": 3.664, "step": 70150 }, { "epoch": 0.06915820086221164, "grad_norm": 2.8161635398864746, "learning_rate": 
9.995293636032683e-06, "loss": 3.6813, "step": 70200 }, { "epoch": 0.06920745884003374, "grad_norm": 2.6676254272460938, "learning_rate": 9.995286920811849e-06, "loss": 3.6607, "step": 70250 }, { "epoch": 0.06925671681785582, "grad_norm": 2.661923885345459, "learning_rate": 9.99528020080592e-06, "loss": 3.6198, "step": 70300 }, { "epoch": 0.0693059747956779, "grad_norm": 2.5987586975097656, "learning_rate": 9.995273476014907e-06, "loss": 3.7121, "step": 70350 }, { "epoch": 0.0693552327735, "grad_norm": 2.387489080429077, "learning_rate": 9.99526674643881e-06, "loss": 3.6561, "step": 70400 }, { "epoch": 0.06940449075132209, "grad_norm": 2.600698709487915, "learning_rate": 9.99526001207764e-06, "loss": 3.6802, "step": 70450 }, { "epoch": 0.06945374872914417, "grad_norm": 2.67354416847229, "learning_rate": 9.995253272931402e-06, "loss": 3.6887, "step": 70500 }, { "epoch": 0.06950300670696626, "grad_norm": 2.8217203617095947, "learning_rate": 9.995246529000102e-06, "loss": 3.6617, "step": 70550 }, { "epoch": 0.06955226468478835, "grad_norm": 3.004943370819092, "learning_rate": 9.995239780283748e-06, "loss": 3.6442, "step": 70600 }, { "epoch": 0.06960152266261044, "grad_norm": 2.8911502361297607, "learning_rate": 9.995233026782346e-06, "loss": 3.7146, "step": 70650 }, { "epoch": 0.06965078064043252, "grad_norm": 2.5290520191192627, "learning_rate": 9.995226268495898e-06, "loss": 3.6641, "step": 70700 }, { "epoch": 0.0697000386182546, "grad_norm": 3.099642038345337, "learning_rate": 9.995219505424418e-06, "loss": 3.6278, "step": 70750 }, { "epoch": 0.0697492965960767, "grad_norm": 2.4764974117279053, "learning_rate": 9.995212737567908e-06, "loss": 3.6048, "step": 70800 }, { "epoch": 0.06979855457389879, "grad_norm": 2.7321791648864746, "learning_rate": 9.995205964926375e-06, "loss": 3.6714, "step": 70850 }, { "epoch": 0.06984781255172087, "grad_norm": 2.5434348583221436, "learning_rate": 9.995199187499826e-06, "loss": 3.7312, "step": 70900 }, { "epoch": 
0.06989707052954297, "grad_norm": 2.8627142906188965, "learning_rate": 9.995192405288267e-06, "loss": 3.6505, "step": 70950 }, { "epoch": 0.06994632850736505, "grad_norm": 2.8490633964538574, "learning_rate": 9.995185618291706e-06, "loss": 3.6901, "step": 71000 }, { "epoch": 0.06999558648518714, "grad_norm": 2.5741195678710938, "learning_rate": 9.995178826510147e-06, "loss": 3.6637, "step": 71050 }, { "epoch": 0.07004484446300922, "grad_norm": 2.684366226196289, "learning_rate": 9.9951720299436e-06, "loss": 3.6121, "step": 71100 }, { "epoch": 0.07009410244083132, "grad_norm": 2.7699320316314697, "learning_rate": 9.995165228592068e-06, "loss": 3.7427, "step": 71150 }, { "epoch": 0.0701433604186534, "grad_norm": 2.8960466384887695, "learning_rate": 9.995158422455557e-06, "loss": 3.6214, "step": 71200 }, { "epoch": 0.07019261839647549, "grad_norm": 3.0051112174987793, "learning_rate": 9.995151611534078e-06, "loss": 3.6948, "step": 71250 }, { "epoch": 0.07024187637429759, "grad_norm": 2.827670097351074, "learning_rate": 9.995144795827634e-06, "loss": 3.6922, "step": 71300 }, { "epoch": 0.07029113435211967, "grad_norm": 2.649594306945801, "learning_rate": 9.995137975336233e-06, "loss": 3.6204, "step": 71350 }, { "epoch": 0.07034039232994176, "grad_norm": 2.717921018600464, "learning_rate": 9.995131150059881e-06, "loss": 3.6187, "step": 71400 }, { "epoch": 0.07038965030776384, "grad_norm": 2.6892218589782715, "learning_rate": 9.995124319998582e-06, "loss": 3.6327, "step": 71450 }, { "epoch": 0.07043890828558594, "grad_norm": 3.0993971824645996, "learning_rate": 9.995117485152347e-06, "loss": 3.6796, "step": 71500 }, { "epoch": 0.07048816626340802, "grad_norm": 2.848966121673584, "learning_rate": 9.995110645521181e-06, "loss": 3.6522, "step": 71550 }, { "epoch": 0.0705374242412301, "grad_norm": 2.4713757038116455, "learning_rate": 9.995103801105091e-06, "loss": 3.6022, "step": 71600 }, { "epoch": 0.07058668221905219, "grad_norm": 3.00170636177063, "learning_rate": 
9.99509695190408e-06, "loss": 3.6149, "step": 71650 }, { "epoch": 0.07063594019687429, "grad_norm": 2.485034704208374, "learning_rate": 9.995090097918159e-06, "loss": 3.6421, "step": 71700 }, { "epoch": 0.07068519817469637, "grad_norm": 2.7099106311798096, "learning_rate": 9.995083239147332e-06, "loss": 3.5373, "step": 71750 }, { "epoch": 0.07073445615251846, "grad_norm": 2.885221242904663, "learning_rate": 9.995076375591608e-06, "loss": 3.6877, "step": 71800 }, { "epoch": 0.07078371413034056, "grad_norm": 2.833265542984009, "learning_rate": 9.99506950725099e-06, "loss": 3.6161, "step": 71850 }, { "epoch": 0.07083297210816264, "grad_norm": 2.7573535442352295, "learning_rate": 9.995062634125488e-06, "loss": 3.6813, "step": 71900 }, { "epoch": 0.07088223008598472, "grad_norm": 3.0593013763427734, "learning_rate": 9.995055756215107e-06, "loss": 3.5762, "step": 71950 }, { "epoch": 0.07093148806380681, "grad_norm": 2.7071948051452637, "learning_rate": 9.995048873519852e-06, "loss": 3.6267, "step": 72000 }, { "epoch": 0.0709807460416289, "grad_norm": 2.6270852088928223, "learning_rate": 9.995041986039733e-06, "loss": 3.6888, "step": 72050 }, { "epoch": 0.07103000401945099, "grad_norm": 2.7508726119995117, "learning_rate": 9.995035093774756e-06, "loss": 3.6479, "step": 72100 }, { "epoch": 0.07107926199727307, "grad_norm": 2.5797231197357178, "learning_rate": 9.995028196724925e-06, "loss": 3.6842, "step": 72150 }, { "epoch": 0.07112851997509517, "grad_norm": 2.882253408432007, "learning_rate": 9.995021294890247e-06, "loss": 3.6116, "step": 72200 }, { "epoch": 0.07117777795291726, "grad_norm": 3.025050401687622, "learning_rate": 9.995014388270731e-06, "loss": 3.6421, "step": 72250 }, { "epoch": 0.07122703593073934, "grad_norm": 2.678253173828125, "learning_rate": 9.995007476866382e-06, "loss": 3.7096, "step": 72300 }, { "epoch": 0.07127629390856142, "grad_norm": 2.800248146057129, "learning_rate": 9.995000560677208e-06, "loss": 3.6463, "step": 72350 }, { "epoch": 
0.07132555188638352, "grad_norm": 2.965043544769287, "learning_rate": 9.994993639703215e-06, "loss": 3.6308, "step": 72400 }, { "epoch": 0.07137480986420561, "grad_norm": 2.6530370712280273, "learning_rate": 9.994986713944408e-06, "loss": 3.5791, "step": 72450 }, { "epoch": 0.07142406784202769, "grad_norm": 2.798475503921509, "learning_rate": 9.994979783400794e-06, "loss": 3.6106, "step": 72500 }, { "epoch": 0.07147332581984979, "grad_norm": 2.546464681625366, "learning_rate": 9.994972848072382e-06, "loss": 3.656, "step": 72550 }, { "epoch": 0.07152258379767187, "grad_norm": 2.5798041820526123, "learning_rate": 9.994965907959176e-06, "loss": 3.6222, "step": 72600 }, { "epoch": 0.07157184177549396, "grad_norm": 2.6365602016448975, "learning_rate": 9.994958963061186e-06, "loss": 3.711, "step": 72650 }, { "epoch": 0.07162109975331604, "grad_norm": 2.6040518283843994, "learning_rate": 9.994952013378413e-06, "loss": 3.665, "step": 72700 }, { "epoch": 0.07167035773113814, "grad_norm": 2.791257381439209, "learning_rate": 9.99494505891087e-06, "loss": 3.654, "step": 72750 }, { "epoch": 0.07171961570896022, "grad_norm": 2.5012261867523193, "learning_rate": 9.99493809965856e-06, "loss": 3.5989, "step": 72800 }, { "epoch": 0.07176887368678231, "grad_norm": 2.8017241954803467, "learning_rate": 9.99493113562149e-06, "loss": 3.6419, "step": 72850 }, { "epoch": 0.07181813166460439, "grad_norm": 2.886157989501953, "learning_rate": 9.994924166799668e-06, "loss": 3.645, "step": 72900 }, { "epoch": 0.07186738964242649, "grad_norm": 2.587864637374878, "learning_rate": 9.994917193193099e-06, "loss": 3.6195, "step": 72950 }, { "epoch": 0.07191664762024857, "grad_norm": 2.6062097549438477, "learning_rate": 9.994910214801791e-06, "loss": 3.6503, "step": 73000 }, { "epoch": 0.07196590559807066, "grad_norm": 2.972599506378174, "learning_rate": 9.99490323162575e-06, "loss": 3.6452, "step": 73050 }, { "epoch": 0.07201516357589276, "grad_norm": 2.6599671840667725, "learning_rate": 
9.994896243664983e-06, "loss": 3.6479, "step": 73100 }, { "epoch": 0.07206442155371484, "grad_norm": 3.5980241298675537, "learning_rate": 9.994889250919495e-06, "loss": 3.608, "step": 73150 }, { "epoch": 0.07211367953153693, "grad_norm": 3.08015513420105, "learning_rate": 9.994882253389295e-06, "loss": 3.7253, "step": 73200 }, { "epoch": 0.07216293750935901, "grad_norm": 2.792656183242798, "learning_rate": 9.994875251074389e-06, "loss": 3.5673, "step": 73250 }, { "epoch": 0.07221219548718111, "grad_norm": 2.592083692550659, "learning_rate": 9.994868243974785e-06, "loss": 3.6473, "step": 73300 }, { "epoch": 0.07226145346500319, "grad_norm": 2.6621804237365723, "learning_rate": 9.994861232090487e-06, "loss": 3.6207, "step": 73350 }, { "epoch": 0.07231071144282528, "grad_norm": 2.6156387329101562, "learning_rate": 9.994854215421504e-06, "loss": 3.6283, "step": 73400 }, { "epoch": 0.07235996942064737, "grad_norm": 2.7689008712768555, "learning_rate": 9.994847193967841e-06, "loss": 3.5898, "step": 73450 }, { "epoch": 0.07240922739846946, "grad_norm": 2.7425997257232666, "learning_rate": 9.994840167729506e-06, "loss": 3.6368, "step": 73500 }, { "epoch": 0.07245848537629154, "grad_norm": 2.7740464210510254, "learning_rate": 9.994833136706505e-06, "loss": 3.6445, "step": 73550 }, { "epoch": 0.07250774335411363, "grad_norm": 2.5702173709869385, "learning_rate": 9.994826100898845e-06, "loss": 3.6603, "step": 73600 }, { "epoch": 0.07255700133193572, "grad_norm": 2.607623338699341, "learning_rate": 9.994819060306532e-06, "loss": 3.6451, "step": 73650 }, { "epoch": 0.07260625930975781, "grad_norm": 2.7097530364990234, "learning_rate": 9.994812014929573e-06, "loss": 3.6392, "step": 73700 }, { "epoch": 0.0726555172875799, "grad_norm": 2.643967628479004, "learning_rate": 9.994804964767976e-06, "loss": 3.6257, "step": 73750 }, { "epoch": 0.07270477526540199, "grad_norm": 2.7616708278656006, "learning_rate": 9.994797909821748e-06, "loss": 3.6016, "step": 73800 }, { "epoch": 
0.07275403324322408, "grad_norm": 2.607081174850464, "learning_rate": 9.994790850090894e-06, "loss": 3.6168, "step": 73850 }, { "epoch": 0.07280329122104616, "grad_norm": 2.5300259590148926, "learning_rate": 9.994783785575421e-06, "loss": 3.6344, "step": 73900 }, { "epoch": 0.07285254919886824, "grad_norm": 2.6927247047424316, "learning_rate": 9.994776716275338e-06, "loss": 3.6252, "step": 73950 }, { "epoch": 0.07290180717669034, "grad_norm": 2.677111864089966, "learning_rate": 9.99476964219065e-06, "loss": 3.6237, "step": 74000 }, { "epoch": 0.07295106515451243, "grad_norm": 2.928058385848999, "learning_rate": 9.994762563321361e-06, "loss": 3.5721, "step": 74050 }, { "epoch": 0.07300032313233451, "grad_norm": 2.744075059890747, "learning_rate": 9.994755479667482e-06, "loss": 3.5986, "step": 74100 }, { "epoch": 0.0730495811101566, "grad_norm": 2.5685763359069824, "learning_rate": 9.994748391229018e-06, "loss": 3.6138, "step": 74150 }, { "epoch": 0.07309883908797869, "grad_norm": 2.582242488861084, "learning_rate": 9.994741298005977e-06, "loss": 3.6385, "step": 74200 }, { "epoch": 0.07314809706580078, "grad_norm": 2.5937206745147705, "learning_rate": 9.994734199998366e-06, "loss": 3.6381, "step": 74250 }, { "epoch": 0.07319735504362286, "grad_norm": 2.5076181888580322, "learning_rate": 9.994727097206189e-06, "loss": 3.594, "step": 74300 }, { "epoch": 0.07324661302144496, "grad_norm": 2.7037906646728516, "learning_rate": 9.994719989629455e-06, "loss": 3.6886, "step": 74350 }, { "epoch": 0.07329587099926704, "grad_norm": 2.7960517406463623, "learning_rate": 9.99471287726817e-06, "loss": 3.6601, "step": 74400 }, { "epoch": 0.07334512897708913, "grad_norm": 2.716439723968506, "learning_rate": 9.994705760122342e-06, "loss": 3.6461, "step": 74450 }, { "epoch": 0.07339438695491121, "grad_norm": 2.6618385314941406, "learning_rate": 9.994698638191977e-06, "loss": 3.5955, "step": 74500 }, { "epoch": 0.07344364493273331, "grad_norm": 2.8261427879333496, "learning_rate": 
9.99469151147708e-06, "loss": 3.602, "step": 74550 }, { "epoch": 0.0734929029105554, "grad_norm": 2.7226064205169678, "learning_rate": 9.994684379977661e-06, "loss": 3.6213, "step": 74600 }, { "epoch": 0.07354216088837748, "grad_norm": 2.9325544834136963, "learning_rate": 9.994677243693727e-06, "loss": 3.6381, "step": 74650 }, { "epoch": 0.07359141886619958, "grad_norm": 2.766641139984131, "learning_rate": 9.994670102625281e-06, "loss": 3.6667, "step": 74700 }, { "epoch": 0.07364067684402166, "grad_norm": 2.987947463989258, "learning_rate": 9.994662956772332e-06, "loss": 3.6074, "step": 74750 }, { "epoch": 0.07368993482184374, "grad_norm": 2.605741500854492, "learning_rate": 9.99465580613489e-06, "loss": 3.6531, "step": 74800 }, { "epoch": 0.07373919279966583, "grad_norm": 2.561439275741577, "learning_rate": 9.994648650712955e-06, "loss": 3.6461, "step": 74850 }, { "epoch": 0.07378845077748793, "grad_norm": 2.6646921634674072, "learning_rate": 9.99464149050654e-06, "loss": 3.6528, "step": 74900 }, { "epoch": 0.07383770875531001, "grad_norm": 2.671616554260254, "learning_rate": 9.994634325515648e-06, "loss": 3.6359, "step": 74950 }, { "epoch": 0.0738869667331321, "grad_norm": 3.020414352416992, "learning_rate": 9.99462715574029e-06, "loss": 3.6615, "step": 75000 }, { "epoch": 0.07393622471095418, "grad_norm": 2.4770257472991943, "learning_rate": 9.994619981180468e-06, "loss": 3.5992, "step": 75050 }, { "epoch": 0.07398548268877628, "grad_norm": 2.883547306060791, "learning_rate": 9.994612801836193e-06, "loss": 3.6267, "step": 75100 }, { "epoch": 0.07403474066659836, "grad_norm": 2.6869547367095947, "learning_rate": 9.994605617707469e-06, "loss": 3.6229, "step": 75150 }, { "epoch": 0.07408399864442045, "grad_norm": 2.956298589706421, "learning_rate": 9.994598428794304e-06, "loss": 3.6463, "step": 75200 }, { "epoch": 0.07413325662224254, "grad_norm": 2.4470391273498535, "learning_rate": 9.994591235096705e-06, "loss": 3.5897, "step": 75250 }, { "epoch": 
0.07418251460006463, "grad_norm": 2.615194082260132, "learning_rate": 9.994584036614678e-06, "loss": 3.6559, "step": 75300 }, { "epoch": 0.07423177257788671, "grad_norm": 2.567371368408203, "learning_rate": 9.99457683334823e-06, "loss": 3.593, "step": 75350 }, { "epoch": 0.0742810305557088, "grad_norm": 2.517949104309082, "learning_rate": 9.99456962529737e-06, "loss": 3.6654, "step": 75400 }, { "epoch": 0.0743302885335309, "grad_norm": 2.58486270904541, "learning_rate": 9.994562412462103e-06, "loss": 3.574, "step": 75450 }, { "epoch": 0.07437954651135298, "grad_norm": 2.7378883361816406, "learning_rate": 9.994555194842438e-06, "loss": 3.6217, "step": 75500 }, { "epoch": 0.07442880448917506, "grad_norm": 2.8276114463806152, "learning_rate": 9.994547972438378e-06, "loss": 3.6062, "step": 75550 }, { "epoch": 0.07447806246699716, "grad_norm": 2.5560925006866455, "learning_rate": 9.994540745249933e-06, "loss": 3.6145, "step": 75600 }, { "epoch": 0.07452732044481925, "grad_norm": 2.887159585952759, "learning_rate": 9.994533513277108e-06, "loss": 3.638, "step": 75650 }, { "epoch": 0.07457657842264133, "grad_norm": 2.5389602184295654, "learning_rate": 9.994526276519911e-06, "loss": 3.595, "step": 75700 }, { "epoch": 0.07462583640046341, "grad_norm": 2.3895115852355957, "learning_rate": 9.99451903497835e-06, "loss": 3.5791, "step": 75750 }, { "epoch": 0.07467509437828551, "grad_norm": 2.5909411907196045, "learning_rate": 9.994511788652432e-06, "loss": 3.6239, "step": 75800 }, { "epoch": 0.0747243523561076, "grad_norm": 2.6543195247650146, "learning_rate": 9.99450453754216e-06, "loss": 3.5759, "step": 75850 }, { "epoch": 0.07477361033392968, "grad_norm": 2.590404987335205, "learning_rate": 9.994497281647546e-06, "loss": 3.6394, "step": 75900 }, { "epoch": 0.07482286831175178, "grad_norm": 2.6828038692474365, "learning_rate": 9.994490020968594e-06, "loss": 3.6476, "step": 75950 }, { "epoch": 0.07487212628957386, "grad_norm": 2.7572526931762695, "learning_rate": 
9.994482755505311e-06, "loss": 3.6188, "step": 76000 }, { "epoch": 0.07492138426739595, "grad_norm": 2.4381093978881836, "learning_rate": 9.994475485257704e-06, "loss": 3.6484, "step": 76050 }, { "epoch": 0.07497064224521803, "grad_norm": 2.3182830810546875, "learning_rate": 9.994468210225783e-06, "loss": 3.5418, "step": 76100 }, { "epoch": 0.07501990022304013, "grad_norm": 2.761262893676758, "learning_rate": 9.99446093040955e-06, "loss": 3.6533, "step": 76150 }, { "epoch": 0.07506915820086221, "grad_norm": 2.8115320205688477, "learning_rate": 9.994453645809017e-06, "loss": 3.6134, "step": 76200 }, { "epoch": 0.0751184161786843, "grad_norm": 2.7130353450775146, "learning_rate": 9.994446356424188e-06, "loss": 3.6109, "step": 76250 }, { "epoch": 0.07516767415650638, "grad_norm": 2.639683961868286, "learning_rate": 9.994439062255069e-06, "loss": 3.6099, "step": 76300 }, { "epoch": 0.07521693213432848, "grad_norm": 2.6402246952056885, "learning_rate": 9.994431763301669e-06, "loss": 3.6275, "step": 76350 }, { "epoch": 0.07526619011215056, "grad_norm": 2.9030849933624268, "learning_rate": 9.994424459563995e-06, "loss": 3.6109, "step": 76400 }, { "epoch": 0.07531544808997265, "grad_norm": 3.5715205669403076, "learning_rate": 9.994417151042053e-06, "loss": 3.6429, "step": 76450 }, { "epoch": 0.07536470606779475, "grad_norm": 2.611229658126831, "learning_rate": 9.99440983773585e-06, "loss": 3.6049, "step": 76500 }, { "epoch": 0.07541396404561683, "grad_norm": 2.777139902114868, "learning_rate": 9.994402519645395e-06, "loss": 3.6235, "step": 76550 }, { "epoch": 0.07546322202343891, "grad_norm": 2.6420998573303223, "learning_rate": 9.994395196770692e-06, "loss": 3.6286, "step": 76600 }, { "epoch": 0.075512480001261, "grad_norm": 2.5565552711486816, "learning_rate": 9.99438786911175e-06, "loss": 3.6462, "step": 76650 }, { "epoch": 0.0755617379790831, "grad_norm": 2.4049456119537354, "learning_rate": 9.994380536668575e-06, "loss": 3.654, "step": 76700 }, { "epoch": 
0.07561099595690518, "grad_norm": 2.695481538772583, "learning_rate": 9.994373199441177e-06, "loss": 3.5298, "step": 76750 }, { "epoch": 0.07566025393472726, "grad_norm": 2.5945494174957275, "learning_rate": 9.994365857429558e-06, "loss": 3.6266, "step": 76800 }, { "epoch": 0.07570951191254936, "grad_norm": 2.878767728805542, "learning_rate": 9.994358510633728e-06, "loss": 3.5033, "step": 76850 }, { "epoch": 0.07575876989037145, "grad_norm": 2.37325382232666, "learning_rate": 9.994351159053695e-06, "loss": 3.6621, "step": 76900 }, { "epoch": 0.07580802786819353, "grad_norm": 3.0814969539642334, "learning_rate": 9.994343802689464e-06, "loss": 3.6279, "step": 76950 }, { "epoch": 0.07585728584601562, "grad_norm": 2.934889316558838, "learning_rate": 9.994336441541042e-06, "loss": 3.5978, "step": 77000 }, { "epoch": 0.07590654382383771, "grad_norm": 2.6793806552886963, "learning_rate": 9.994329075608437e-06, "loss": 3.564, "step": 77050 }, { "epoch": 0.0759558018016598, "grad_norm": 2.661285877227783, "learning_rate": 9.994321704891655e-06, "loss": 3.6109, "step": 77100 }, { "epoch": 0.07600505977948188, "grad_norm": 2.576117515563965, "learning_rate": 9.994314329390705e-06, "loss": 3.6132, "step": 77150 }, { "epoch": 0.07605431775730398, "grad_norm": 2.7961719036102295, "learning_rate": 9.994306949105593e-06, "loss": 3.6825, "step": 77200 }, { "epoch": 0.07610357573512606, "grad_norm": 2.4003701210021973, "learning_rate": 9.994299564036327e-06, "loss": 3.6218, "step": 77250 }, { "epoch": 0.07615283371294815, "grad_norm": 2.6332144737243652, "learning_rate": 9.994292174182912e-06, "loss": 3.6142, "step": 77300 }, { "epoch": 0.07620209169077023, "grad_norm": 2.495497941970825, "learning_rate": 9.994284779545356e-06, "loss": 3.5891, "step": 77350 }, { "epoch": 0.07625134966859233, "grad_norm": 2.729457378387451, "learning_rate": 9.994277380123664e-06, "loss": 3.636, "step": 77400 }, { "epoch": 0.07630060764641441, "grad_norm": 2.39570951461792, "learning_rate": 
9.99426997591785e-06, "loss": 3.5865, "step": 77450 }, { "epoch": 0.0763498656242365, "grad_norm": 2.624575614929199, "learning_rate": 9.994262566927913e-06, "loss": 3.5783, "step": 77500 }, { "epoch": 0.07639912360205858, "grad_norm": 2.4480931758880615, "learning_rate": 9.994255153153864e-06, "loss": 3.5392, "step": 77550 }, { "epoch": 0.07644838157988068, "grad_norm": 2.610196590423584, "learning_rate": 9.994247734595712e-06, "loss": 3.61, "step": 77600 }, { "epoch": 0.07649763955770277, "grad_norm": 2.655616044998169, "learning_rate": 9.99424031125346e-06, "loss": 3.5909, "step": 77650 }, { "epoch": 0.07654689753552485, "grad_norm": 2.848410129547119, "learning_rate": 9.994232883127118e-06, "loss": 3.5885, "step": 77700 }, { "epoch": 0.07659615551334695, "grad_norm": 2.6791510581970215, "learning_rate": 9.99422545021669e-06, "loss": 3.5365, "step": 77750 }, { "epoch": 0.07664541349116903, "grad_norm": 2.8551318645477295, "learning_rate": 9.994218012522185e-06, "loss": 3.6117, "step": 77800 }, { "epoch": 0.07669467146899112, "grad_norm": 2.731682062149048, "learning_rate": 9.994210570043613e-06, "loss": 3.6046, "step": 77850 }, { "epoch": 0.0767439294468132, "grad_norm": 2.5910708904266357, "learning_rate": 9.994203122780976e-06, "loss": 3.6615, "step": 77900 }, { "epoch": 0.0767931874246353, "grad_norm": 2.7816951274871826, "learning_rate": 9.994195670734284e-06, "loss": 3.629, "step": 77950 }, { "epoch": 0.07684244540245738, "grad_norm": 2.4194846153259277, "learning_rate": 9.994188213903545e-06, "loss": 3.5913, "step": 78000 }, { "epoch": 0.07689170338027947, "grad_norm": 2.4510204792022705, "learning_rate": 9.994180752288762e-06, "loss": 3.6189, "step": 78050 }, { "epoch": 0.07694096135810156, "grad_norm": 3.543703079223633, "learning_rate": 9.994173285889948e-06, "loss": 3.5794, "step": 78100 }, { "epoch": 0.07699021933592365, "grad_norm": 2.7752790451049805, "learning_rate": 9.994165814707105e-06, "loss": 3.5912, "step": 78150 }, { "epoch": 
0.07703947731374573, "grad_norm": 2.6488571166992188, "learning_rate": 9.994158338740243e-06, "loss": 3.5987, "step": 78200 }, { "epoch": 0.07708873529156782, "grad_norm": 2.561976909637451, "learning_rate": 9.994150857989367e-06, "loss": 3.6018, "step": 78250 }, { "epoch": 0.07713799326938992, "grad_norm": 3.1859700679779053, "learning_rate": 9.994143372454487e-06, "loss": 3.5352, "step": 78300 }, { "epoch": 0.077187251247212, "grad_norm": 2.742384433746338, "learning_rate": 9.99413588213561e-06, "loss": 3.5665, "step": 78350 }, { "epoch": 0.07723650922503408, "grad_norm": 2.7384369373321533, "learning_rate": 9.994128387032738e-06, "loss": 3.6001, "step": 78400 }, { "epoch": 0.07728576720285618, "grad_norm": 2.6327786445617676, "learning_rate": 9.994120887145886e-06, "loss": 3.6087, "step": 78450 }, { "epoch": 0.07733502518067827, "grad_norm": 2.589012384414673, "learning_rate": 9.994113382475054e-06, "loss": 3.6411, "step": 78500 }, { "epoch": 0.07738428315850035, "grad_norm": 2.4711496829986572, "learning_rate": 9.994105873020255e-06, "loss": 3.6762, "step": 78550 }, { "epoch": 0.07743354113632243, "grad_norm": 2.9146006107330322, "learning_rate": 9.994098358781494e-06, "loss": 3.5652, "step": 78600 }, { "epoch": 0.07748279911414453, "grad_norm": 2.840862512588501, "learning_rate": 9.994090839758775e-06, "loss": 3.5447, "step": 78650 }, { "epoch": 0.07753205709196662, "grad_norm": 2.6190738677978516, "learning_rate": 9.99408331595211e-06, "loss": 3.5823, "step": 78700 }, { "epoch": 0.0775813150697887, "grad_norm": 2.683576822280884, "learning_rate": 9.994075787361504e-06, "loss": 3.5888, "step": 78750 }, { "epoch": 0.07763057304761078, "grad_norm": 2.739565372467041, "learning_rate": 9.994068253986963e-06, "loss": 3.5937, "step": 78800 }, { "epoch": 0.07767983102543288, "grad_norm": 2.480062484741211, "learning_rate": 9.994060715828498e-06, "loss": 3.6413, "step": 78850 }, { "epoch": 0.07772908900325497, "grad_norm": 2.461531639099121, "learning_rate": 
9.994053172886114e-06, "loss": 3.638, "step": 78900 }, { "epoch": 0.07777834698107705, "grad_norm": 2.5659523010253906, "learning_rate": 9.994045625159817e-06, "loss": 3.5782, "step": 78950 }, { "epoch": 0.07782760495889915, "grad_norm": 2.706355094909668, "learning_rate": 9.994038072649615e-06, "loss": 3.6286, "step": 79000 }, { "epoch": 0.07787686293672123, "grad_norm": 2.6770951747894287, "learning_rate": 9.994030515355516e-06, "loss": 3.5217, "step": 79050 }, { "epoch": 0.07792612091454332, "grad_norm": 2.8909950256347656, "learning_rate": 9.994022953277526e-06, "loss": 3.6623, "step": 79100 }, { "epoch": 0.0779753788923654, "grad_norm": 2.6451656818389893, "learning_rate": 9.994015386415655e-06, "loss": 3.6235, "step": 79150 }, { "epoch": 0.0780246368701875, "grad_norm": 2.821711301803589, "learning_rate": 9.994007814769906e-06, "loss": 3.65, "step": 79200 }, { "epoch": 0.07807389484800958, "grad_norm": 2.878124952316284, "learning_rate": 9.994000238340292e-06, "loss": 3.6128, "step": 79250 }, { "epoch": 0.07812315282583167, "grad_norm": 2.720065116882324, "learning_rate": 9.993992657126815e-06, "loss": 3.5102, "step": 79300 }, { "epoch": 0.07817241080365377, "grad_norm": 2.8402578830718994, "learning_rate": 9.993985071129483e-06, "loss": 3.6065, "step": 79350 }, { "epoch": 0.07822166878147585, "grad_norm": 2.5894715785980225, "learning_rate": 9.993977480348305e-06, "loss": 3.5863, "step": 79400 }, { "epoch": 0.07827092675929793, "grad_norm": 2.5315945148468018, "learning_rate": 9.993969884783288e-06, "loss": 3.5701, "step": 79450 }, { "epoch": 0.07832018473712002, "grad_norm": 2.865124464035034, "learning_rate": 9.993962284434437e-06, "loss": 3.613, "step": 79500 }, { "epoch": 0.07836944271494212, "grad_norm": 2.835350275039673, "learning_rate": 9.993954679301766e-06, "loss": 3.5383, "step": 79550 }, { "epoch": 0.0784187006927642, "grad_norm": 2.6091625690460205, "learning_rate": 9.993947069385273e-06, "loss": 3.6222, "step": 79600 }, { "epoch": 
0.07846795867058629, "grad_norm": 2.510575532913208, "learning_rate": 9.993939454684971e-06, "loss": 3.5605, "step": 79650 }, { "epoch": 0.07851721664840837, "grad_norm": 2.857231378555298, "learning_rate": 9.993931835200865e-06, "loss": 3.5389, "step": 79700 }, { "epoch": 0.07856647462623047, "grad_norm": 2.524425506591797, "learning_rate": 9.993924210932966e-06, "loss": 3.5836, "step": 79750 }, { "epoch": 0.07861573260405255, "grad_norm": 2.5206527709960938, "learning_rate": 9.993916581881277e-06, "loss": 3.5662, "step": 79800 }, { "epoch": 0.07866499058187464, "grad_norm": 2.827385187149048, "learning_rate": 9.993908948045807e-06, "loss": 3.5492, "step": 79850 }, { "epoch": 0.07871424855969673, "grad_norm": 2.6258528232574463, "learning_rate": 9.993901309426564e-06, "loss": 3.5732, "step": 79900 }, { "epoch": 0.07876350653751882, "grad_norm": 2.4983890056610107, "learning_rate": 9.993893666023553e-06, "loss": 3.577, "step": 79950 }, { "epoch": 0.0788127645153409, "grad_norm": 2.4640541076660156, "learning_rate": 9.993886017836785e-06, "loss": 3.6314, "step": 80000 }, { "epoch": 0.07886202249316299, "grad_norm": 2.6135013103485107, "learning_rate": 9.993878364866263e-06, "loss": 3.6251, "step": 80050 }, { "epoch": 0.07891128047098508, "grad_norm": 2.940502882003784, "learning_rate": 9.993870707111999e-06, "loss": 3.5992, "step": 80100 }, { "epoch": 0.07896053844880717, "grad_norm": 2.6008734703063965, "learning_rate": 9.993863044573996e-06, "loss": 3.6376, "step": 80150 }, { "epoch": 0.07900979642662925, "grad_norm": 2.803135395050049, "learning_rate": 9.993855377252264e-06, "loss": 3.5755, "step": 80200 }, { "epoch": 0.07905905440445135, "grad_norm": 2.534965753555298, "learning_rate": 9.99384770514681e-06, "loss": 3.5858, "step": 80250 }, { "epoch": 0.07910831238227344, "grad_norm": 2.4554474353790283, "learning_rate": 9.99384002825764e-06, "loss": 3.5692, "step": 80300 }, { "epoch": 0.07915757036009552, "grad_norm": 2.5986316204071045, "learning_rate": 
9.993832346584764e-06, "loss": 3.5769, "step": 80350 }, { "epoch": 0.0792068283379176, "grad_norm": 2.474804162979126, "learning_rate": 9.993824660128187e-06, "loss": 3.6002, "step": 80400 }, { "epoch": 0.0792560863157397, "grad_norm": 2.91159987449646, "learning_rate": 9.993816968887915e-06, "loss": 3.4805, "step": 80450 }, { "epoch": 0.07930534429356179, "grad_norm": 2.571033000946045, "learning_rate": 9.99380927286396e-06, "loss": 3.6005, "step": 80500 }, { "epoch": 0.07935460227138387, "grad_norm": 2.9510879516601562, "learning_rate": 9.993801572056325e-06, "loss": 3.63, "step": 80550 }, { "epoch": 0.07940386024920597, "grad_norm": 2.8745508193969727, "learning_rate": 9.99379386646502e-06, "loss": 3.5616, "step": 80600 }, { "epoch": 0.07945311822702805, "grad_norm": 2.6009600162506104, "learning_rate": 9.99378615609005e-06, "loss": 3.6428, "step": 80650 }, { "epoch": 0.07950237620485014, "grad_norm": 2.789090633392334, "learning_rate": 9.993778440931426e-06, "loss": 3.577, "step": 80700 }, { "epoch": 0.07955163418267222, "grad_norm": 2.7155349254608154, "learning_rate": 9.993770720989153e-06, "loss": 3.6402, "step": 80750 }, { "epoch": 0.07960089216049432, "grad_norm": 2.6621360778808594, "learning_rate": 9.99376299626324e-06, "loss": 3.6139, "step": 80800 }, { "epoch": 0.0796501501383164, "grad_norm": 2.6630618572235107, "learning_rate": 9.993755266753688e-06, "loss": 3.5622, "step": 80850 }, { "epoch": 0.07969940811613849, "grad_norm": 2.847508668899536, "learning_rate": 9.993747532460514e-06, "loss": 3.5647, "step": 80900 }, { "epoch": 0.07974866609396057, "grad_norm": 3.3109116554260254, "learning_rate": 9.99373979338372e-06, "loss": 3.6132, "step": 80950 }, { "epoch": 0.07979792407178267, "grad_norm": 2.7042760848999023, "learning_rate": 9.993732049523314e-06, "loss": 3.5918, "step": 81000 }, { "epoch": 0.07984718204960475, "grad_norm": 2.6336474418640137, "learning_rate": 9.993724300879304e-06, "loss": 3.592, "step": 81050 }, { "epoch": 
0.07989644002742684, "grad_norm": 2.385345458984375, "learning_rate": 9.993716547451698e-06, "loss": 3.6123, "step": 81100 }, { "epoch": 0.07994569800524894, "grad_norm": 2.7782771587371826, "learning_rate": 9.9937087892405e-06, "loss": 3.5801, "step": 81150 }, { "epoch": 0.07999495598307102, "grad_norm": 2.6983256340026855, "learning_rate": 9.99370102624572e-06, "loss": 3.6101, "step": 81200 }, { "epoch": 0.0800442139608931, "grad_norm": 3.1907289028167725, "learning_rate": 9.99369325846737e-06, "loss": 3.609, "step": 81250 }, { "epoch": 0.08009347193871519, "grad_norm": 2.7355217933654785, "learning_rate": 9.993685485905449e-06, "loss": 3.6224, "step": 81300 }, { "epoch": 0.08014272991653729, "grad_norm": 2.4982829093933105, "learning_rate": 9.99367770855997e-06, "loss": 3.6367, "step": 81350 }, { "epoch": 0.08019198789435937, "grad_norm": 2.634140968322754, "learning_rate": 9.993669926430938e-06, "loss": 3.4926, "step": 81400 }, { "epoch": 0.08024124587218145, "grad_norm": 2.3907389640808105, "learning_rate": 9.993662139518361e-06, "loss": 3.4938, "step": 81450 }, { "epoch": 0.08029050385000355, "grad_norm": 2.639362096786499, "learning_rate": 9.993654347822246e-06, "loss": 3.539, "step": 81500 }, { "epoch": 0.08033976182782564, "grad_norm": 2.5869786739349365, "learning_rate": 9.993646551342603e-06, "loss": 3.5478, "step": 81550 }, { "epoch": 0.08038901980564772, "grad_norm": 2.3697030544281006, "learning_rate": 9.993638750079438e-06, "loss": 3.6165, "step": 81600 }, { "epoch": 0.0804382777834698, "grad_norm": 2.643968105316162, "learning_rate": 9.993630944032756e-06, "loss": 3.6269, "step": 81650 }, { "epoch": 0.0804875357612919, "grad_norm": 2.7823052406311035, "learning_rate": 9.993623133202568e-06, "loss": 3.5842, "step": 81700 }, { "epoch": 0.08053679373911399, "grad_norm": 2.7021989822387695, "learning_rate": 9.99361531758888e-06, "loss": 3.5726, "step": 81750 }, { "epoch": 0.08058605171693607, "grad_norm": 2.6700222492218018, "learning_rate": 
9.993607497191697e-06, "loss": 3.6108, "step": 81800 }, { "epoch": 0.08063530969475817, "grad_norm": 2.8914437294006348, "learning_rate": 9.993599672011032e-06, "loss": 3.5854, "step": 81850 }, { "epoch": 0.08068456767258025, "grad_norm": 2.826688528060913, "learning_rate": 9.993591842046889e-06, "loss": 3.5801, "step": 81900 }, { "epoch": 0.08073382565040234, "grad_norm": 2.4949636459350586, "learning_rate": 9.993584007299276e-06, "loss": 3.5815, "step": 81950 }, { "epoch": 0.08078308362822442, "grad_norm": 2.6836111545562744, "learning_rate": 9.9935761677682e-06, "loss": 3.5297, "step": 82000 }, { "epoch": 0.08083234160604652, "grad_norm": 2.6047656536102295, "learning_rate": 9.993568323453669e-06, "loss": 3.5641, "step": 82050 }, { "epoch": 0.0808815995838686, "grad_norm": 2.683647632598877, "learning_rate": 9.993560474355692e-06, "loss": 3.5003, "step": 82100 }, { "epoch": 0.08093085756169069, "grad_norm": 2.6664469242095947, "learning_rate": 9.993552620474274e-06, "loss": 3.5817, "step": 82150 }, { "epoch": 0.08098011553951277, "grad_norm": 2.6779887676239014, "learning_rate": 9.993544761809424e-06, "loss": 3.6213, "step": 82200 }, { "epoch": 0.08102937351733487, "grad_norm": 2.6139307022094727, "learning_rate": 9.993536898361149e-06, "loss": 3.5381, "step": 82250 }, { "epoch": 0.08107863149515696, "grad_norm": 2.6804659366607666, "learning_rate": 9.993529030129456e-06, "loss": 3.6028, "step": 82300 }, { "epoch": 0.08112788947297904, "grad_norm": 2.5710816383361816, "learning_rate": 9.993521157114355e-06, "loss": 3.6792, "step": 82350 }, { "epoch": 0.08117714745080114, "grad_norm": 2.461784601211548, "learning_rate": 9.993513279315849e-06, "loss": 3.6058, "step": 82400 }, { "epoch": 0.08122640542862322, "grad_norm": 2.76576828956604, "learning_rate": 9.99350539673395e-06, "loss": 3.5626, "step": 82450 }, { "epoch": 0.0812756634064453, "grad_norm": 2.6112589836120605, "learning_rate": 9.993497509368664e-06, "loss": 3.5211, "step": 82500 }, { "epoch": 
0.08132492138426739, "grad_norm": 2.596060037612915, "learning_rate": 9.993489617219998e-06, "loss": 3.553, "step": 82550 }, { "epoch": 0.08137417936208949, "grad_norm": 2.709398031234741, "learning_rate": 9.99348172028796e-06, "loss": 3.6273, "step": 82600 }, { "epoch": 0.08142343733991157, "grad_norm": 2.744860887527466, "learning_rate": 9.993473818572557e-06, "loss": 3.5767, "step": 82650 }, { "epoch": 0.08147269531773366, "grad_norm": 2.5852866172790527, "learning_rate": 9.993465912073798e-06, "loss": 3.5873, "step": 82700 }, { "epoch": 0.08152195329555575, "grad_norm": 2.9701457023620605, "learning_rate": 9.99345800079169e-06, "loss": 3.5377, "step": 82750 }, { "epoch": 0.08157121127337784, "grad_norm": 2.768817901611328, "learning_rate": 9.99345008472624e-06, "loss": 3.5916, "step": 82800 }, { "epoch": 0.08162046925119992, "grad_norm": 2.5297558307647705, "learning_rate": 9.993442163877453e-06, "loss": 3.4865, "step": 82850 }, { "epoch": 0.08166972722902201, "grad_norm": 2.386624336242676, "learning_rate": 9.993434238245343e-06, "loss": 3.5865, "step": 82900 }, { "epoch": 0.0817189852068441, "grad_norm": 2.538627862930298, "learning_rate": 9.993426307829912e-06, "loss": 3.5824, "step": 82950 }, { "epoch": 0.08176824318466619, "grad_norm": 2.5463125705718994, "learning_rate": 9.993418372631172e-06, "loss": 3.523, "step": 83000 }, { "epoch": 0.08181750116248827, "grad_norm": 2.7368357181549072, "learning_rate": 9.993410432649126e-06, "loss": 3.561, "step": 83050 }, { "epoch": 0.08186675914031036, "grad_norm": 2.91473126411438, "learning_rate": 9.993402487883785e-06, "loss": 3.5516, "step": 83100 }, { "epoch": 0.08191601711813246, "grad_norm": 2.842916250228882, "learning_rate": 9.993394538335154e-06, "loss": 3.5482, "step": 83150 }, { "epoch": 0.08196527509595454, "grad_norm": 2.469609260559082, "learning_rate": 9.993386584003244e-06, "loss": 3.5721, "step": 83200 }, { "epoch": 0.08201453307377662, "grad_norm": 2.6840031147003174, "learning_rate": 
9.99337862488806e-06, "loss": 3.5425, "step": 83250 }, { "epoch": 0.08206379105159872, "grad_norm": 2.7899723052978516, "learning_rate": 9.99337066098961e-06, "loss": 3.5184, "step": 83300 }, { "epoch": 0.0821130490294208, "grad_norm": 2.701415538787842, "learning_rate": 9.993362692307901e-06, "loss": 3.5715, "step": 83350 }, { "epoch": 0.08216230700724289, "grad_norm": 2.996854305267334, "learning_rate": 9.993354718842943e-06, "loss": 3.6097, "step": 83400 }, { "epoch": 0.08221156498506497, "grad_norm": 2.7720508575439453, "learning_rate": 9.993346740594742e-06, "loss": 3.5882, "step": 83450 }, { "epoch": 0.08226082296288707, "grad_norm": 2.5792629718780518, "learning_rate": 9.993338757563305e-06, "loss": 3.5467, "step": 83500 }, { "epoch": 0.08231008094070916, "grad_norm": 2.9115114212036133, "learning_rate": 9.993330769748641e-06, "loss": 3.5538, "step": 83550 }, { "epoch": 0.08235933891853124, "grad_norm": 2.5848231315612793, "learning_rate": 9.993322777150759e-06, "loss": 3.5436, "step": 83600 }, { "epoch": 0.08240859689635334, "grad_norm": 3.1945884227752686, "learning_rate": 9.99331477976966e-06, "loss": 3.5299, "step": 83650 }, { "epoch": 0.08245785487417542, "grad_norm": 2.5118203163146973, "learning_rate": 9.993306777605361e-06, "loss": 3.5181, "step": 83700 }, { "epoch": 0.08250711285199751, "grad_norm": 2.635374069213867, "learning_rate": 9.993298770657863e-06, "loss": 3.5096, "step": 83750 }, { "epoch": 0.08255637082981959, "grad_norm": 2.7606754302978516, "learning_rate": 9.993290758927176e-06, "loss": 3.5188, "step": 83800 }, { "epoch": 0.08260562880764169, "grad_norm": 2.7675282955169678, "learning_rate": 9.993282742413308e-06, "loss": 3.557, "step": 83850 }, { "epoch": 0.08265488678546377, "grad_norm": 2.632143497467041, "learning_rate": 9.993274721116267e-06, "loss": 3.6154, "step": 83900 }, { "epoch": 0.08270414476328586, "grad_norm": 2.952312469482422, "learning_rate": 9.993266695036056e-06, "loss": 3.5026, "step": 83950 }, { "epoch": 
0.08275340274110796, "grad_norm": 2.521624803543091, "learning_rate": 9.993258664172691e-06, "loss": 3.5469, "step": 84000 }, { "epoch": 0.08280266071893004, "grad_norm": 2.532771348953247, "learning_rate": 9.993250628526172e-06, "loss": 3.5172, "step": 84050 }, { "epoch": 0.08285191869675212, "grad_norm": 2.7896690368652344, "learning_rate": 9.993242588096513e-06, "loss": 3.5624, "step": 84100 }, { "epoch": 0.08290117667457421, "grad_norm": 2.5722405910491943, "learning_rate": 9.993234542883717e-06, "loss": 3.5807, "step": 84150 }, { "epoch": 0.08295043465239631, "grad_norm": 2.8355026245117188, "learning_rate": 9.993226492887793e-06, "loss": 3.556, "step": 84200 }, { "epoch": 0.08299969263021839, "grad_norm": 2.637105703353882, "learning_rate": 9.993218438108748e-06, "loss": 3.6392, "step": 84250 }, { "epoch": 0.08304895060804048, "grad_norm": 2.335650682449341, "learning_rate": 9.993210378546592e-06, "loss": 3.6058, "step": 84300 }, { "epoch": 0.08309820858586256, "grad_norm": 3.0109500885009766, "learning_rate": 9.99320231420133e-06, "loss": 3.5696, "step": 84350 }, { "epoch": 0.08314746656368466, "grad_norm": 2.690244436264038, "learning_rate": 9.993194245072974e-06, "loss": 3.5141, "step": 84400 }, { "epoch": 0.08319672454150674, "grad_norm": 2.534149169921875, "learning_rate": 9.99318617116153e-06, "loss": 3.5352, "step": 84450 }, { "epoch": 0.08324598251932883, "grad_norm": 2.8025267124176025, "learning_rate": 9.993178092467e-06, "loss": 3.5739, "step": 84500 }, { "epoch": 0.08329524049715092, "grad_norm": 2.7458815574645996, "learning_rate": 9.993170008989399e-06, "loss": 3.5933, "step": 84550 }, { "epoch": 0.08334449847497301, "grad_norm": 2.5686168670654297, "learning_rate": 9.993161920728731e-06, "loss": 3.5118, "step": 84600 }, { "epoch": 0.08339375645279509, "grad_norm": 2.4805991649627686, "learning_rate": 9.993153827685006e-06, "loss": 3.5379, "step": 84650 }, { "epoch": 0.08344301443061718, "grad_norm": 2.742490768432617, "learning_rate": 
9.99314572985823e-06, "loss": 3.5668, "step": 84700 }, { "epoch": 0.08349227240843927, "grad_norm": 2.7217047214508057, "learning_rate": 9.993137627248411e-06, "loss": 3.5622, "step": 84750 }, { "epoch": 0.08354153038626136, "grad_norm": 2.5703630447387695, "learning_rate": 9.993129519855558e-06, "loss": 3.5837, "step": 84800 }, { "epoch": 0.08359078836408344, "grad_norm": 2.553809881210327, "learning_rate": 9.993121407679678e-06, "loss": 3.495, "step": 84850 }, { "epoch": 0.08364004634190554, "grad_norm": 2.5125691890716553, "learning_rate": 9.993113290720779e-06, "loss": 3.5538, "step": 84900 }, { "epoch": 0.08368930431972763, "grad_norm": 3.1222012042999268, "learning_rate": 9.993105168978868e-06, "loss": 3.5866, "step": 84950 }, { "epoch": 0.08373856229754971, "grad_norm": 2.4522104263305664, "learning_rate": 9.993097042453951e-06, "loss": 3.5397, "step": 85000 }, { "epoch": 0.0837878202753718, "grad_norm": 2.7148947715759277, "learning_rate": 9.993088911146041e-06, "loss": 3.4826, "step": 85050 }, { "epoch": 0.08383707825319389, "grad_norm": 2.6041154861450195, "learning_rate": 9.993080775055141e-06, "loss": 3.5449, "step": 85100 }, { "epoch": 0.08388633623101598, "grad_norm": 3.046959400177002, "learning_rate": 9.993072634181263e-06, "loss": 3.5281, "step": 85150 }, { "epoch": 0.08393559420883806, "grad_norm": 2.5197510719299316, "learning_rate": 9.993064488524412e-06, "loss": 3.5353, "step": 85200 }, { "epoch": 0.08398485218666016, "grad_norm": 2.660175323486328, "learning_rate": 9.993056338084594e-06, "loss": 3.5334, "step": 85250 }, { "epoch": 0.08403411016448224, "grad_norm": 2.4798688888549805, "learning_rate": 9.99304818286182e-06, "loss": 3.5673, "step": 85300 }, { "epoch": 0.08408336814230433, "grad_norm": 2.895433187484741, "learning_rate": 9.993040022856098e-06, "loss": 3.5246, "step": 85350 }, { "epoch": 0.08413262612012641, "grad_norm": 2.505220413208008, "learning_rate": 9.993031858067432e-06, "loss": 3.6022, "step": 85400 }, { "epoch": 
0.08418188409794851, "grad_norm": 2.4558920860290527, "learning_rate": 9.993023688495834e-06, "loss": 3.5304, "step": 85450 }, { "epoch": 0.0842311420757706, "grad_norm": 2.706031084060669, "learning_rate": 9.993015514141311e-06, "loss": 3.5175, "step": 85500 }, { "epoch": 0.08428040005359268, "grad_norm": 2.73598575592041, "learning_rate": 9.99300733500387e-06, "loss": 3.5644, "step": 85550 }, { "epoch": 0.08432965803141476, "grad_norm": 3.419055938720703, "learning_rate": 9.992999151083518e-06, "loss": 3.5961, "step": 85600 }, { "epoch": 0.08437891600923686, "grad_norm": 2.535879373550415, "learning_rate": 9.992990962380264e-06, "loss": 3.546, "step": 85650 }, { "epoch": 0.08442817398705894, "grad_norm": 2.622371196746826, "learning_rate": 9.992982768894115e-06, "loss": 3.5623, "step": 85700 }, { "epoch": 0.08447743196488103, "grad_norm": 3.0832736492156982, "learning_rate": 9.992974570625081e-06, "loss": 3.5773, "step": 85750 }, { "epoch": 0.08452668994270313, "grad_norm": 2.691267490386963, "learning_rate": 9.992966367573167e-06, "loss": 3.5602, "step": 85800 }, { "epoch": 0.08457594792052521, "grad_norm": 2.6377806663513184, "learning_rate": 9.992958159738383e-06, "loss": 3.5294, "step": 85850 }, { "epoch": 0.0846252058983473, "grad_norm": 2.8323581218719482, "learning_rate": 9.992949947120737e-06, "loss": 3.5228, "step": 85900 }, { "epoch": 0.08467446387616938, "grad_norm": 7.656682014465332, "learning_rate": 9.992941729720235e-06, "loss": 3.513, "step": 85950 }, { "epoch": 0.08472372185399148, "grad_norm": 2.609022855758667, "learning_rate": 9.992933507536886e-06, "loss": 3.5711, "step": 86000 }, { "epoch": 0.08477297983181356, "grad_norm": 2.923875331878662, "learning_rate": 9.992925280570696e-06, "loss": 3.5956, "step": 86050 }, { "epoch": 0.08482223780963564, "grad_norm": 2.524531364440918, "learning_rate": 9.992917048821677e-06, "loss": 3.6039, "step": 86100 }, { "epoch": 0.08487149578745774, "grad_norm": 2.6002161502838135, "learning_rate": 
9.992908812289833e-06, "loss": 3.4705, "step": 86150 }, { "epoch": 0.08492075376527983, "grad_norm": 2.611948251724243, "learning_rate": 9.992900570975174e-06, "loss": 3.6147, "step": 86200 }, { "epoch": 0.08497001174310191, "grad_norm": 2.7273263931274414, "learning_rate": 9.992892324877707e-06, "loss": 3.4891, "step": 86250 }, { "epoch": 0.085019269720924, "grad_norm": 2.6298270225524902, "learning_rate": 9.99288407399744e-06, "loss": 3.5598, "step": 86300 }, { "epoch": 0.0850685276987461, "grad_norm": 2.372058629989624, "learning_rate": 9.99287581833438e-06, "loss": 3.5594, "step": 86350 }, { "epoch": 0.08511778567656818, "grad_norm": 3.4173614978790283, "learning_rate": 9.992867557888539e-06, "loss": 3.5739, "step": 86400 }, { "epoch": 0.08516704365439026, "grad_norm": 2.603400707244873, "learning_rate": 9.992859292659919e-06, "loss": 3.5375, "step": 86450 }, { "epoch": 0.08521630163221235, "grad_norm": 2.4780337810516357, "learning_rate": 9.99285102264853e-06, "loss": 3.6018, "step": 86500 }, { "epoch": 0.08526555961003444, "grad_norm": 2.813688039779663, "learning_rate": 9.992842747854383e-06, "loss": 3.5412, "step": 86550 }, { "epoch": 0.08531481758785653, "grad_norm": 2.7290074825286865, "learning_rate": 9.992834468277484e-06, "loss": 3.5557, "step": 86600 }, { "epoch": 0.08536407556567861, "grad_norm": 2.385192394256592, "learning_rate": 9.992826183917839e-06, "loss": 3.5122, "step": 86650 }, { "epoch": 0.08541333354350071, "grad_norm": 2.5369277000427246, "learning_rate": 9.992817894775458e-06, "loss": 3.5678, "step": 86700 }, { "epoch": 0.0854625915213228, "grad_norm": 2.7546024322509766, "learning_rate": 9.992809600850348e-06, "loss": 3.4976, "step": 86750 }, { "epoch": 0.08551184949914488, "grad_norm": 2.501375913619995, "learning_rate": 9.992801302142519e-06, "loss": 3.5853, "step": 86800 }, { "epoch": 0.08556110747696696, "grad_norm": 2.810171365737915, "learning_rate": 9.992792998651977e-06, "loss": 3.5156, "step": 86850 }, { "epoch": 
0.08561036545478906, "grad_norm": 2.856325387954712, "learning_rate": 9.992784690378728e-06, "loss": 3.5626, "step": 86900 }, { "epoch": 0.08565962343261115, "grad_norm": 2.920994520187378, "learning_rate": 9.992776377322784e-06, "loss": 3.486, "step": 86950 }, { "epoch": 0.08570888141043323, "grad_norm": 2.8368146419525146, "learning_rate": 9.99276805948415e-06, "loss": 3.5304, "step": 87000 }, { "epoch": 0.08575813938825533, "grad_norm": 2.583176851272583, "learning_rate": 9.992759736862838e-06, "loss": 3.6053, "step": 87050 }, { "epoch": 0.08580739736607741, "grad_norm": 2.7900843620300293, "learning_rate": 9.99275140945885e-06, "loss": 3.5985, "step": 87100 }, { "epoch": 0.0858566553438995, "grad_norm": 3.079324960708618, "learning_rate": 9.992743077272198e-06, "loss": 3.6397, "step": 87150 }, { "epoch": 0.08590591332172158, "grad_norm": 2.7468321323394775, "learning_rate": 9.992734740302889e-06, "loss": 3.5446, "step": 87200 }, { "epoch": 0.08595517129954368, "grad_norm": 2.8026750087738037, "learning_rate": 9.992726398550931e-06, "loss": 3.5369, "step": 87250 }, { "epoch": 0.08600442927736576, "grad_norm": 2.53709077835083, "learning_rate": 9.992718052016333e-06, "loss": 3.5228, "step": 87300 }, { "epoch": 0.08605368725518785, "grad_norm": 2.520190715789795, "learning_rate": 9.992709700699103e-06, "loss": 3.6005, "step": 87350 }, { "epoch": 0.08610294523300994, "grad_norm": 2.77109956741333, "learning_rate": 9.992701344599245e-06, "loss": 3.5916, "step": 87400 }, { "epoch": 0.08615220321083203, "grad_norm": 2.5478734970092773, "learning_rate": 9.992692983716773e-06, "loss": 3.5448, "step": 87450 }, { "epoch": 0.08620146118865411, "grad_norm": 2.9430477619171143, "learning_rate": 9.992684618051691e-06, "loss": 3.5113, "step": 87500 }, { "epoch": 0.0862507191664762, "grad_norm": 2.5092997550964355, "learning_rate": 9.992676247604008e-06, "loss": 3.552, "step": 87550 }, { "epoch": 0.0862999771442983, "grad_norm": 2.726875066757202, "learning_rate": 
9.992667872373732e-06, "loss": 3.5148, "step": 87600 }, { "epoch": 0.08634923512212038, "grad_norm": 2.6471102237701416, "learning_rate": 9.992659492360872e-06, "loss": 3.5582, "step": 87650 }, { "epoch": 0.08639849309994246, "grad_norm": 2.5109517574310303, "learning_rate": 9.992651107565433e-06, "loss": 3.5547, "step": 87700 }, { "epoch": 0.08644775107776455, "grad_norm": 2.7187154293060303, "learning_rate": 9.992642717987427e-06, "loss": 3.4924, "step": 87750 }, { "epoch": 0.08649700905558665, "grad_norm": 2.7590205669403076, "learning_rate": 9.99263432362686e-06, "loss": 3.5396, "step": 87800 }, { "epoch": 0.08654626703340873, "grad_norm": 2.751372814178467, "learning_rate": 9.992625924483739e-06, "loss": 3.5425, "step": 87850 }, { "epoch": 0.08659552501123081, "grad_norm": 2.77827525138855, "learning_rate": 9.992617520558075e-06, "loss": 3.5343, "step": 87900 }, { "epoch": 0.08664478298905291, "grad_norm": 2.4039108753204346, "learning_rate": 9.992609111849873e-06, "loss": 3.486, "step": 87950 }, { "epoch": 0.086694040966875, "grad_norm": 3.9515085220336914, "learning_rate": 9.992600698359144e-06, "loss": 3.5407, "step": 88000 }, { "epoch": 0.08674329894469708, "grad_norm": 2.4379289150238037, "learning_rate": 9.992592280085894e-06, "loss": 3.4459, "step": 88050 }, { "epoch": 0.08679255692251917, "grad_norm": 2.4810924530029297, "learning_rate": 9.992583857030132e-06, "loss": 3.5778, "step": 88100 }, { "epoch": 0.08684181490034126, "grad_norm": 2.5288853645324707, "learning_rate": 9.992575429191864e-06, "loss": 3.5012, "step": 88150 }, { "epoch": 0.08689107287816335, "grad_norm": 2.4745914936065674, "learning_rate": 9.9925669965711e-06, "loss": 3.512, "step": 88200 }, { "epoch": 0.08694033085598543, "grad_norm": 2.5554139614105225, "learning_rate": 9.992558559167848e-06, "loss": 3.5573, "step": 88250 }, { "epoch": 0.08698958883380753, "grad_norm": 2.7023372650146484, "learning_rate": 9.992550116982116e-06, "loss": 3.5133, "step": 88300 }, { "epoch": 
0.08703884681162961, "grad_norm": 2.445462703704834, "learning_rate": 9.992541670013912e-06, "loss": 3.6232, "step": 88350 }, { "epoch": 0.0870881047894517, "grad_norm": 2.461852788925171, "learning_rate": 9.992533218263245e-06, "loss": 3.5407, "step": 88400 }, { "epoch": 0.08713736276727378, "grad_norm": 2.715186595916748, "learning_rate": 9.99252476173012e-06, "loss": 3.5156, "step": 88450 }, { "epoch": 0.08718662074509588, "grad_norm": 2.658315658569336, "learning_rate": 9.99251630041455e-06, "loss": 3.5975, "step": 88500 }, { "epoch": 0.08723587872291796, "grad_norm": 2.541517734527588, "learning_rate": 9.992507834316539e-06, "loss": 3.5416, "step": 88550 }, { "epoch": 0.08728513670074005, "grad_norm": 2.6636147499084473, "learning_rate": 9.992499363436095e-06, "loss": 3.499, "step": 88600 }, { "epoch": 0.08733439467856215, "grad_norm": 2.708833694458008, "learning_rate": 9.99249088777323e-06, "loss": 3.5042, "step": 88650 }, { "epoch": 0.08738365265638423, "grad_norm": 2.585986375808716, "learning_rate": 9.992482407327948e-06, "loss": 3.5843, "step": 88700 }, { "epoch": 0.08743291063420632, "grad_norm": 2.4351749420166016, "learning_rate": 9.992473922100259e-06, "loss": 3.5687, "step": 88750 }, { "epoch": 0.0874821686120284, "grad_norm": 2.382880449295044, "learning_rate": 9.99246543209017e-06, "loss": 3.5761, "step": 88800 }, { "epoch": 0.0875314265898505, "grad_norm": 2.5152156352996826, "learning_rate": 9.992456937297692e-06, "loss": 3.5486, "step": 88850 }, { "epoch": 0.08758068456767258, "grad_norm": 2.8831043243408203, "learning_rate": 9.99244843772283e-06, "loss": 3.5019, "step": 88900 }, { "epoch": 0.08762994254549467, "grad_norm": 2.4755122661590576, "learning_rate": 9.992439933365594e-06, "loss": 3.5072, "step": 88950 }, { "epoch": 0.08767920052331675, "grad_norm": 2.9149246215820312, "learning_rate": 9.992431424225992e-06, "loss": 3.5254, "step": 89000 }, { "epoch": 0.08772845850113885, "grad_norm": 2.470930814743042, "learning_rate": 
9.992422910304031e-06, "loss": 3.5119, "step": 89050 }, { "epoch": 0.08777771647896093, "grad_norm": 2.594900608062744, "learning_rate": 9.99241439159972e-06, "loss": 3.5471, "step": 89100 }, { "epoch": 0.08782697445678302, "grad_norm": 2.8226711750030518, "learning_rate": 9.992405868113066e-06, "loss": 3.5427, "step": 89150 }, { "epoch": 0.08787623243460511, "grad_norm": 2.391951322555542, "learning_rate": 9.992397339844079e-06, "loss": 3.477, "step": 89200 }, { "epoch": 0.0879254904124272, "grad_norm": 2.280829429626465, "learning_rate": 9.992388806792765e-06, "loss": 3.5308, "step": 89250 }, { "epoch": 0.08797474839024928, "grad_norm": 2.977295160293579, "learning_rate": 9.992380268959134e-06, "loss": 3.5034, "step": 89300 }, { "epoch": 0.08802400636807137, "grad_norm": 2.482581615447998, "learning_rate": 9.992371726343195e-06, "loss": 3.53, "step": 89350 }, { "epoch": 0.08807326434589346, "grad_norm": 2.6620121002197266, "learning_rate": 9.992363178944954e-06, "loss": 3.4888, "step": 89400 }, { "epoch": 0.08812252232371555, "grad_norm": 2.6313095092773438, "learning_rate": 9.99235462676442e-06, "loss": 3.5438, "step": 89450 }, { "epoch": 0.08817178030153763, "grad_norm": 2.8939120769500732, "learning_rate": 9.992346069801602e-06, "loss": 3.5419, "step": 89500 }, { "epoch": 0.08822103827935973, "grad_norm": 2.3940343856811523, "learning_rate": 9.992337508056505e-06, "loss": 3.5407, "step": 89550 }, { "epoch": 0.08827029625718182, "grad_norm": 2.6430909633636475, "learning_rate": 9.992328941529141e-06, "loss": 3.5123, "step": 89600 }, { "epoch": 0.0883195542350039, "grad_norm": 2.820066452026367, "learning_rate": 9.992320370219516e-06, "loss": 3.5074, "step": 89650 }, { "epoch": 0.08836881221282598, "grad_norm": 2.598034143447876, "learning_rate": 9.992311794127641e-06, "loss": 3.5068, "step": 89700 }, { "epoch": 0.08841807019064808, "grad_norm": 2.3448190689086914, "learning_rate": 9.992303213253522e-06, "loss": 3.5129, "step": 89750 }, { "epoch": 
0.08846732816847017, "grad_norm": 2.763636350631714, "learning_rate": 9.992294627597166e-06, "loss": 3.4955, "step": 89800 }, { "epoch": 0.08851658614629225, "grad_norm": 2.390789270401001, "learning_rate": 9.992286037158584e-06, "loss": 3.5049, "step": 89850 }, { "epoch": 0.08856584412411435, "grad_norm": 2.3159124851226807, "learning_rate": 9.992277441937781e-06, "loss": 3.5574, "step": 89900 }, { "epoch": 0.08861510210193643, "grad_norm": 2.6102449893951416, "learning_rate": 9.99226884193477e-06, "loss": 3.4645, "step": 89950 }, { "epoch": 0.08866436007975852, "grad_norm": 2.8563361167907715, "learning_rate": 9.992260237149554e-06, "loss": 3.5104, "step": 90000 }, { "epoch": 0.0887136180575806, "grad_norm": 2.7328379154205322, "learning_rate": 9.992251627582145e-06, "loss": 3.5954, "step": 90050 }, { "epoch": 0.0887628760354027, "grad_norm": 2.6661548614501953, "learning_rate": 9.992243013232549e-06, "loss": 3.544, "step": 90100 }, { "epoch": 0.08881213401322478, "grad_norm": 2.617321014404297, "learning_rate": 9.992234394100775e-06, "loss": 3.5255, "step": 90150 }, { "epoch": 0.08886139199104687, "grad_norm": 2.4210362434387207, "learning_rate": 9.992225770186833e-06, "loss": 3.5065, "step": 90200 }, { "epoch": 0.08891064996886895, "grad_norm": 2.6725893020629883, "learning_rate": 9.992217141490727e-06, "loss": 3.5117, "step": 90250 }, { "epoch": 0.08895990794669105, "grad_norm": 2.578807830810547, "learning_rate": 9.99220850801247e-06, "loss": 3.5545, "step": 90300 }, { "epoch": 0.08900916592451313, "grad_norm": 2.3690056800842285, "learning_rate": 9.992199869752066e-06, "loss": 3.503, "step": 90350 }, { "epoch": 0.08905842390233522, "grad_norm": 2.781726837158203, "learning_rate": 9.992191226709528e-06, "loss": 3.5626, "step": 90400 }, { "epoch": 0.08910768188015732, "grad_norm": 2.4719045162200928, "learning_rate": 9.99218257888486e-06, "loss": 3.5145, "step": 90450 }, { "epoch": 0.0891569398579794, "grad_norm": 2.624699115753174, "learning_rate": 
9.992173926278072e-06, "loss": 3.4759, "step": 90500 }, { "epoch": 0.08920619783580148, "grad_norm": 2.647613286972046, "learning_rate": 9.992165268889174e-06, "loss": 3.5719, "step": 90550 }, { "epoch": 0.08925545581362357, "grad_norm": 2.4592554569244385, "learning_rate": 9.99215660671817e-06, "loss": 3.5596, "step": 90600 }, { "epoch": 0.08930471379144567, "grad_norm": 2.569150924682617, "learning_rate": 9.992147939765073e-06, "loss": 3.5558, "step": 90650 }, { "epoch": 0.08935397176926775, "grad_norm": 2.7038497924804688, "learning_rate": 9.99213926802989e-06, "loss": 3.4813, "step": 90700 }, { "epoch": 0.08940322974708984, "grad_norm": 2.477156162261963, "learning_rate": 9.992130591512627e-06, "loss": 3.5581, "step": 90750 }, { "epoch": 0.08945248772491193, "grad_norm": 2.522063732147217, "learning_rate": 9.992121910213294e-06, "loss": 3.5009, "step": 90800 }, { "epoch": 0.08950174570273402, "grad_norm": 2.414885997772217, "learning_rate": 9.992113224131898e-06, "loss": 3.4959, "step": 90850 }, { "epoch": 0.0895510036805561, "grad_norm": 2.3920798301696777, "learning_rate": 9.99210453326845e-06, "loss": 3.4681, "step": 90900 }, { "epoch": 0.08960026165837819, "grad_norm": 2.5773704051971436, "learning_rate": 9.992095837622957e-06, "loss": 3.5545, "step": 90950 }, { "epoch": 0.08964951963620028, "grad_norm": 2.5034501552581787, "learning_rate": 9.992087137195426e-06, "loss": 3.5353, "step": 91000 }, { "epoch": 0.08969877761402237, "grad_norm": 2.741173028945923, "learning_rate": 9.992078431985867e-06, "loss": 3.5645, "step": 91050 }, { "epoch": 0.08974803559184445, "grad_norm": 2.4738497734069824, "learning_rate": 9.992069721994286e-06, "loss": 3.573, "step": 91100 }, { "epoch": 0.08979729356966654, "grad_norm": 2.6511213779449463, "learning_rate": 9.992061007220695e-06, "loss": 3.4823, "step": 91150 }, { "epoch": 0.08984655154748863, "grad_norm": 2.5052990913391113, "learning_rate": 9.992052287665102e-06, "loss": 3.5282, "step": 91200 }, { "epoch": 
0.08989580952531072, "grad_norm": 2.4966471195220947, "learning_rate": 9.992043563327511e-06, "loss": 3.5504, "step": 91250 }, { "epoch": 0.0899450675031328, "grad_norm": 2.365420341491699, "learning_rate": 9.992034834207933e-06, "loss": 3.5086, "step": 91300 }, { "epoch": 0.0899943254809549, "grad_norm": 2.395207166671753, "learning_rate": 9.992026100306378e-06, "loss": 3.5467, "step": 91350 }, { "epoch": 0.09004358345877699, "grad_norm": 2.4911882877349854, "learning_rate": 9.992017361622853e-06, "loss": 3.5371, "step": 91400 }, { "epoch": 0.09009284143659907, "grad_norm": 2.4821033477783203, "learning_rate": 9.992008618157366e-06, "loss": 3.5372, "step": 91450 }, { "epoch": 0.09014209941442115, "grad_norm": 2.7418088912963867, "learning_rate": 9.991999869909926e-06, "loss": 3.5821, "step": 91500 }, { "epoch": 0.09019135739224325, "grad_norm": 2.600388526916504, "learning_rate": 9.991991116880539e-06, "loss": 3.4726, "step": 91550 }, { "epoch": 0.09024061537006534, "grad_norm": 2.75099515914917, "learning_rate": 9.991982359069219e-06, "loss": 3.5501, "step": 91600 }, { "epoch": 0.09028987334788742, "grad_norm": 2.580803394317627, "learning_rate": 9.991973596475968e-06, "loss": 3.4941, "step": 91650 }, { "epoch": 0.09033913132570952, "grad_norm": 2.90989089012146, "learning_rate": 9.991964829100797e-06, "loss": 3.4952, "step": 91700 }, { "epoch": 0.0903883893035316, "grad_norm": 2.6223320960998535, "learning_rate": 9.991956056943718e-06, "loss": 3.4964, "step": 91750 }, { "epoch": 0.09043764728135369, "grad_norm": 2.605656862258911, "learning_rate": 9.991947280004732e-06, "loss": 3.5144, "step": 91800 }, { "epoch": 0.09048690525917577, "grad_norm": 2.6077051162719727, "learning_rate": 9.991938498283852e-06, "loss": 3.5102, "step": 91850 }, { "epoch": 0.09053616323699787, "grad_norm": 2.599778890609741, "learning_rate": 9.991929711781087e-06, "loss": 3.4575, "step": 91900 }, { "epoch": 0.09058542121481995, "grad_norm": 2.926546812057495, "learning_rate": 
9.991920920496445e-06, "loss": 3.6151, "step": 91950 }, { "epoch": 0.09063467919264204, "grad_norm": 2.5250444412231445, "learning_rate": 9.991912124429932e-06, "loss": 3.5043, "step": 92000 }, { "epoch": 0.09068393717046414, "grad_norm": 2.434934616088867, "learning_rate": 9.99190332358156e-06, "loss": 3.4979, "step": 92050 }, { "epoch": 0.09073319514828622, "grad_norm": 2.545457124710083, "learning_rate": 9.991894517951334e-06, "loss": 3.4963, "step": 92100 }, { "epoch": 0.0907824531261083, "grad_norm": 2.6983044147491455, "learning_rate": 9.991885707539264e-06, "loss": 3.5408, "step": 92150 }, { "epoch": 0.09083171110393039, "grad_norm": 2.5740928649902344, "learning_rate": 9.991876892345359e-06, "loss": 3.4939, "step": 92200 }, { "epoch": 0.09088096908175249, "grad_norm": 2.4838335514068604, "learning_rate": 9.991868072369627e-06, "loss": 3.5084, "step": 92250 }, { "epoch": 0.09093022705957457, "grad_norm": 2.5277609825134277, "learning_rate": 9.991859247612075e-06, "loss": 3.5361, "step": 92300 }, { "epoch": 0.09097948503739665, "grad_norm": 2.6219544410705566, "learning_rate": 9.991850418072713e-06, "loss": 3.5214, "step": 92350 }, { "epoch": 0.09102874301521874, "grad_norm": 2.58064866065979, "learning_rate": 9.99184158375155e-06, "loss": 3.4821, "step": 92400 }, { "epoch": 0.09107800099304084, "grad_norm": 2.35748028755188, "learning_rate": 9.991832744648593e-06, "loss": 3.5567, "step": 92450 }, { "epoch": 0.09112725897086292, "grad_norm": 2.5825164318084717, "learning_rate": 9.991823900763851e-06, "loss": 3.4979, "step": 92500 }, { "epoch": 0.091176516948685, "grad_norm": 2.380385160446167, "learning_rate": 9.991815052097333e-06, "loss": 3.5532, "step": 92550 }, { "epoch": 0.0912257749265071, "grad_norm": 2.7011330127716064, "learning_rate": 9.991806198649048e-06, "loss": 3.4907, "step": 92600 }, { "epoch": 0.09127503290432919, "grad_norm": 2.463148593902588, "learning_rate": 9.991797340419003e-06, "loss": 3.5428, "step": 92650 }, { "epoch": 
0.09132429088215127, "grad_norm": 2.667295455932617, "learning_rate": 9.991788477407205e-06, "loss": 3.5165, "step": 92700 }, { "epoch": 0.09137354885997336, "grad_norm": 2.5165274143218994, "learning_rate": 9.991779609613668e-06, "loss": 3.5115, "step": 92750 }, { "epoch": 0.09142280683779545, "grad_norm": 2.5392086505889893, "learning_rate": 9.991770737038395e-06, "loss": 3.4982, "step": 92800 }, { "epoch": 0.09147206481561754, "grad_norm": 3.4226720333099365, "learning_rate": 9.991761859681397e-06, "loss": 3.4924, "step": 92850 }, { "epoch": 0.09152132279343962, "grad_norm": 2.593413829803467, "learning_rate": 9.991752977542681e-06, "loss": 3.5206, "step": 92900 }, { "epoch": 0.09157058077126172, "grad_norm": 2.3894574642181396, "learning_rate": 9.991744090622258e-06, "loss": 3.5297, "step": 92950 }, { "epoch": 0.0916198387490838, "grad_norm": 2.5359370708465576, "learning_rate": 9.991735198920133e-06, "loss": 3.5035, "step": 93000 }, { "epoch": 0.09166909672690589, "grad_norm": 2.5807111263275146, "learning_rate": 9.991726302436318e-06, "loss": 3.5291, "step": 93050 }, { "epoch": 0.09171835470472797, "grad_norm": 2.6205883026123047, "learning_rate": 9.99171740117082e-06, "loss": 3.4919, "step": 93100 }, { "epoch": 0.09176761268255007, "grad_norm": 2.713055372238159, "learning_rate": 9.99170849512365e-06, "loss": 3.4739, "step": 93150 }, { "epoch": 0.09181687066037215, "grad_norm": 2.6073999404907227, "learning_rate": 9.991699584294811e-06, "loss": 3.4595, "step": 93200 }, { "epoch": 0.09186612863819424, "grad_norm": 2.469595193862915, "learning_rate": 9.991690668684315e-06, "loss": 3.4965, "step": 93250 }, { "epoch": 0.09191538661601634, "grad_norm": 2.6497719287872314, "learning_rate": 9.99168174829217e-06, "loss": 3.5542, "step": 93300 }, { "epoch": 0.09196464459383842, "grad_norm": 2.485102891921997, "learning_rate": 9.991672823118386e-06, "loss": 3.5522, "step": 93350 }, { "epoch": 0.0920139025716605, "grad_norm": 2.45676589012146, "learning_rate": 
9.99166389316297e-06, "loss": 3.4387, "step": 93400 }, { "epoch": 0.09206316054948259, "grad_norm": 2.5401172637939453, "learning_rate": 9.99165495842593e-06, "loss": 3.4868, "step": 93450 }, { "epoch": 0.09211241852730469, "grad_norm": 2.681870698928833, "learning_rate": 9.991646018907277e-06, "loss": 3.501, "step": 93500 }, { "epoch": 0.09216167650512677, "grad_norm": 2.6447935104370117, "learning_rate": 9.991637074607017e-06, "loss": 3.5465, "step": 93550 }, { "epoch": 0.09221093448294886, "grad_norm": 2.541200876235962, "learning_rate": 9.99162812552516e-06, "loss": 3.5026, "step": 93600 }, { "epoch": 0.09226019246077094, "grad_norm": 2.5497496128082275, "learning_rate": 9.991619171661713e-06, "loss": 3.5523, "step": 93650 }, { "epoch": 0.09230945043859304, "grad_norm": 2.5159003734588623, "learning_rate": 9.991610213016687e-06, "loss": 3.5206, "step": 93700 }, { "epoch": 0.09235870841641512, "grad_norm": 2.498478412628174, "learning_rate": 9.991601249590088e-06, "loss": 3.4579, "step": 93750 }, { "epoch": 0.0924079663942372, "grad_norm": 2.622728109359741, "learning_rate": 9.991592281381928e-06, "loss": 3.5274, "step": 93800 }, { "epoch": 0.0924572243720593, "grad_norm": 2.6374659538269043, "learning_rate": 9.99158330839221e-06, "loss": 3.498, "step": 93850 }, { "epoch": 0.09250648234988139, "grad_norm": 2.5417380332946777, "learning_rate": 9.99157433062095e-06, "loss": 3.4728, "step": 93900 }, { "epoch": 0.09255574032770347, "grad_norm": 2.5940327644348145, "learning_rate": 9.991565348068151e-06, "loss": 3.4344, "step": 93950 }, { "epoch": 0.09260499830552556, "grad_norm": 2.634363889694214, "learning_rate": 9.991556360733823e-06, "loss": 3.4755, "step": 94000 }, { "epoch": 0.09265425628334766, "grad_norm": 2.5082905292510986, "learning_rate": 9.991547368617974e-06, "loss": 3.5322, "step": 94050 }, { "epoch": 0.09270351426116974, "grad_norm": 2.5004141330718994, "learning_rate": 9.991538371720615e-06, "loss": 3.4453, "step": 94100 }, { "epoch": 
0.09275277223899182, "grad_norm": 2.683391809463501, "learning_rate": 9.991529370041752e-06, "loss": 3.497, "step": 94150 }, { "epoch": 0.09280203021681392, "grad_norm": 2.576052188873291, "learning_rate": 9.991520363581395e-06, "loss": 3.4334, "step": 94200 }, { "epoch": 0.092851288194636, "grad_norm": 2.607886552810669, "learning_rate": 9.991511352339554e-06, "loss": 3.448, "step": 94250 }, { "epoch": 0.09290054617245809, "grad_norm": 2.674027919769287, "learning_rate": 9.991502336316234e-06, "loss": 3.4608, "step": 94300 }, { "epoch": 0.09294980415028017, "grad_norm": 2.5196359157562256, "learning_rate": 9.991493315511447e-06, "loss": 3.4313, "step": 94350 }, { "epoch": 0.09299906212810227, "grad_norm": 2.801942825317383, "learning_rate": 9.991484289925199e-06, "loss": 3.5405, "step": 94400 }, { "epoch": 0.09304832010592436, "grad_norm": 2.6478660106658936, "learning_rate": 9.991475259557501e-06, "loss": 3.4951, "step": 94450 }, { "epoch": 0.09309757808374644, "grad_norm": 3.284269332885742, "learning_rate": 9.99146622440836e-06, "loss": 3.5049, "step": 94500 }, { "epoch": 0.09314683606156852, "grad_norm": 2.835618257522583, "learning_rate": 9.991457184477784e-06, "loss": 3.5369, "step": 94550 }, { "epoch": 0.09319609403939062, "grad_norm": 2.5260472297668457, "learning_rate": 9.991448139765785e-06, "loss": 3.4654, "step": 94600 }, { "epoch": 0.09324535201721271, "grad_norm": 2.541609287261963, "learning_rate": 9.99143909027237e-06, "loss": 3.5424, "step": 94650 }, { "epoch": 0.09329460999503479, "grad_norm": 2.5830399990081787, "learning_rate": 9.991430035997544e-06, "loss": 3.5411, "step": 94700 }, { "epoch": 0.09334386797285689, "grad_norm": 2.764085531234741, "learning_rate": 9.991420976941322e-06, "loss": 3.4757, "step": 94750 }, { "epoch": 0.09339312595067897, "grad_norm": 2.7741506099700928, "learning_rate": 9.991411913103708e-06, "loss": 3.5134, "step": 94800 }, { "epoch": 0.09344238392850106, "grad_norm": 2.8838443756103516, "learning_rate": 
9.991402844484712e-06, "loss": 3.5217, "step": 94850 }, { "epoch": 0.09349164190632314, "grad_norm": 2.6583337783813477, "learning_rate": 9.991393771084343e-06, "loss": 3.5017, "step": 94900 }, { "epoch": 0.09354089988414524, "grad_norm": 2.9703612327575684, "learning_rate": 9.99138469290261e-06, "loss": 3.4961, "step": 94950 }, { "epoch": 0.09359015786196732, "grad_norm": 2.417046546936035, "learning_rate": 9.991375609939524e-06, "loss": 3.493, "step": 95000 }, { "epoch": 0.09363941583978941, "grad_norm": 2.505190372467041, "learning_rate": 9.991366522195088e-06, "loss": 3.476, "step": 95050 }, { "epoch": 0.0936886738176115, "grad_norm": 2.5905630588531494, "learning_rate": 9.991357429669314e-06, "loss": 3.4537, "step": 95100 }, { "epoch": 0.09373793179543359, "grad_norm": 2.4609169960021973, "learning_rate": 9.99134833236221e-06, "loss": 3.4682, "step": 95150 }, { "epoch": 0.09378718977325567, "grad_norm": 2.482228994369507, "learning_rate": 9.991339230273786e-06, "loss": 3.5, "step": 95200 }, { "epoch": 0.09383644775107776, "grad_norm": 2.9098143577575684, "learning_rate": 9.991330123404051e-06, "loss": 3.513, "step": 95250 }, { "epoch": 0.09388570572889986, "grad_norm": 2.8260180950164795, "learning_rate": 9.99132101175301e-06, "loss": 3.4946, "step": 95300 }, { "epoch": 0.09393496370672194, "grad_norm": 2.4881205558776855, "learning_rate": 9.991311895320677e-06, "loss": 3.5306, "step": 95350 }, { "epoch": 0.09398422168454403, "grad_norm": 2.8624229431152344, "learning_rate": 9.991302774107055e-06, "loss": 3.4836, "step": 95400 }, { "epoch": 0.09403347966236612, "grad_norm": 2.796124219894409, "learning_rate": 9.991293648112158e-06, "loss": 3.5702, "step": 95450 }, { "epoch": 0.09408273764018821, "grad_norm": 2.7107365131378174, "learning_rate": 9.991284517335993e-06, "loss": 3.4546, "step": 95500 }, { "epoch": 0.09413199561801029, "grad_norm": 2.4883761405944824, "learning_rate": 9.991275381778566e-06, "loss": 3.4415, "step": 95550 }, { "epoch": 
0.09418125359583238, "grad_norm": 2.442410707473755, "learning_rate": 9.99126624143989e-06, "loss": 3.5253, "step": 95600 }, { "epoch": 0.09423051157365447, "grad_norm": 2.4142189025878906, "learning_rate": 9.99125709631997e-06, "loss": 3.5348, "step": 95650 }, { "epoch": 0.09427976955147656, "grad_norm": 2.625922679901123, "learning_rate": 9.991247946418818e-06, "loss": 3.4561, "step": 95700 }, { "epoch": 0.09432902752929864, "grad_norm": 2.813594102859497, "learning_rate": 9.991238791736442e-06, "loss": 3.5081, "step": 95750 }, { "epoch": 0.09437828550712073, "grad_norm": 2.6372132301330566, "learning_rate": 9.991229632272849e-06, "loss": 3.5865, "step": 95800 }, { "epoch": 0.09442754348494282, "grad_norm": 2.5123181343078613, "learning_rate": 9.991220468028048e-06, "loss": 3.4986, "step": 95850 }, { "epoch": 0.09447680146276491, "grad_norm": 2.395050048828125, "learning_rate": 9.991211299002051e-06, "loss": 3.4792, "step": 95900 }, { "epoch": 0.09452605944058699, "grad_norm": 2.519516944885254, "learning_rate": 9.991202125194863e-06, "loss": 3.4771, "step": 95950 }, { "epoch": 0.09457531741840909, "grad_norm": 2.4439661502838135, "learning_rate": 9.991192946606493e-06, "loss": 3.4459, "step": 96000 }, { "epoch": 0.09462457539623118, "grad_norm": 2.567617654800415, "learning_rate": 9.991183763236954e-06, "loss": 3.4779, "step": 96050 }, { "epoch": 0.09467383337405326, "grad_norm": 2.4986140727996826, "learning_rate": 9.991174575086249e-06, "loss": 3.4662, "step": 96100 }, { "epoch": 0.09472309135187534, "grad_norm": 2.4114882946014404, "learning_rate": 9.99116538215439e-06, "loss": 3.4822, "step": 96150 }, { "epoch": 0.09477234932969744, "grad_norm": 2.7959210872650146, "learning_rate": 9.991156184441387e-06, "loss": 3.5351, "step": 96200 }, { "epoch": 0.09482160730751953, "grad_norm": 3.051692008972168, "learning_rate": 9.991146981947246e-06, "loss": 3.4586, "step": 96250 }, { "epoch": 0.09487086528534161, "grad_norm": 2.402531862258911, "learning_rate": 
9.991137774671977e-06, "loss": 3.4468, "step": 96300 }, { "epoch": 0.09492012326316371, "grad_norm": 2.6857526302337646, "learning_rate": 9.991128562615589e-06, "loss": 3.4635, "step": 96350 }, { "epoch": 0.09496938124098579, "grad_norm": 2.657498598098755, "learning_rate": 9.99111934577809e-06, "loss": 3.5091, "step": 96400 }, { "epoch": 0.09501863921880788, "grad_norm": 2.8503901958465576, "learning_rate": 9.991110124159491e-06, "loss": 3.5072, "step": 96450 }, { "epoch": 0.09506789719662996, "grad_norm": 2.5324859619140625, "learning_rate": 9.991100897759799e-06, "loss": 3.4034, "step": 96500 }, { "epoch": 0.09511715517445206, "grad_norm": 2.7217602729797363, "learning_rate": 9.991091666579022e-06, "loss": 3.4746, "step": 96550 }, { "epoch": 0.09516641315227414, "grad_norm": 2.5247881412506104, "learning_rate": 9.991082430617172e-06, "loss": 3.4707, "step": 96600 }, { "epoch": 0.09521567113009623, "grad_norm": 2.921921968460083, "learning_rate": 9.991073189874254e-06, "loss": 3.4536, "step": 96650 }, { "epoch": 0.09526492910791833, "grad_norm": 2.5481796264648438, "learning_rate": 9.99106394435028e-06, "loss": 3.4458, "step": 96700 }, { "epoch": 0.09531418708574041, "grad_norm": 2.909856081008911, "learning_rate": 9.991054694045257e-06, "loss": 3.5254, "step": 96750 }, { "epoch": 0.0953634450635625, "grad_norm": 2.6333370208740234, "learning_rate": 9.991045438959195e-06, "loss": 3.4705, "step": 96800 }, { "epoch": 0.09541270304138458, "grad_norm": 2.746682643890381, "learning_rate": 9.991036179092101e-06, "loss": 3.4788, "step": 96850 }, { "epoch": 0.09546196101920668, "grad_norm": 2.63202166557312, "learning_rate": 9.991026914443986e-06, "loss": 3.4861, "step": 96900 }, { "epoch": 0.09551121899702876, "grad_norm": 2.33595609664917, "learning_rate": 9.991017645014857e-06, "loss": 3.4755, "step": 96950 }, { "epoch": 0.09556047697485084, "grad_norm": 2.4882144927978516, "learning_rate": 9.991008370804726e-06, "loss": 3.455, "step": 97000 }, { "epoch": 
0.09560973495267293, "grad_norm": 2.552701234817505, "learning_rate": 9.990999091813598e-06, "loss": 3.4825, "step": 97050 }, { "epoch": 0.09565899293049503, "grad_norm": 2.5472116470336914, "learning_rate": 9.990989808041484e-06, "loss": 3.4527, "step": 97100 }, { "epoch": 0.09570825090831711, "grad_norm": 2.492727279663086, "learning_rate": 9.990980519488393e-06, "loss": 3.5282, "step": 97150 }, { "epoch": 0.0957575088861392, "grad_norm": 2.5237650871276855, "learning_rate": 9.990971226154333e-06, "loss": 3.4578, "step": 97200 }, { "epoch": 0.09580676686396129, "grad_norm": 2.453138828277588, "learning_rate": 9.990961928039316e-06, "loss": 3.4018, "step": 97250 }, { "epoch": 0.09585602484178338, "grad_norm": 2.765453338623047, "learning_rate": 9.990952625143344e-06, "loss": 3.5062, "step": 97300 }, { "epoch": 0.09590528281960546, "grad_norm": 2.573054790496826, "learning_rate": 9.990943317466433e-06, "loss": 3.5221, "step": 97350 }, { "epoch": 0.09595454079742755, "grad_norm": 2.7383389472961426, "learning_rate": 9.99093400500859e-06, "loss": 3.5395, "step": 97400 }, { "epoch": 0.09600379877524964, "grad_norm": 2.4440360069274902, "learning_rate": 9.990924687769821e-06, "loss": 3.5486, "step": 97450 }, { "epoch": 0.09605305675307173, "grad_norm": 2.4890618324279785, "learning_rate": 9.990915365750137e-06, "loss": 3.5377, "step": 97500 }, { "epoch": 0.09610231473089381, "grad_norm": 2.441108465194702, "learning_rate": 9.990906038949547e-06, "loss": 3.5227, "step": 97550 }, { "epoch": 0.09615157270871591, "grad_norm": 2.552497625350952, "learning_rate": 9.99089670736806e-06, "loss": 3.5359, "step": 97600 }, { "epoch": 0.096200830686538, "grad_norm": 2.4606144428253174, "learning_rate": 9.990887371005685e-06, "loss": 3.4534, "step": 97650 }, { "epoch": 0.09625008866436008, "grad_norm": 2.6555192470550537, "learning_rate": 9.990878029862432e-06, "loss": 3.486, "step": 97700 }, { "epoch": 0.09629934664218216, "grad_norm": 2.334169864654541, "learning_rate": 
9.990868683938307e-06, "loss": 3.4815, "step": 97750 }, { "epoch": 0.09634860462000426, "grad_norm": 2.764765739440918, "learning_rate": 9.99085933323332e-06, "loss": 3.444, "step": 97800 }, { "epoch": 0.09639786259782634, "grad_norm": 2.4359214305877686, "learning_rate": 9.990849977747482e-06, "loss": 3.4403, "step": 97850 }, { "epoch": 0.09644712057564843, "grad_norm": 2.440549612045288, "learning_rate": 9.9908406174808e-06, "loss": 3.4835, "step": 97900 }, { "epoch": 0.09649637855347051, "grad_norm": 2.7988245487213135, "learning_rate": 9.990831252433283e-06, "loss": 3.4705, "step": 97950 }, { "epoch": 0.09654563653129261, "grad_norm": 2.4685730934143066, "learning_rate": 9.990821882604941e-06, "loss": 3.3753, "step": 98000 }, { "epoch": 0.0965948945091147, "grad_norm": 2.4733502864837646, "learning_rate": 9.990812507995785e-06, "loss": 3.4926, "step": 98050 }, { "epoch": 0.09664415248693678, "grad_norm": 2.232922315597534, "learning_rate": 9.990803128605818e-06, "loss": 3.5051, "step": 98100 }, { "epoch": 0.09669341046475888, "grad_norm": 2.513568878173828, "learning_rate": 9.990793744435052e-06, "loss": 3.4412, "step": 98150 }, { "epoch": 0.09674266844258096, "grad_norm": 2.5386228561401367, "learning_rate": 9.990784355483498e-06, "loss": 3.5011, "step": 98200 }, { "epoch": 0.09679192642040305, "grad_norm": 2.622732162475586, "learning_rate": 9.990774961751164e-06, "loss": 3.4878, "step": 98250 }, { "epoch": 0.09684118439822513, "grad_norm": 2.6431615352630615, "learning_rate": 9.990765563238057e-06, "loss": 3.519, "step": 98300 }, { "epoch": 0.09689044237604723, "grad_norm": 2.5935304164886475, "learning_rate": 9.990756159944188e-06, "loss": 3.4962, "step": 98350 }, { "epoch": 0.09693970035386931, "grad_norm": 2.5681283473968506, "learning_rate": 9.990746751869566e-06, "loss": 3.5362, "step": 98400 }, { "epoch": 0.0969889583316914, "grad_norm": 2.5565953254699707, "learning_rate": 9.990737339014199e-06, "loss": 3.4671, "step": 98450 }, { "epoch": 
0.0970382163095135, "grad_norm": 2.694472312927246, "learning_rate": 9.990727921378096e-06, "loss": 3.4981, "step": 98500 }, { "epoch": 0.09708747428733558, "grad_norm": 2.4938342571258545, "learning_rate": 9.990718498961268e-06, "loss": 3.4925, "step": 98550 }, { "epoch": 0.09713673226515766, "grad_norm": 2.255632162094116, "learning_rate": 9.99070907176372e-06, "loss": 3.4718, "step": 98600 }, { "epoch": 0.09718599024297975, "grad_norm": 2.745765209197998, "learning_rate": 9.990699639785465e-06, "loss": 3.4349, "step": 98650 }, { "epoch": 0.09723524822080185, "grad_norm": 2.6263084411621094, "learning_rate": 9.99069020302651e-06, "loss": 3.4559, "step": 98700 }, { "epoch": 0.09728450619862393, "grad_norm": 2.5612058639526367, "learning_rate": 9.990680761486864e-06, "loss": 3.5187, "step": 98750 }, { "epoch": 0.09733376417644601, "grad_norm": 2.5737497806549072, "learning_rate": 9.990671315166538e-06, "loss": 3.4553, "step": 98800 }, { "epoch": 0.09738302215426811, "grad_norm": 2.6999330520629883, "learning_rate": 9.990661864065539e-06, "loss": 3.4896, "step": 98850 }, { "epoch": 0.0974322801320902, "grad_norm": 2.508227825164795, "learning_rate": 9.990652408183877e-06, "loss": 3.5152, "step": 98900 }, { "epoch": 0.09748153810991228, "grad_norm": 2.356147289276123, "learning_rate": 9.99064294752156e-06, "loss": 3.4644, "step": 98950 }, { "epoch": 0.09753079608773436, "grad_norm": 2.2947211265563965, "learning_rate": 9.9906334820786e-06, "loss": 3.4727, "step": 99000 }, { "epoch": 0.09758005406555646, "grad_norm": 2.6120007038116455, "learning_rate": 9.990624011855003e-06, "loss": 3.4392, "step": 99050 }, { "epoch": 0.09762931204337855, "grad_norm": 2.534365177154541, "learning_rate": 9.990614536850778e-06, "loss": 3.4336, "step": 99100 }, { "epoch": 0.09767857002120063, "grad_norm": 2.5490798950195312, "learning_rate": 9.990605057065935e-06, "loss": 3.449, "step": 99150 }, { "epoch": 0.09772782799902271, "grad_norm": 2.6555633544921875, "learning_rate": 
9.990595572500484e-06, "loss": 3.4595, "step": 99200 }, { "epoch": 0.09777708597684481, "grad_norm": 2.5927019119262695, "learning_rate": 9.990586083154433e-06, "loss": 3.397, "step": 99250 }, { "epoch": 0.0978263439546669, "grad_norm": 2.411048412322998, "learning_rate": 9.990576589027792e-06, "loss": 3.4678, "step": 99300 }, { "epoch": 0.09787560193248898, "grad_norm": 2.452605962753296, "learning_rate": 9.990567090120569e-06, "loss": 3.5182, "step": 99350 }, { "epoch": 0.09792485991031108, "grad_norm": 2.798715114593506, "learning_rate": 9.990557586432773e-06, "loss": 3.4316, "step": 99400 }, { "epoch": 0.09797411788813316, "grad_norm": 2.5999791622161865, "learning_rate": 9.990548077964412e-06, "loss": 3.5288, "step": 99450 }, { "epoch": 0.09802337586595525, "grad_norm": 2.5821330547332764, "learning_rate": 9.9905385647155e-06, "loss": 3.4817, "step": 99500 }, { "epoch": 0.09807263384377733, "grad_norm": 2.5211875438690186, "learning_rate": 9.990529046686042e-06, "loss": 3.4781, "step": 99550 }, { "epoch": 0.09812189182159943, "grad_norm": 2.659984588623047, "learning_rate": 9.990519523876046e-06, "loss": 3.476, "step": 99600 }, { "epoch": 0.09817114979942151, "grad_norm": 2.32822322845459, "learning_rate": 9.990509996285524e-06, "loss": 3.4623, "step": 99650 }, { "epoch": 0.0982204077772436, "grad_norm": 2.5946106910705566, "learning_rate": 9.990500463914486e-06, "loss": 3.4747, "step": 99700 }, { "epoch": 0.0982696657550657, "grad_norm": 2.855876922607422, "learning_rate": 9.990490926762937e-06, "loss": 3.3972, "step": 99750 }, { "epoch": 0.09831892373288778, "grad_norm": 2.671902656555176, "learning_rate": 9.99048138483089e-06, "loss": 3.4539, "step": 99800 }, { "epoch": 0.09836818171070986, "grad_norm": 2.4329681396484375, "learning_rate": 9.990471838118352e-06, "loss": 3.4546, "step": 99850 }, { "epoch": 0.09841743968853195, "grad_norm": 2.5714337825775146, "learning_rate": 9.990462286625332e-06, "loss": 3.488, "step": 99900 }, { "epoch": 
0.09846669766635405, "grad_norm": 2.980865716934204, "learning_rate": 9.990452730351842e-06, "loss": 3.5138, "step": 99950 }, { "epoch": 0.09851595564417613, "grad_norm": 2.672783851623535, "learning_rate": 9.990443169297887e-06, "loss": 3.4683, "step": 100000 }, { "epoch": 0.09856521362199822, "grad_norm": 2.368260383605957, "learning_rate": 9.99043360346348e-06, "loss": 3.4939, "step": 100050 }, { "epoch": 0.09861447159982031, "grad_norm": 2.5601234436035156, "learning_rate": 9.990424032848626e-06, "loss": 3.4729, "step": 100100 }, { "epoch": 0.0986637295776424, "grad_norm": 2.4284610748291016, "learning_rate": 9.990414457453338e-06, "loss": 3.4955, "step": 100150 }, { "epoch": 0.09871298755546448, "grad_norm": 2.730605363845825, "learning_rate": 9.990404877277625e-06, "loss": 3.4554, "step": 100200 }, { "epoch": 0.09876224553328657, "grad_norm": 2.4088621139526367, "learning_rate": 9.990395292321493e-06, "loss": 3.4361, "step": 100250 }, { "epoch": 0.09881150351110866, "grad_norm": 2.9302256107330322, "learning_rate": 9.990385702584955e-06, "loss": 3.4401, "step": 100300 }, { "epoch": 0.09886076148893075, "grad_norm": 2.779808282852173, "learning_rate": 9.990376108068017e-06, "loss": 3.4329, "step": 100350 }, { "epoch": 0.09891001946675283, "grad_norm": 2.444542646408081, "learning_rate": 9.990366508770691e-06, "loss": 3.439, "step": 100400 }, { "epoch": 0.09895927744457492, "grad_norm": 2.5757060050964355, "learning_rate": 9.990356904692982e-06, "loss": 3.4789, "step": 100450 }, { "epoch": 0.09900853542239701, "grad_norm": 2.5243523120880127, "learning_rate": 9.990347295834903e-06, "loss": 3.4918, "step": 100500 }, { "epoch": 0.0990577934002191, "grad_norm": 2.7300150394439697, "learning_rate": 9.990337682196463e-06, "loss": 3.4574, "step": 100550 }, { "epoch": 0.09910705137804118, "grad_norm": 2.6037099361419678, "learning_rate": 9.990328063777667e-06, "loss": 3.4234, "step": 100600 }, { "epoch": 0.09915630935586328, "grad_norm": 2.6875269412994385, 
"learning_rate": 9.990318440578532e-06, "loss": 3.4544, "step": 100650 }, { "epoch": 0.09920556733368537, "grad_norm": 2.674828052520752, "learning_rate": 9.99030881259906e-06, "loss": 3.4433, "step": 100700 }, { "epoch": 0.09925482531150745, "grad_norm": 2.975830554962158, "learning_rate": 9.990299179839263e-06, "loss": 3.5119, "step": 100750 }, { "epoch": 0.09930408328932953, "grad_norm": 2.3529393672943115, "learning_rate": 9.990289542299151e-06, "loss": 3.4886, "step": 100800 }, { "epoch": 0.09935334126715163, "grad_norm": 2.622812271118164, "learning_rate": 9.990279899978731e-06, "loss": 3.4206, "step": 100850 }, { "epoch": 0.09940259924497372, "grad_norm": 2.516540288925171, "learning_rate": 9.990270252878017e-06, "loss": 3.4618, "step": 100900 }, { "epoch": 0.0994518572227958, "grad_norm": 2.7003159523010254, "learning_rate": 9.990260600997012e-06, "loss": 3.4045, "step": 100950 }, { "epoch": 0.0995011152006179, "grad_norm": 2.4062156677246094, "learning_rate": 9.990250944335728e-06, "loss": 3.5073, "step": 101000 }, { "epoch": 0.09955037317843998, "grad_norm": 2.9946587085723877, "learning_rate": 9.990241282894175e-06, "loss": 3.4648, "step": 101050 }, { "epoch": 0.09959963115626207, "grad_norm": 2.6976659297943115, "learning_rate": 9.99023161667236e-06, "loss": 3.4952, "step": 101100 }, { "epoch": 0.09964888913408415, "grad_norm": 2.6516623497009277, "learning_rate": 9.990221945670297e-06, "loss": 3.4891, "step": 101150 }, { "epoch": 0.09969814711190625, "grad_norm": 2.345975399017334, "learning_rate": 9.990212269887991e-06, "loss": 3.5224, "step": 101200 }, { "epoch": 0.09974740508972833, "grad_norm": 2.7140519618988037, "learning_rate": 9.990202589325453e-06, "loss": 3.5205, "step": 101250 }, { "epoch": 0.09979666306755042, "grad_norm": 2.496894359588623, "learning_rate": 9.99019290398269e-06, "loss": 3.473, "step": 101300 }, { "epoch": 0.09984592104537252, "grad_norm": 2.4504098892211914, "learning_rate": 9.990183213859714e-06, "loss": 3.4246, "step": 
101350 }, { "epoch": 0.0998951790231946, "grad_norm": 2.4256160259246826, "learning_rate": 9.990173518956533e-06, "loss": 3.4312, "step": 101400 }, { "epoch": 0.09994443700101668, "grad_norm": 2.5933609008789062, "learning_rate": 9.990163819273157e-06, "loss": 3.4689, "step": 101450 }, { "epoch": 0.09999369497883877, "grad_norm": 2.5092689990997314, "learning_rate": 9.990154114809594e-06, "loss": 3.4108, "step": 101500 }, { "epoch": 0.10004295295666087, "grad_norm": 2.6835668087005615, "learning_rate": 9.990144405565853e-06, "loss": 3.4288, "step": 101550 }, { "epoch": 0.10009221093448295, "grad_norm": 2.7285449504852295, "learning_rate": 9.990134691541948e-06, "loss": 3.4368, "step": 101600 }, { "epoch": 0.10014146891230503, "grad_norm": 2.494976758956909, "learning_rate": 9.990124972737882e-06, "loss": 3.4657, "step": 101650 }, { "epoch": 0.10019072689012712, "grad_norm": 2.7615764141082764, "learning_rate": 9.990115249153666e-06, "loss": 3.4515, "step": 101700 }, { "epoch": 0.10023998486794922, "grad_norm": 2.5332746505737305, "learning_rate": 9.990105520789314e-06, "loss": 3.467, "step": 101750 }, { "epoch": 0.1002892428457713, "grad_norm": 2.5252227783203125, "learning_rate": 9.990095787644829e-06, "loss": 3.4811, "step": 101800 }, { "epoch": 0.10033850082359339, "grad_norm": 2.4719691276550293, "learning_rate": 9.990086049720225e-06, "loss": 3.4801, "step": 101850 }, { "epoch": 0.10038775880141548, "grad_norm": 2.521556854248047, "learning_rate": 9.990076307015507e-06, "loss": 3.406, "step": 101900 }, { "epoch": 0.10043701677923757, "grad_norm": 2.5171749591827393, "learning_rate": 9.990066559530688e-06, "loss": 3.4356, "step": 101950 }, { "epoch": 0.10048627475705965, "grad_norm": 2.8130905628204346, "learning_rate": 9.990056807265776e-06, "loss": 3.5226, "step": 102000 }, { "epoch": 0.10053553273488174, "grad_norm": 2.659457206726074, "learning_rate": 9.99004705022078e-06, "loss": 3.4655, "step": 102050 }, { "epoch": 0.10058479071270383, "grad_norm": 
2.511193037033081, "learning_rate": 9.990037288395708e-06, "loss": 3.4708, "step": 102100 }, { "epoch": 0.10063404869052592, "grad_norm": 2.5251622200012207, "learning_rate": 9.990027521790573e-06, "loss": 3.4235, "step": 102150 }, { "epoch": 0.100683306668348, "grad_norm": 2.5951144695281982, "learning_rate": 9.990017750405383e-06, "loss": 3.4367, "step": 102200 }, { "epoch": 0.1007325646461701, "grad_norm": 2.5631210803985596, "learning_rate": 9.990007974240145e-06, "loss": 3.5078, "step": 102250 }, { "epoch": 0.10078182262399218, "grad_norm": 2.601407766342163, "learning_rate": 9.989998193294872e-06, "loss": 3.4266, "step": 102300 }, { "epoch": 0.10083108060181427, "grad_norm": 2.742953062057495, "learning_rate": 9.98998840756957e-06, "loss": 3.4437, "step": 102350 }, { "epoch": 0.10088033857963635, "grad_norm": 2.7548887729644775, "learning_rate": 9.98997861706425e-06, "loss": 3.5472, "step": 102400 }, { "epoch": 0.10092959655745845, "grad_norm": 2.5202136039733887, "learning_rate": 9.989968821778921e-06, "loss": 3.5267, "step": 102450 }, { "epoch": 0.10097885453528054, "grad_norm": 2.8414618968963623, "learning_rate": 9.989959021713593e-06, "loss": 3.46, "step": 102500 }, { "epoch": 0.10102811251310262, "grad_norm": 2.468543767929077, "learning_rate": 9.989949216868276e-06, "loss": 3.4299, "step": 102550 }, { "epoch": 0.1010773704909247, "grad_norm": 2.5138635635375977, "learning_rate": 9.989939407242978e-06, "loss": 3.418, "step": 102600 }, { "epoch": 0.1011266284687468, "grad_norm": 2.4124672412872314, "learning_rate": 9.989929592837708e-06, "loss": 3.4285, "step": 102650 }, { "epoch": 0.10117588644656889, "grad_norm": 2.6053659915924072, "learning_rate": 9.989919773652477e-06, "loss": 3.4339, "step": 102700 }, { "epoch": 0.10122514442439097, "grad_norm": 2.5849623680114746, "learning_rate": 9.989909949687293e-06, "loss": 3.4488, "step": 102750 }, { "epoch": 0.10127440240221307, "grad_norm": 2.5343995094299316, "learning_rate": 9.989900120942165e-06, "loss": 
3.4805, "step": 102800 }, { "epoch": 0.10132366038003515, "grad_norm": 2.77571439743042, "learning_rate": 9.989890287417105e-06, "loss": 3.4309, "step": 102850 }, { "epoch": 0.10137291835785724, "grad_norm": 3.9947614669799805, "learning_rate": 9.98988044911212e-06, "loss": 3.3827, "step": 102900 }, { "epoch": 0.10142217633567932, "grad_norm": 2.9862940311431885, "learning_rate": 9.98987060602722e-06, "loss": 3.3701, "step": 102950 }, { "epoch": 0.10147143431350142, "grad_norm": 2.440326690673828, "learning_rate": 9.989860758162416e-06, "loss": 3.4573, "step": 103000 }, { "epoch": 0.1015206922913235, "grad_norm": 2.4570956230163574, "learning_rate": 9.989850905517715e-06, "loss": 3.4363, "step": 103050 }, { "epoch": 0.10156995026914559, "grad_norm": 2.504934787750244, "learning_rate": 9.989841048093129e-06, "loss": 3.3862, "step": 103100 }, { "epoch": 0.10161920824696768, "grad_norm": 2.584400177001953, "learning_rate": 9.989831185888665e-06, "loss": 3.4427, "step": 103150 }, { "epoch": 0.10166846622478977, "grad_norm": 2.9653422832489014, "learning_rate": 9.989821318904333e-06, "loss": 3.4554, "step": 103200 }, { "epoch": 0.10171772420261185, "grad_norm": 2.6193161010742188, "learning_rate": 9.989811447140143e-06, "loss": 3.4106, "step": 103250 }, { "epoch": 0.10176698218043394, "grad_norm": 2.246081829071045, "learning_rate": 9.989801570596106e-06, "loss": 3.2985, "step": 103300 }, { "epoch": 0.10181624015825604, "grad_norm": 2.726445198059082, "learning_rate": 9.989791689272228e-06, "loss": 3.4363, "step": 103350 }, { "epoch": 0.10186549813607812, "grad_norm": 2.35740065574646, "learning_rate": 9.989781803168521e-06, "loss": 3.4767, "step": 103400 }, { "epoch": 0.1019147561139002, "grad_norm": 2.625596284866333, "learning_rate": 9.989771912284994e-06, "loss": 3.409, "step": 103450 }, { "epoch": 0.1019640140917223, "grad_norm": 2.3832321166992188, "learning_rate": 9.989762016621656e-06, "loss": 3.4212, "step": 103500 }, { "epoch": 0.10201327206954439, 
"grad_norm": 2.4753546714782715, "learning_rate": 9.989752116178516e-06, "loss": 3.5071, "step": 103550 }, { "epoch": 0.10206253004736647, "grad_norm": 2.5028698444366455, "learning_rate": 9.989742210955587e-06, "loss": 3.4451, "step": 103600 }, { "epoch": 0.10211178802518855, "grad_norm": 2.589632272720337, "learning_rate": 9.989732300952872e-06, "loss": 3.4173, "step": 103650 }, { "epoch": 0.10216104600301065, "grad_norm": 2.3595945835113525, "learning_rate": 9.989722386170387e-06, "loss": 3.4366, "step": 103700 }, { "epoch": 0.10221030398083274, "grad_norm": 2.5656590461730957, "learning_rate": 9.989712466608136e-06, "loss": 3.3858, "step": 103750 }, { "epoch": 0.10225956195865482, "grad_norm": 2.4863128662109375, "learning_rate": 9.989702542266133e-06, "loss": 3.4264, "step": 103800 }, { "epoch": 0.1023088199364769, "grad_norm": 2.644341230392456, "learning_rate": 9.989692613144386e-06, "loss": 3.401, "step": 103850 }, { "epoch": 0.102358077914299, "grad_norm": 2.296281099319458, "learning_rate": 9.989682679242903e-06, "loss": 3.4655, "step": 103900 }, { "epoch": 0.10240733589212109, "grad_norm": 2.4843084812164307, "learning_rate": 9.989672740561696e-06, "loss": 3.4834, "step": 103950 }, { "epoch": 0.10245659386994317, "grad_norm": 2.814067840576172, "learning_rate": 9.989662797100771e-06, "loss": 3.4329, "step": 104000 }, { "epoch": 0.10250585184776527, "grad_norm": 2.458986759185791, "learning_rate": 9.989652848860142e-06, "loss": 3.5131, "step": 104050 }, { "epoch": 0.10255510982558735, "grad_norm": 2.8316445350646973, "learning_rate": 9.989642895839816e-06, "loss": 3.4472, "step": 104100 }, { "epoch": 0.10260436780340944, "grad_norm": 2.4114742279052734, "learning_rate": 9.989632938039801e-06, "loss": 3.5042, "step": 104150 }, { "epoch": 0.10265362578123152, "grad_norm": 2.50154709815979, "learning_rate": 9.98962297546011e-06, "loss": 3.3802, "step": 104200 }, { "epoch": 0.10270288375905362, "grad_norm": 2.5768649578094482, "learning_rate": 
9.989613008100751e-06, "loss": 3.4427, "step": 104250 }, { "epoch": 0.1027521417368757, "grad_norm": 2.5603206157684326, "learning_rate": 9.989603035961732e-06, "loss": 3.47, "step": 104300 }, { "epoch": 0.10280139971469779, "grad_norm": 2.428147315979004, "learning_rate": 9.989593059043066e-06, "loss": 3.3894, "step": 104350 }, { "epoch": 0.10285065769251989, "grad_norm": 2.6639344692230225, "learning_rate": 9.98958307734476e-06, "loss": 3.3962, "step": 104400 }, { "epoch": 0.10289991567034197, "grad_norm": 2.7756402492523193, "learning_rate": 9.989573090866824e-06, "loss": 3.4937, "step": 104450 }, { "epoch": 0.10294917364816406, "grad_norm": 2.6371467113494873, "learning_rate": 9.989563099609268e-06, "loss": 3.5071, "step": 104500 }, { "epoch": 0.10299843162598614, "grad_norm": 2.8595943450927734, "learning_rate": 9.9895531035721e-06, "loss": 3.4386, "step": 104550 }, { "epoch": 0.10304768960380824, "grad_norm": 2.5792438983917236, "learning_rate": 9.989543102755331e-06, "loss": 3.4549, "step": 104600 }, { "epoch": 0.10309694758163032, "grad_norm": 2.5201122760772705, "learning_rate": 9.989533097158972e-06, "loss": 3.3948, "step": 104650 }, { "epoch": 0.1031462055594524, "grad_norm": 2.5167293548583984, "learning_rate": 9.98952308678303e-06, "loss": 3.4983, "step": 104700 }, { "epoch": 0.1031954635372745, "grad_norm": 2.4705235958099365, "learning_rate": 9.989513071627515e-06, "loss": 3.4197, "step": 104750 }, { "epoch": 0.10324472151509659, "grad_norm": 2.478104829788208, "learning_rate": 9.989503051692437e-06, "loss": 3.4536, "step": 104800 }, { "epoch": 0.10329397949291867, "grad_norm": 2.5004122257232666, "learning_rate": 9.989493026977809e-06, "loss": 3.3995, "step": 104850 }, { "epoch": 0.10334323747074076, "grad_norm": 2.452639102935791, "learning_rate": 9.989482997483633e-06, "loss": 3.4266, "step": 104900 }, { "epoch": 0.10339249544856285, "grad_norm": 2.6803834438323975, "learning_rate": 9.989472963209925e-06, "loss": 3.4234, "step": 104950 }, { 
"epoch": 0.10344175342638494, "grad_norm": 2.54144024848938, "learning_rate": 9.989462924156694e-06, "loss": 3.4369, "step": 105000 }, { "epoch": 0.10349101140420702, "grad_norm": 2.48311185836792, "learning_rate": 9.989452880323946e-06, "loss": 3.4788, "step": 105050 }, { "epoch": 0.10354026938202911, "grad_norm": 2.560347318649292, "learning_rate": 9.989442831711693e-06, "loss": 3.451, "step": 105100 }, { "epoch": 0.1035895273598512, "grad_norm": 2.6288838386535645, "learning_rate": 9.989432778319944e-06, "loss": 3.3832, "step": 105150 }, { "epoch": 0.10363878533767329, "grad_norm": 2.608074426651001, "learning_rate": 9.989422720148711e-06, "loss": 3.4797, "step": 105200 }, { "epoch": 0.10368804331549537, "grad_norm": 2.577819585800171, "learning_rate": 9.989412657198e-06, "loss": 3.3888, "step": 105250 }, { "epoch": 0.10373730129331747, "grad_norm": 2.646688461303711, "learning_rate": 9.989402589467824e-06, "loss": 3.397, "step": 105300 }, { "epoch": 0.10378655927113956, "grad_norm": 2.7764625549316406, "learning_rate": 9.989392516958192e-06, "loss": 3.4673, "step": 105350 }, { "epoch": 0.10383581724896164, "grad_norm": 2.627352476119995, "learning_rate": 9.989382439669111e-06, "loss": 3.506, "step": 105400 }, { "epoch": 0.10388507522678372, "grad_norm": 2.774658203125, "learning_rate": 9.989372357600591e-06, "loss": 3.4066, "step": 105450 }, { "epoch": 0.10393433320460582, "grad_norm": 2.4145090579986572, "learning_rate": 9.989362270752646e-06, "loss": 3.3803, "step": 105500 }, { "epoch": 0.1039835911824279, "grad_norm": 2.5385334491729736, "learning_rate": 9.98935217912528e-06, "loss": 3.4549, "step": 105550 }, { "epoch": 0.10403284916024999, "grad_norm": 2.7633135318756104, "learning_rate": 9.989342082718508e-06, "loss": 3.5103, "step": 105600 }, { "epoch": 0.10408210713807209, "grad_norm": 2.586259603500366, "learning_rate": 9.989331981532334e-06, "loss": 3.4195, "step": 105650 }, { "epoch": 0.10413136511589417, "grad_norm": 2.5349018573760986, 
"learning_rate": 9.989321875566772e-06, "loss": 3.4737, "step": 105700 }, { "epoch": 0.10418062309371626, "grad_norm": 2.62892484664917, "learning_rate": 9.98931176482183e-06, "loss": 3.4073, "step": 105750 }, { "epoch": 0.10422988107153834, "grad_norm": 2.5253756046295166, "learning_rate": 9.989301649297518e-06, "loss": 3.4352, "step": 105800 }, { "epoch": 0.10427913904936044, "grad_norm": 2.4391913414001465, "learning_rate": 9.989291528993846e-06, "loss": 3.4147, "step": 105850 }, { "epoch": 0.10432839702718252, "grad_norm": 2.642686605453491, "learning_rate": 9.989281403910824e-06, "loss": 3.5669, "step": 105900 }, { "epoch": 0.10437765500500461, "grad_norm": 2.4868695735931396, "learning_rate": 9.98927127404846e-06, "loss": 3.4567, "step": 105950 }, { "epoch": 0.10442691298282669, "grad_norm": 2.4317967891693115, "learning_rate": 9.989261139406766e-06, "loss": 3.3978, "step": 106000 }, { "epoch": 0.10447617096064879, "grad_norm": 2.5884909629821777, "learning_rate": 9.989250999985748e-06, "loss": 3.4413, "step": 106050 }, { "epoch": 0.10452542893847087, "grad_norm": 2.6073176860809326, "learning_rate": 9.989240855785422e-06, "loss": 3.3917, "step": 106100 }, { "epoch": 0.10457468691629296, "grad_norm": 2.508674144744873, "learning_rate": 9.989230706805792e-06, "loss": 3.4787, "step": 106150 }, { "epoch": 0.10462394489411506, "grad_norm": 2.3065237998962402, "learning_rate": 9.98922055304687e-06, "loss": 3.4365, "step": 106200 }, { "epoch": 0.10467320287193714, "grad_norm": 2.5615105628967285, "learning_rate": 9.989210394508665e-06, "loss": 3.4132, "step": 106250 }, { "epoch": 0.10472246084975922, "grad_norm": 2.578866481781006, "learning_rate": 9.989200231191188e-06, "loss": 3.3706, "step": 106300 }, { "epoch": 0.10477171882758131, "grad_norm": 2.652883529663086, "learning_rate": 9.989190063094446e-06, "loss": 3.4375, "step": 106350 }, { "epoch": 0.1048209768054034, "grad_norm": 2.5515949726104736, "learning_rate": 9.989179890218453e-06, "loss": 3.3565, "step": 
106400 }, { "epoch": 0.10487023478322549, "grad_norm": 2.346762180328369, "learning_rate": 9.989169712563214e-06, "loss": 3.5482, "step": 106450 }, { "epoch": 0.10491949276104758, "grad_norm": 2.4448182582855225, "learning_rate": 9.989159530128742e-06, "loss": 3.4846, "step": 106500 }, { "epoch": 0.10496875073886967, "grad_norm": 2.5624773502349854, "learning_rate": 9.989149342915046e-06, "loss": 3.4437, "step": 106550 }, { "epoch": 0.10501800871669176, "grad_norm": 2.554595470428467, "learning_rate": 9.989139150922136e-06, "loss": 3.505, "step": 106600 }, { "epoch": 0.10506726669451384, "grad_norm": 2.5186970233917236, "learning_rate": 9.98912895415002e-06, "loss": 3.4999, "step": 106650 }, { "epoch": 0.10511652467233593, "grad_norm": 2.8461217880249023, "learning_rate": 9.989118752598711e-06, "loss": 3.4892, "step": 106700 }, { "epoch": 0.10516578265015802, "grad_norm": 2.5799806118011475, "learning_rate": 9.989108546268216e-06, "loss": 3.413, "step": 106750 }, { "epoch": 0.10521504062798011, "grad_norm": 2.627345085144043, "learning_rate": 9.989098335158547e-06, "loss": 3.4796, "step": 106800 }, { "epoch": 0.10526429860580219, "grad_norm": 2.4923176765441895, "learning_rate": 9.98908811926971e-06, "loss": 3.4554, "step": 106850 }, { "epoch": 0.10531355658362429, "grad_norm": 2.5942320823669434, "learning_rate": 9.989077898601719e-06, "loss": 3.3898, "step": 106900 }, { "epoch": 0.10536281456144637, "grad_norm": 2.6207265853881836, "learning_rate": 9.989067673154582e-06, "loss": 3.4644, "step": 106950 }, { "epoch": 0.10541207253926846, "grad_norm": 2.5532491207122803, "learning_rate": 9.989057442928307e-06, "loss": 3.4471, "step": 107000 }, { "epoch": 0.10546133051709054, "grad_norm": 2.5012567043304443, "learning_rate": 9.989047207922908e-06, "loss": 3.4172, "step": 107050 }, { "epoch": 0.10551058849491264, "grad_norm": 2.380570888519287, "learning_rate": 9.989036968138391e-06, "loss": 3.4249, "step": 107100 }, { "epoch": 0.10555984647273473, "grad_norm": 
2.500776529312134, "learning_rate": 9.989026723574768e-06, "loss": 3.427, "step": 107150 }, { "epoch": 0.10560910445055681, "grad_norm": 2.5395073890686035, "learning_rate": 9.989016474232049e-06, "loss": 3.3926, "step": 107200 }, { "epoch": 0.1056583624283789, "grad_norm": 2.7072510719299316, "learning_rate": 9.98900622011024e-06, "loss": 3.4548, "step": 107250 }, { "epoch": 0.10570762040620099, "grad_norm": 2.9147465229034424, "learning_rate": 9.988995961209355e-06, "loss": 3.4481, "step": 107300 }, { "epoch": 0.10575687838402308, "grad_norm": 2.5399296283721924, "learning_rate": 9.988985697529404e-06, "loss": 3.4287, "step": 107350 }, { "epoch": 0.10580613636184516, "grad_norm": 2.587733268737793, "learning_rate": 9.988975429070394e-06, "loss": 3.4861, "step": 107400 }, { "epoch": 0.10585539433966726, "grad_norm": 2.714301347732544, "learning_rate": 9.988965155832337e-06, "loss": 3.4571, "step": 107450 }, { "epoch": 0.10590465231748934, "grad_norm": 2.3904097080230713, "learning_rate": 9.988954877815242e-06, "loss": 3.4008, "step": 107500 }, { "epoch": 0.10595391029531143, "grad_norm": 2.5300099849700928, "learning_rate": 9.98894459501912e-06, "loss": 3.4124, "step": 107550 }, { "epoch": 0.10600316827313351, "grad_norm": 3.1428849697113037, "learning_rate": 9.988934307443976e-06, "loss": 3.3973, "step": 107600 }, { "epoch": 0.10605242625095561, "grad_norm": 2.380772590637207, "learning_rate": 9.988924015089827e-06, "loss": 3.4239, "step": 107650 }, { "epoch": 0.10610168422877769, "grad_norm": 2.4956820011138916, "learning_rate": 9.988913717956677e-06, "loss": 3.3983, "step": 107700 }, { "epoch": 0.10615094220659978, "grad_norm": 2.362903118133545, "learning_rate": 9.98890341604454e-06, "loss": 3.3915, "step": 107750 }, { "epoch": 0.10620020018442188, "grad_norm": 2.6356241703033447, "learning_rate": 9.988893109353423e-06, "loss": 3.4059, "step": 107800 }, { "epoch": 0.10624945816224396, "grad_norm": 2.397761106491089, "learning_rate": 9.98888279788334e-06, 
"loss": 3.4271, "step": 107850 }, { "epoch": 0.10629871614006604, "grad_norm": 2.544606924057007, "learning_rate": 9.988872481634294e-06, "loss": 3.4545, "step": 107900 }, { "epoch": 0.10634797411788813, "grad_norm": 2.4447109699249268, "learning_rate": 9.988862160606301e-06, "loss": 3.4591, "step": 107950 }, { "epoch": 0.10639723209571023, "grad_norm": 2.4964346885681152, "learning_rate": 9.988851834799368e-06, "loss": 3.4385, "step": 108000 }, { "epoch": 0.10644649007353231, "grad_norm": 2.578468084335327, "learning_rate": 9.988841504213506e-06, "loss": 3.4315, "step": 108050 }, { "epoch": 0.1064957480513544, "grad_norm": 2.128343343734741, "learning_rate": 9.988831168848725e-06, "loss": 3.4199, "step": 108100 }, { "epoch": 0.10654500602917649, "grad_norm": 2.490886926651001, "learning_rate": 9.988820828705034e-06, "loss": 3.4563, "step": 108150 }, { "epoch": 0.10659426400699858, "grad_norm": 2.655769109725952, "learning_rate": 9.988810483782443e-06, "loss": 3.4231, "step": 108200 }, { "epoch": 0.10664352198482066, "grad_norm": 2.489685297012329, "learning_rate": 9.988800134080962e-06, "loss": 3.3999, "step": 108250 }, { "epoch": 0.10669277996264274, "grad_norm": 2.65108060836792, "learning_rate": 9.988789779600603e-06, "loss": 3.481, "step": 108300 }, { "epoch": 0.10674203794046484, "grad_norm": 2.439152956008911, "learning_rate": 9.988779420341373e-06, "loss": 3.4463, "step": 108350 }, { "epoch": 0.10679129591828693, "grad_norm": 2.5587260723114014, "learning_rate": 9.988769056303281e-06, "loss": 3.494, "step": 108400 }, { "epoch": 0.10684055389610901, "grad_norm": 2.501852035522461, "learning_rate": 9.988758687486341e-06, "loss": 3.4207, "step": 108450 }, { "epoch": 0.1068898118739311, "grad_norm": 2.6238811016082764, "learning_rate": 9.98874831389056e-06, "loss": 3.3929, "step": 108500 }, { "epoch": 0.1069390698517532, "grad_norm": 2.5396251678466797, "learning_rate": 9.988737935515948e-06, "loss": 3.4537, "step": 108550 }, { "epoch": 0.10698832782957528, 
"grad_norm": 2.4617257118225098, "learning_rate": 9.988727552362518e-06, "loss": 3.436, "step": 108600 }, { "epoch": 0.10703758580739736, "grad_norm": 2.3989791870117188, "learning_rate": 9.988717164430277e-06, "loss": 3.3739, "step": 108650 }, { "epoch": 0.10708684378521946, "grad_norm": 2.312450885772705, "learning_rate": 9.988706771719234e-06, "loss": 3.4191, "step": 108700 }, { "epoch": 0.10713610176304154, "grad_norm": 2.483731985092163, "learning_rate": 9.988696374229402e-06, "loss": 3.4227, "step": 108750 }, { "epoch": 0.10718535974086363, "grad_norm": 2.3549368381500244, "learning_rate": 9.98868597196079e-06, "loss": 3.443, "step": 108800 }, { "epoch": 0.10723461771868571, "grad_norm": 2.426194906234741, "learning_rate": 9.988675564913406e-06, "loss": 3.4576, "step": 108850 }, { "epoch": 0.10728387569650781, "grad_norm": 2.4857242107391357, "learning_rate": 9.988665153087262e-06, "loss": 3.4134, "step": 108900 }, { "epoch": 0.1073331336743299, "grad_norm": 2.5802876949310303, "learning_rate": 9.988654736482367e-06, "loss": 3.4335, "step": 108950 }, { "epoch": 0.10738239165215198, "grad_norm": 2.865737199783325, "learning_rate": 9.988644315098733e-06, "loss": 3.3975, "step": 109000 }, { "epoch": 0.10743164962997408, "grad_norm": 2.97513747215271, "learning_rate": 9.988633888936365e-06, "loss": 3.4605, "step": 109050 }, { "epoch": 0.10748090760779616, "grad_norm": 2.5182788372039795, "learning_rate": 9.98862345799528e-06, "loss": 3.3922, "step": 109100 }, { "epoch": 0.10753016558561825, "grad_norm": 2.4117255210876465, "learning_rate": 9.988613022275483e-06, "loss": 3.4451, "step": 109150 }, { "epoch": 0.10757942356344033, "grad_norm": 2.4379544258117676, "learning_rate": 9.988602581776985e-06, "loss": 3.3883, "step": 109200 }, { "epoch": 0.10762868154126243, "grad_norm": 2.53374981880188, "learning_rate": 9.988592136499798e-06, "loss": 3.4512, "step": 109250 }, { "epoch": 0.10767793951908451, "grad_norm": 2.4503746032714844, "learning_rate": 
9.988581686443928e-06, "loss": 3.4373, "step": 109300 }, { "epoch": 0.1077271974969066, "grad_norm": 2.627257823944092, "learning_rate": 9.98857123160939e-06, "loss": 3.3885, "step": 109350 }, { "epoch": 0.10777645547472868, "grad_norm": 2.5857720375061035, "learning_rate": 9.98856077199619e-06, "loss": 3.3983, "step": 109400 }, { "epoch": 0.10782571345255078, "grad_norm": 2.38537859916687, "learning_rate": 9.988550307604337e-06, "loss": 3.4365, "step": 109450 }, { "epoch": 0.10787497143037286, "grad_norm": 2.4997787475585938, "learning_rate": 9.988539838433846e-06, "loss": 3.3819, "step": 109500 }, { "epoch": 0.10792422940819495, "grad_norm": 2.4812381267547607, "learning_rate": 9.988529364484725e-06, "loss": 3.4365, "step": 109550 }, { "epoch": 0.10797348738601704, "grad_norm": 2.4586451053619385, "learning_rate": 9.988518885756983e-06, "loss": 3.3783, "step": 109600 }, { "epoch": 0.10802274536383913, "grad_norm": 2.6401255130767822, "learning_rate": 9.98850840225063e-06, "loss": 3.4164, "step": 109650 }, { "epoch": 0.10807200334166121, "grad_norm": 2.408278465270996, "learning_rate": 9.988497913965677e-06, "loss": 3.4213, "step": 109700 }, { "epoch": 0.1081212613194833, "grad_norm": 2.5933334827423096, "learning_rate": 9.988487420902134e-06, "loss": 3.4468, "step": 109750 }, { "epoch": 0.1081705192973054, "grad_norm": 2.545337438583374, "learning_rate": 9.98847692306001e-06, "loss": 3.4492, "step": 109800 }, { "epoch": 0.10821977727512748, "grad_norm": 2.454753875732422, "learning_rate": 9.988466420439316e-06, "loss": 3.4457, "step": 109850 }, { "epoch": 0.10826903525294956, "grad_norm": 2.561382532119751, "learning_rate": 9.988455913040061e-06, "loss": 3.4308, "step": 109900 }, { "epoch": 0.10831829323077166, "grad_norm": 2.6643896102905273, "learning_rate": 9.988445400862256e-06, "loss": 3.4418, "step": 109950 }, { "epoch": 0.10836755120859375, "grad_norm": 2.618042469024658, "learning_rate": 9.988434883905911e-06, "loss": 3.4745, "step": 110000 }, { "epoch": 
0.10841680918641583, "grad_norm": 2.850022315979004, "learning_rate": 9.988424362171037e-06, "loss": 3.3956, "step": 110050 }, { "epoch": 0.10846606716423791, "grad_norm": 2.4870285987854004, "learning_rate": 9.988413835657642e-06, "loss": 3.3957, "step": 110100 }, { "epoch": 0.10851532514206001, "grad_norm": 2.5803768634796143, "learning_rate": 9.988403304365739e-06, "loss": 3.4583, "step": 110150 }, { "epoch": 0.1085645831198821, "grad_norm": 2.4523186683654785, "learning_rate": 9.988392768295335e-06, "loss": 3.4915, "step": 110200 }, { "epoch": 0.10861384109770418, "grad_norm": 2.5048675537109375, "learning_rate": 9.98838222744644e-06, "loss": 3.3927, "step": 110250 }, { "epoch": 0.10866309907552628, "grad_norm": 2.492319107055664, "learning_rate": 9.988371681819067e-06, "loss": 3.3645, "step": 110300 }, { "epoch": 0.10871235705334836, "grad_norm": 3.1961677074432373, "learning_rate": 9.988361131413224e-06, "loss": 3.4328, "step": 110350 }, { "epoch": 0.10876161503117045, "grad_norm": 2.7520387172698975, "learning_rate": 9.988350576228922e-06, "loss": 3.4286, "step": 110400 }, { "epoch": 0.10881087300899253, "grad_norm": 2.4118900299072266, "learning_rate": 9.988340016266172e-06, "loss": 3.4115, "step": 110450 }, { "epoch": 0.10886013098681463, "grad_norm": 2.679257869720459, "learning_rate": 9.98832945152498e-06, "loss": 3.4613, "step": 110500 }, { "epoch": 0.10890938896463671, "grad_norm": 2.663564443588257, "learning_rate": 9.988318882005362e-06, "loss": 3.4672, "step": 110550 }, { "epoch": 0.1089586469424588, "grad_norm": 2.775219440460205, "learning_rate": 9.988308307707325e-06, "loss": 3.4399, "step": 110600 }, { "epoch": 0.10900790492028088, "grad_norm": 2.414954423904419, "learning_rate": 9.988297728630878e-06, "loss": 3.4513, "step": 110650 }, { "epoch": 0.10905716289810298, "grad_norm": 2.408740758895874, "learning_rate": 9.988287144776032e-06, "loss": 3.4014, "step": 110700 }, { "epoch": 0.10910642087592506, "grad_norm": 2.64711594581604, 
"learning_rate": 9.988276556142797e-06, "loss": 3.4552, "step": 110750 }, { "epoch": 0.10915567885374715, "grad_norm": 2.481914520263672, "learning_rate": 9.988265962731185e-06, "loss": 3.4262, "step": 110800 }, { "epoch": 0.10920493683156925, "grad_norm": 2.362440824508667, "learning_rate": 9.988255364541204e-06, "loss": 3.3931, "step": 110850 }, { "epoch": 0.10925419480939133, "grad_norm": 2.5215508937835693, "learning_rate": 9.988244761572867e-06, "loss": 3.4026, "step": 110900 }, { "epoch": 0.10930345278721341, "grad_norm": 2.737044334411621, "learning_rate": 9.988234153826179e-06, "loss": 3.4186, "step": 110950 }, { "epoch": 0.1093527107650355, "grad_norm": 2.4314589500427246, "learning_rate": 9.988223541301157e-06, "loss": 3.4509, "step": 111000 }, { "epoch": 0.1094019687428576, "grad_norm": 2.458906412124634, "learning_rate": 9.988212923997805e-06, "loss": 3.3762, "step": 111050 }, { "epoch": 0.10945122672067968, "grad_norm": 2.286505699157715, "learning_rate": 9.988202301916135e-06, "loss": 3.3965, "step": 111100 }, { "epoch": 0.10950048469850177, "grad_norm": 2.4821290969848633, "learning_rate": 9.988191675056157e-06, "loss": 3.4179, "step": 111150 }, { "epoch": 0.10954974267632386, "grad_norm": 2.560870409011841, "learning_rate": 9.988181043417886e-06, "loss": 3.3824, "step": 111200 }, { "epoch": 0.10959900065414595, "grad_norm": 2.3149266242980957, "learning_rate": 9.988170407001325e-06, "loss": 3.4375, "step": 111250 }, { "epoch": 0.10964825863196803, "grad_norm": 2.9345152378082275, "learning_rate": 9.988159765806489e-06, "loss": 3.4518, "step": 111300 }, { "epoch": 0.10969751660979012, "grad_norm": 2.558701753616333, "learning_rate": 9.988149119833386e-06, "loss": 3.4397, "step": 111350 }, { "epoch": 0.10974677458761221, "grad_norm": 2.439044952392578, "learning_rate": 9.988138469082026e-06, "loss": 3.4335, "step": 111400 }, { "epoch": 0.1097960325654343, "grad_norm": 2.5487821102142334, "learning_rate": 9.98812781355242e-06, "loss": 3.3254, "step": 
111450 }, { "epoch": 0.10984529054325638, "grad_norm": 2.534282684326172, "learning_rate": 9.988117153244579e-06, "loss": 3.4294, "step": 111500 }, { "epoch": 0.10989454852107848, "grad_norm": 2.481159210205078, "learning_rate": 9.988106488158513e-06, "loss": 3.4523, "step": 111550 }, { "epoch": 0.10994380649890056, "grad_norm": 2.996933937072754, "learning_rate": 9.98809581829423e-06, "loss": 3.4765, "step": 111600 }, { "epoch": 0.10999306447672265, "grad_norm": 2.6198647022247314, "learning_rate": 9.988085143651742e-06, "loss": 3.4115, "step": 111650 }, { "epoch": 0.11004232245454473, "grad_norm": 2.666602611541748, "learning_rate": 9.98807446423106e-06, "loss": 3.4559, "step": 111700 }, { "epoch": 0.11009158043236683, "grad_norm": 2.5945160388946533, "learning_rate": 9.988063780032193e-06, "loss": 3.4468, "step": 111750 }, { "epoch": 0.11014083841018892, "grad_norm": 2.5586225986480713, "learning_rate": 9.98805309105515e-06, "loss": 3.3748, "step": 111800 }, { "epoch": 0.110190096388011, "grad_norm": 2.5541446208953857, "learning_rate": 9.988042397299945e-06, "loss": 3.4333, "step": 111850 }, { "epoch": 0.11023935436583308, "grad_norm": 2.7662041187286377, "learning_rate": 9.988031698766585e-06, "loss": 3.4169, "step": 111900 }, { "epoch": 0.11028861234365518, "grad_norm": 2.494795083999634, "learning_rate": 9.988020995455081e-06, "loss": 3.401, "step": 111950 }, { "epoch": 0.11033787032147727, "grad_norm": 2.4045021533966064, "learning_rate": 9.988010287365444e-06, "loss": 3.3917, "step": 112000 }, { "epoch": 0.11038712829929935, "grad_norm": 2.6514217853546143, "learning_rate": 9.987999574497683e-06, "loss": 3.3701, "step": 112050 }, { "epoch": 0.11043638627712145, "grad_norm": 2.3198745250701904, "learning_rate": 9.98798885685181e-06, "loss": 3.3977, "step": 112100 }, { "epoch": 0.11048564425494353, "grad_norm": 2.224750518798828, "learning_rate": 9.987978134427833e-06, "loss": 3.416, "step": 112150 }, { "epoch": 0.11053490223276562, "grad_norm": 
2.7536113262176514, "learning_rate": 9.987967407225765e-06, "loss": 3.4961, "step": 112200 }, { "epoch": 0.1105841602105877, "grad_norm": 2.658810615539551, "learning_rate": 9.987956675245614e-06, "loss": 3.374, "step": 112250 }, { "epoch": 0.1106334181884098, "grad_norm": 2.635411024093628, "learning_rate": 9.98794593848739e-06, "loss": 3.3899, "step": 112300 }, { "epoch": 0.11068267616623188, "grad_norm": 2.497048854827881, "learning_rate": 9.987935196951107e-06, "loss": 3.4449, "step": 112350 }, { "epoch": 0.11073193414405397, "grad_norm": 2.940375328063965, "learning_rate": 9.98792445063677e-06, "loss": 3.4267, "step": 112400 }, { "epoch": 0.11078119212187607, "grad_norm": 2.435006856918335, "learning_rate": 9.987913699544394e-06, "loss": 3.3526, "step": 112450 }, { "epoch": 0.11083045009969815, "grad_norm": 2.365358352661133, "learning_rate": 9.987902943673986e-06, "loss": 3.4065, "step": 112500 }, { "epoch": 0.11087970807752023, "grad_norm": 2.5144810676574707, "learning_rate": 9.987892183025557e-06, "loss": 3.3353, "step": 112550 }, { "epoch": 0.11092896605534232, "grad_norm": 2.7100422382354736, "learning_rate": 9.98788141759912e-06, "loss": 3.3739, "step": 112600 }, { "epoch": 0.11097822403316442, "grad_norm": 2.4877023696899414, "learning_rate": 9.987870647394682e-06, "loss": 3.4418, "step": 112650 }, { "epoch": 0.1110274820109865, "grad_norm": 2.6677637100219727, "learning_rate": 9.987859872412252e-06, "loss": 3.3764, "step": 112700 }, { "epoch": 0.11107673998880858, "grad_norm": 2.953568935394287, "learning_rate": 9.987849092651847e-06, "loss": 3.4827, "step": 112750 }, { "epoch": 0.11112599796663068, "grad_norm": 2.5357887744903564, "learning_rate": 9.987838308113471e-06, "loss": 3.4318, "step": 112800 }, { "epoch": 0.11117525594445277, "grad_norm": 2.476473808288574, "learning_rate": 9.987827518797138e-06, "loss": 3.4021, "step": 112850 }, { "epoch": 0.11122451392227485, "grad_norm": 2.4701685905456543, "learning_rate": 9.987816724702857e-06, "loss": 
3.432, "step": 112900 }, { "epoch": 0.11127377190009693, "grad_norm": 2.5180435180664062, "learning_rate": 9.987805925830637e-06, "loss": 3.4738, "step": 112950 }, { "epoch": 0.11132302987791903, "grad_norm": 2.4799001216888428, "learning_rate": 9.987795122180489e-06, "loss": 3.3909, "step": 113000 }, { "epoch": 0.11137228785574112, "grad_norm": 2.8029589653015137, "learning_rate": 9.987784313752424e-06, "loss": 3.36, "step": 113050 }, { "epoch": 0.1114215458335632, "grad_norm": 2.607605218887329, "learning_rate": 9.987773500546452e-06, "loss": 3.4136, "step": 113100 }, { "epoch": 0.11147080381138529, "grad_norm": 2.538440227508545, "learning_rate": 9.987762682562583e-06, "loss": 3.4064, "step": 113150 }, { "epoch": 0.11152006178920738, "grad_norm": 2.293917417526245, "learning_rate": 9.98775185980083e-06, "loss": 3.4403, "step": 113200 }, { "epoch": 0.11156931976702947, "grad_norm": 2.5304901599884033, "learning_rate": 9.9877410322612e-06, "loss": 3.4569, "step": 113250 }, { "epoch": 0.11161857774485155, "grad_norm": 2.6614298820495605, "learning_rate": 9.987730199943706e-06, "loss": 3.491, "step": 113300 }, { "epoch": 0.11166783572267365, "grad_norm": 2.291957139968872, "learning_rate": 9.987719362848357e-06, "loss": 3.4138, "step": 113350 }, { "epoch": 0.11171709370049573, "grad_norm": 2.4876787662506104, "learning_rate": 9.98770852097516e-06, "loss": 3.4173, "step": 113400 }, { "epoch": 0.11176635167831782, "grad_norm": 2.5979630947113037, "learning_rate": 9.987697674324131e-06, "loss": 3.3969, "step": 113450 }, { "epoch": 0.1118156096561399, "grad_norm": 2.491980791091919, "learning_rate": 9.987686822895279e-06, "loss": 3.3456, "step": 113500 }, { "epoch": 0.111864867633962, "grad_norm": 3.744938850402832, "learning_rate": 9.987675966688612e-06, "loss": 3.4717, "step": 113550 }, { "epoch": 0.11191412561178408, "grad_norm": 2.775392532348633, "learning_rate": 9.987665105704144e-06, "loss": 3.3637, "step": 113600 }, { "epoch": 0.11196338358960617, "grad_norm": 
2.565307855606079, "learning_rate": 9.987654239941883e-06, "loss": 3.4053, "step": 113650 }, { "epoch": 0.11201264156742827, "grad_norm": 2.610602617263794, "learning_rate": 9.987643369401837e-06, "loss": 3.3738, "step": 113700 }, { "epoch": 0.11206189954525035, "grad_norm": 4.281893730163574, "learning_rate": 9.987632494084023e-06, "loss": 3.3887, "step": 113750 }, { "epoch": 0.11211115752307244, "grad_norm": 2.573922872543335, "learning_rate": 9.987621613988447e-06, "loss": 3.4215, "step": 113800 }, { "epoch": 0.11216041550089452, "grad_norm": 2.975816488265991, "learning_rate": 9.987610729115119e-06, "loss": 3.37, "step": 113850 }, { "epoch": 0.11220967347871662, "grad_norm": 2.7732412815093994, "learning_rate": 9.98759983946405e-06, "loss": 3.3931, "step": 113900 }, { "epoch": 0.1122589314565387, "grad_norm": 2.857531785964966, "learning_rate": 9.987588945035252e-06, "loss": 3.4307, "step": 113950 }, { "epoch": 0.11230818943436079, "grad_norm": 2.2794342041015625, "learning_rate": 9.987578045828733e-06, "loss": 3.4288, "step": 114000 }, { "epoch": 0.11235744741218287, "grad_norm": 2.3085877895355225, "learning_rate": 9.987567141844507e-06, "loss": 3.3807, "step": 114050 }, { "epoch": 0.11240670539000497, "grad_norm": 2.9306790828704834, "learning_rate": 9.98755623308258e-06, "loss": 3.3656, "step": 114100 }, { "epoch": 0.11245596336782705, "grad_norm": 2.5620062351226807, "learning_rate": 9.987545319542966e-06, "loss": 3.4017, "step": 114150 }, { "epoch": 0.11250522134564914, "grad_norm": 2.433534860610962, "learning_rate": 9.987534401225674e-06, "loss": 3.4368, "step": 114200 }, { "epoch": 0.11255447932347123, "grad_norm": 2.8827786445617676, "learning_rate": 9.987523478130714e-06, "loss": 3.4199, "step": 114250 }, { "epoch": 0.11260373730129332, "grad_norm": 2.3539559841156006, "learning_rate": 9.987512550258097e-06, "loss": 3.4186, "step": 114300 }, { "epoch": 0.1126529952791154, "grad_norm": 2.4843831062316895, "learning_rate": 9.987501617607832e-06, 
"loss": 3.3831, "step": 114350 }, { "epoch": 0.11270225325693749, "grad_norm": 2.338747024536133, "learning_rate": 9.987490680179935e-06, "loss": 3.4218, "step": 114400 }, { "epoch": 0.11275151123475959, "grad_norm": 2.682001829147339, "learning_rate": 9.98747973797441e-06, "loss": 3.4335, "step": 114450 }, { "epoch": 0.11280076921258167, "grad_norm": 3.1694600582122803, "learning_rate": 9.98746879099127e-06, "loss": 3.3911, "step": 114500 }, { "epoch": 0.11285002719040375, "grad_norm": 2.632359266281128, "learning_rate": 9.987457839230527e-06, "loss": 3.3858, "step": 114550 }, { "epoch": 0.11289928516822585, "grad_norm": 2.5634548664093018, "learning_rate": 9.987446882692188e-06, "loss": 3.3981, "step": 114600 }, { "epoch": 0.11294854314604794, "grad_norm": 2.483888626098633, "learning_rate": 9.987435921376266e-06, "loss": 3.3978, "step": 114650 }, { "epoch": 0.11299780112387002, "grad_norm": 2.4412615299224854, "learning_rate": 9.987424955282771e-06, "loss": 3.41, "step": 114700 }, { "epoch": 0.1130470591016921, "grad_norm": 2.408216953277588, "learning_rate": 9.987413984411714e-06, "loss": 3.3307, "step": 114750 }, { "epoch": 0.1130963170795142, "grad_norm": 2.5929269790649414, "learning_rate": 9.987403008763105e-06, "loss": 3.3723, "step": 114800 }, { "epoch": 0.11314557505733629, "grad_norm": 2.459618330001831, "learning_rate": 9.987392028336954e-06, "loss": 3.3847, "step": 114850 }, { "epoch": 0.11319483303515837, "grad_norm": 2.676130533218384, "learning_rate": 9.987381043133272e-06, "loss": 3.4079, "step": 114900 }, { "epoch": 0.11324409101298047, "grad_norm": 2.4563794136047363, "learning_rate": 9.98737005315207e-06, "loss": 3.419, "step": 114950 }, { "epoch": 0.11329334899080255, "grad_norm": 2.5015296936035156, "learning_rate": 9.987359058393357e-06, "loss": 3.3676, "step": 115000 }, { "epoch": 0.11334260696862464, "grad_norm": 2.5444447994232178, "learning_rate": 9.987348058857145e-06, "loss": 3.3635, "step": 115050 }, { "epoch": 0.11339186494644672, 
"grad_norm": 2.5149121284484863, "learning_rate": 9.987337054543445e-06, "loss": 3.49, "step": 115100 }, { "epoch": 0.11344112292426882, "grad_norm": 2.5545473098754883, "learning_rate": 9.987326045452265e-06, "loss": 3.3597, "step": 115150 }, { "epoch": 0.1134903809020909, "grad_norm": 2.407400608062744, "learning_rate": 9.987315031583617e-06, "loss": 3.4364, "step": 115200 }, { "epoch": 0.11353963887991299, "grad_norm": 2.4975545406341553, "learning_rate": 9.987304012937513e-06, "loss": 3.3888, "step": 115250 }, { "epoch": 0.11358889685773507, "grad_norm": 2.6385886669158936, "learning_rate": 9.987292989513962e-06, "loss": 3.3589, "step": 115300 }, { "epoch": 0.11363815483555717, "grad_norm": 2.5099990367889404, "learning_rate": 9.987281961312975e-06, "loss": 3.3985, "step": 115350 }, { "epoch": 0.11368741281337925, "grad_norm": 2.9271302223205566, "learning_rate": 9.987270928334562e-06, "loss": 3.4282, "step": 115400 }, { "epoch": 0.11373667079120134, "grad_norm": 2.9757797718048096, "learning_rate": 9.987259890578736e-06, "loss": 3.4637, "step": 115450 }, { "epoch": 0.11378592876902344, "grad_norm": 2.576037883758545, "learning_rate": 9.987248848045503e-06, "loss": 3.3522, "step": 115500 }, { "epoch": 0.11383518674684552, "grad_norm": 3.0438504219055176, "learning_rate": 9.987237800734876e-06, "loss": 3.4641, "step": 115550 }, { "epoch": 0.1138844447246676, "grad_norm": 2.401563882827759, "learning_rate": 9.987226748646868e-06, "loss": 3.3784, "step": 115600 }, { "epoch": 0.11393370270248969, "grad_norm": 2.865088939666748, "learning_rate": 9.987215691781485e-06, "loss": 3.3725, "step": 115650 }, { "epoch": 0.11398296068031179, "grad_norm": 2.649177074432373, "learning_rate": 9.987204630138742e-06, "loss": 3.401, "step": 115700 }, { "epoch": 0.11403221865813387, "grad_norm": 2.3352818489074707, "learning_rate": 9.987193563718645e-06, "loss": 3.3696, "step": 115750 }, { "epoch": 0.11408147663595596, "grad_norm": 2.5297980308532715, "learning_rate": 
9.987182492521211e-06, "loss": 3.3961, "step": 115800 }, { "epoch": 0.11413073461377805, "grad_norm": 2.8760290145874023, "learning_rate": 9.987171416546443e-06, "loss": 3.3294, "step": 115850 }, { "epoch": 0.11417999259160014, "grad_norm": 2.4567251205444336, "learning_rate": 9.987160335794357e-06, "loss": 3.3969, "step": 115900 }, { "epoch": 0.11422925056942222, "grad_norm": 2.4522342681884766, "learning_rate": 9.98714925026496e-06, "loss": 3.3311, "step": 115950 }, { "epoch": 0.1142785085472443, "grad_norm": 2.6093015670776367, "learning_rate": 9.987138159958268e-06, "loss": 3.3944, "step": 116000 }, { "epoch": 0.1143277665250664, "grad_norm": 2.5351269245147705, "learning_rate": 9.987127064874284e-06, "loss": 3.4455, "step": 116050 }, { "epoch": 0.11437702450288849, "grad_norm": 2.4875807762145996, "learning_rate": 9.987115965013026e-06, "loss": 3.4511, "step": 116100 }, { "epoch": 0.11442628248071057, "grad_norm": 2.4287095069885254, "learning_rate": 9.9871048603745e-06, "loss": 3.3922, "step": 116150 }, { "epoch": 0.11447554045853267, "grad_norm": 2.602886438369751, "learning_rate": 9.987093750958719e-06, "loss": 3.3649, "step": 116200 }, { "epoch": 0.11452479843635475, "grad_norm": 2.7385787963867188, "learning_rate": 9.987082636765692e-06, "loss": 3.3948, "step": 116250 }, { "epoch": 0.11457405641417684, "grad_norm": 2.4290661811828613, "learning_rate": 9.98707151779543e-06, "loss": 3.3955, "step": 116300 }, { "epoch": 0.11462331439199892, "grad_norm": 2.3842856884002686, "learning_rate": 9.987060394047945e-06, "loss": 3.3444, "step": 116350 }, { "epoch": 0.11467257236982102, "grad_norm": 2.7101097106933594, "learning_rate": 9.987049265523245e-06, "loss": 3.3841, "step": 116400 }, { "epoch": 0.1147218303476431, "grad_norm": 2.384512424468994, "learning_rate": 9.987038132221345e-06, "loss": 3.4431, "step": 116450 }, { "epoch": 0.11477108832546519, "grad_norm": 2.711897373199463, "learning_rate": 9.98702699414225e-06, "loss": 3.4281, "step": 116500 }, { 
"epoch": 0.11482034630328727, "grad_norm": 3.0361146926879883, "learning_rate": 9.987015851285976e-06, "loss": 3.3605, "step": 116550 }, { "epoch": 0.11486960428110937, "grad_norm": 2.562206506729126, "learning_rate": 9.987004703652531e-06, "loss": 3.4053, "step": 116600 }, { "epoch": 0.11491886225893146, "grad_norm": 2.438856601715088, "learning_rate": 9.986993551241926e-06, "loss": 3.4045, "step": 116650 }, { "epoch": 0.11496812023675354, "grad_norm": 2.4061732292175293, "learning_rate": 9.98698239405417e-06, "loss": 3.4771, "step": 116700 }, { "epoch": 0.11501737821457564, "grad_norm": 2.3539230823516846, "learning_rate": 9.986971232089277e-06, "loss": 3.4231, "step": 116750 }, { "epoch": 0.11506663619239772, "grad_norm": 2.4087886810302734, "learning_rate": 9.986960065347255e-06, "loss": 3.4331, "step": 116800 }, { "epoch": 0.1151158941702198, "grad_norm": 2.420719861984253, "learning_rate": 9.986948893828116e-06, "loss": 3.351, "step": 116850 }, { "epoch": 0.11516515214804189, "grad_norm": 2.4583969116210938, "learning_rate": 9.986937717531873e-06, "loss": 3.3334, "step": 116900 }, { "epoch": 0.11521441012586399, "grad_norm": 2.504702568054199, "learning_rate": 9.986926536458531e-06, "loss": 3.3842, "step": 116950 }, { "epoch": 0.11526366810368607, "grad_norm": 2.506558895111084, "learning_rate": 9.986915350608104e-06, "loss": 3.3571, "step": 117000 }, { "epoch": 0.11531292608150816, "grad_norm": 2.288512706756592, "learning_rate": 9.986904159980604e-06, "loss": 3.4083, "step": 117050 }, { "epoch": 0.11536218405933026, "grad_norm": 2.3676340579986572, "learning_rate": 9.98689296457604e-06, "loss": 3.3842, "step": 117100 }, { "epoch": 0.11541144203715234, "grad_norm": 2.7400388717651367, "learning_rate": 9.986881764394423e-06, "loss": 3.3377, "step": 117150 }, { "epoch": 0.11546070001497442, "grad_norm": 2.4120335578918457, "learning_rate": 9.986870559435763e-06, "loss": 3.4129, "step": 117200 }, { "epoch": 0.11550995799279651, "grad_norm": 2.4424989223480225, 
"learning_rate": 9.986859349700073e-06, "loss": 3.3205, "step": 117250 }, { "epoch": 0.1155592159706186, "grad_norm": 2.194718360900879, "learning_rate": 9.986848135187362e-06, "loss": 3.3958, "step": 117300 }, { "epoch": 0.11560847394844069, "grad_norm": 2.450803518295288, "learning_rate": 9.98683691589764e-06, "loss": 3.3719, "step": 117350 }, { "epoch": 0.11565773192626277, "grad_norm": 2.6813697814941406, "learning_rate": 9.98682569183092e-06, "loss": 3.3923, "step": 117400 }, { "epoch": 0.11570698990408486, "grad_norm": 2.2688100337982178, "learning_rate": 9.98681446298721e-06, "loss": 3.4133, "step": 117450 }, { "epoch": 0.11575624788190696, "grad_norm": 2.422471284866333, "learning_rate": 9.986803229366523e-06, "loss": 3.4232, "step": 117500 }, { "epoch": 0.11580550585972904, "grad_norm": 2.672807216644287, "learning_rate": 9.986791990968869e-06, "loss": 3.3791, "step": 117550 }, { "epoch": 0.11585476383755113, "grad_norm": 2.564429521560669, "learning_rate": 9.986780747794258e-06, "loss": 3.3959, "step": 117600 }, { "epoch": 0.11590402181537322, "grad_norm": 2.615173816680908, "learning_rate": 9.986769499842703e-06, "loss": 3.4127, "step": 117650 }, { "epoch": 0.11595327979319531, "grad_norm": 2.2499983310699463, "learning_rate": 9.986758247114213e-06, "loss": 3.4047, "step": 117700 }, { "epoch": 0.11600253777101739, "grad_norm": 2.393465042114258, "learning_rate": 9.986746989608797e-06, "loss": 3.4065, "step": 117750 }, { "epoch": 0.11605179574883948, "grad_norm": 2.6967051029205322, "learning_rate": 9.986735727326471e-06, "loss": 3.3891, "step": 117800 }, { "epoch": 0.11610105372666157, "grad_norm": 2.344642162322998, "learning_rate": 9.98672446026724e-06, "loss": 3.3676, "step": 117850 }, { "epoch": 0.11615031170448366, "grad_norm": 2.5381789207458496, "learning_rate": 9.986713188431118e-06, "loss": 3.4245, "step": 117900 }, { "epoch": 0.11619956968230574, "grad_norm": 2.7226550579071045, "learning_rate": 9.986701911818118e-06, "loss": 3.3739, "step": 
117950 }, { "epoch": 0.11624882766012784, "grad_norm": 2.496734142303467, "learning_rate": 9.986690630428245e-06, "loss": 3.4016, "step": 118000 }, { "epoch": 0.11629808563794992, "grad_norm": 2.3350565433502197, "learning_rate": 9.986679344261515e-06, "loss": 3.3929, "step": 118050 }, { "epoch": 0.11634734361577201, "grad_norm": 2.6802561283111572, "learning_rate": 9.986668053317933e-06, "loss": 3.3455, "step": 118100 }, { "epoch": 0.11639660159359409, "grad_norm": 2.4967687129974365, "learning_rate": 9.986656757597517e-06, "loss": 3.3472, "step": 118150 }, { "epoch": 0.11644585957141619, "grad_norm": 2.9904658794403076, "learning_rate": 9.986645457100273e-06, "loss": 3.3658, "step": 118200 }, { "epoch": 0.11649511754923828, "grad_norm": 2.658781051635742, "learning_rate": 9.986634151826214e-06, "loss": 3.4207, "step": 118250 }, { "epoch": 0.11654437552706036, "grad_norm": 2.4548611640930176, "learning_rate": 9.98662284177535e-06, "loss": 3.3874, "step": 118300 }, { "epoch": 0.11659363350488246, "grad_norm": 2.3201584815979004, "learning_rate": 9.986611526947691e-06, "loss": 3.3915, "step": 118350 }, { "epoch": 0.11664289148270454, "grad_norm": 2.2402336597442627, "learning_rate": 9.986600207343249e-06, "loss": 3.3725, "step": 118400 }, { "epoch": 0.11669214946052663, "grad_norm": 2.5013039112091064, "learning_rate": 9.986588882962034e-06, "loss": 3.3552, "step": 118450 }, { "epoch": 0.11674140743834871, "grad_norm": 2.4952564239501953, "learning_rate": 9.986577553804058e-06, "loss": 3.406, "step": 118500 }, { "epoch": 0.11679066541617081, "grad_norm": 2.4043290615081787, "learning_rate": 9.98656621986933e-06, "loss": 3.3593, "step": 118550 }, { "epoch": 0.11683992339399289, "grad_norm": 2.5888142585754395, "learning_rate": 9.986554881157864e-06, "loss": 3.3795, "step": 118600 }, { "epoch": 0.11688918137181498, "grad_norm": 2.7791879177093506, "learning_rate": 9.986543537669667e-06, "loss": 3.4292, "step": 118650 }, { "epoch": 0.11693843934963706, "grad_norm": 
2.4515879154205322, "learning_rate": 9.986532189404754e-06, "loss": 3.3872, "step": 118700 }, { "epoch": 0.11698769732745916, "grad_norm": 2.5687601566314697, "learning_rate": 9.986520836363132e-06, "loss": 3.4178, "step": 118750 }, { "epoch": 0.11703695530528124, "grad_norm": 2.5656723976135254, "learning_rate": 9.986509478544812e-06, "loss": 3.4869, "step": 118800 }, { "epoch": 0.11708621328310333, "grad_norm": 2.6637625694274902, "learning_rate": 9.98649811594981e-06, "loss": 3.3672, "step": 118850 }, { "epoch": 0.11713547126092543, "grad_norm": 2.489666223526001, "learning_rate": 9.98648674857813e-06, "loss": 3.4695, "step": 118900 }, { "epoch": 0.11718472923874751, "grad_norm": 2.4020326137542725, "learning_rate": 9.986475376429787e-06, "loss": 3.4194, "step": 118950 }, { "epoch": 0.1172339872165696, "grad_norm": 2.356001615524292, "learning_rate": 9.986463999504792e-06, "loss": 3.4037, "step": 119000 }, { "epoch": 0.11728324519439168, "grad_norm": 2.5342743396759033, "learning_rate": 9.986452617803155e-06, "loss": 3.3623, "step": 119050 }, { "epoch": 0.11733250317221378, "grad_norm": 2.199453353881836, "learning_rate": 9.986441231324886e-06, "loss": 3.3526, "step": 119100 }, { "epoch": 0.11738176115003586, "grad_norm": 2.533907651901245, "learning_rate": 9.986429840069998e-06, "loss": 3.3416, "step": 119150 }, { "epoch": 0.11743101912785794, "grad_norm": 2.598482608795166, "learning_rate": 9.986418444038499e-06, "loss": 3.3478, "step": 119200 }, { "epoch": 0.11748027710568004, "grad_norm": 2.2924203872680664, "learning_rate": 9.986407043230404e-06, "loss": 3.3704, "step": 119250 }, { "epoch": 0.11752953508350213, "grad_norm": 3.296250104904175, "learning_rate": 9.98639563764572e-06, "loss": 3.3679, "step": 119300 }, { "epoch": 0.11757879306132421, "grad_norm": 2.2870922088623047, "learning_rate": 9.986384227284459e-06, "loss": 3.3773, "step": 119350 }, { "epoch": 0.1176280510391463, "grad_norm": 2.4280543327331543, "learning_rate": 9.986372812146633e-06, 
"loss": 3.4006, "step": 119400 }, { "epoch": 0.11767730901696839, "grad_norm": 2.4473037719726562, "learning_rate": 9.986361392232252e-06, "loss": 3.3982, "step": 119450 }, { "epoch": 0.11772656699479048, "grad_norm": 2.367959976196289, "learning_rate": 9.986349967541326e-06, "loss": 3.3366, "step": 119500 }, { "epoch": 0.11777582497261256, "grad_norm": 2.499955177307129, "learning_rate": 9.98633853807387e-06, "loss": 3.3938, "step": 119550 }, { "epoch": 0.11782508295043466, "grad_norm": 2.601670980453491, "learning_rate": 9.986327103829892e-06, "loss": 3.4265, "step": 119600 }, { "epoch": 0.11787434092825674, "grad_norm": 2.3009002208709717, "learning_rate": 9.986315664809401e-06, "loss": 3.363, "step": 119650 }, { "epoch": 0.11792359890607883, "grad_norm": 2.57027268409729, "learning_rate": 9.986304221012411e-06, "loss": 3.3786, "step": 119700 }, { "epoch": 0.11797285688390091, "grad_norm": 2.846162796020508, "learning_rate": 9.98629277243893e-06, "loss": 3.3378, "step": 119750 }, { "epoch": 0.11802211486172301, "grad_norm": 2.499687671661377, "learning_rate": 9.986281319088976e-06, "loss": 3.3746, "step": 119800 }, { "epoch": 0.1180713728395451, "grad_norm": 2.413245439529419, "learning_rate": 9.98626986096255e-06, "loss": 3.3939, "step": 119850 }, { "epoch": 0.11812063081736718, "grad_norm": 2.565244197845459, "learning_rate": 9.986258398059669e-06, "loss": 3.3749, "step": 119900 }, { "epoch": 0.11816988879518926, "grad_norm": 2.9420268535614014, "learning_rate": 9.986246930380346e-06, "loss": 3.3252, "step": 119950 }, { "epoch": 0.11821914677301136, "grad_norm": 2.5591540336608887, "learning_rate": 9.986235457924585e-06, "loss": 3.3613, "step": 120000 }, { "epoch": 0.11826840475083344, "grad_norm": 2.420945644378662, "learning_rate": 9.986223980692404e-06, "loss": 3.4282, "step": 120050 }, { "epoch": 0.11831766272865553, "grad_norm": 2.4914259910583496, "learning_rate": 9.986212498683808e-06, "loss": 3.333, "step": 120100 }, { "epoch": 0.11836692070647763, 
"grad_norm": 2.4967002868652344, "learning_rate": 9.986201011898813e-06, "loss": 3.411, "step": 120150 }, { "epoch": 0.11841617868429971, "grad_norm": 2.440558433532715, "learning_rate": 9.986189520337428e-06, "loss": 3.358, "step": 120200 }, { "epoch": 0.1184654366621218, "grad_norm": 2.6565065383911133, "learning_rate": 9.986178023999661e-06, "loss": 3.3219, "step": 120250 }, { "epoch": 0.11851469463994388, "grad_norm": 2.4349400997161865, "learning_rate": 9.986166522885529e-06, "loss": 3.412, "step": 120300 }, { "epoch": 0.11856395261776598, "grad_norm": 2.342496395111084, "learning_rate": 9.986155016995038e-06, "loss": 3.3634, "step": 120350 }, { "epoch": 0.11861321059558806, "grad_norm": 2.357039213180542, "learning_rate": 9.986143506328203e-06, "loss": 3.3296, "step": 120400 }, { "epoch": 0.11866246857341015, "grad_norm": 2.4615566730499268, "learning_rate": 9.986131990885031e-06, "loss": 3.4237, "step": 120450 }, { "epoch": 0.11871172655123224, "grad_norm": 2.640629768371582, "learning_rate": 9.986120470665537e-06, "loss": 3.3853, "step": 120500 }, { "epoch": 0.11876098452905433, "grad_norm": 3.8036375045776367, "learning_rate": 9.986108945669728e-06, "loss": 3.4622, "step": 120550 }, { "epoch": 0.11881024250687641, "grad_norm": 2.3227899074554443, "learning_rate": 9.986097415897618e-06, "loss": 3.3347, "step": 120600 }, { "epoch": 0.1188595004846985, "grad_norm": 2.4758241176605225, "learning_rate": 9.986085881349216e-06, "loss": 3.3862, "step": 120650 }, { "epoch": 0.1189087584625206, "grad_norm": 2.5483996868133545, "learning_rate": 9.986074342024536e-06, "loss": 3.3601, "step": 120700 }, { "epoch": 0.11895801644034268, "grad_norm": 2.3937344551086426, "learning_rate": 9.986062797923587e-06, "loss": 3.3481, "step": 120750 }, { "epoch": 0.11900727441816476, "grad_norm": 2.416670560836792, "learning_rate": 9.986051249046379e-06, "loss": 3.3648, "step": 120800 }, { "epoch": 0.11905653239598685, "grad_norm": 2.3983354568481445, "learning_rate": 
9.986039695392925e-06, "loss": 3.3242, "step": 120850 }, { "epoch": 0.11910579037380895, "grad_norm": 2.3442978858947754, "learning_rate": 9.986028136963234e-06, "loss": 3.3946, "step": 120900 }, { "epoch": 0.11915504835163103, "grad_norm": 2.420311689376831, "learning_rate": 9.986016573757322e-06, "loss": 3.3143, "step": 120950 }, { "epoch": 0.11920430632945311, "grad_norm": 2.454395055770874, "learning_rate": 9.986005005775195e-06, "loss": 3.3566, "step": 121000 }, { "epoch": 0.11925356430727521, "grad_norm": 2.85882306098938, "learning_rate": 9.985993433016864e-06, "loss": 3.3512, "step": 121050 }, { "epoch": 0.1193028222850973, "grad_norm": 2.697298049926758, "learning_rate": 9.985981855482343e-06, "loss": 3.3747, "step": 121100 }, { "epoch": 0.11935208026291938, "grad_norm": 3.051520347595215, "learning_rate": 9.985970273171641e-06, "loss": 3.3738, "step": 121150 }, { "epoch": 0.11940133824074146, "grad_norm": Infinity, "learning_rate": 9.985958686084772e-06, "loss": 3.424, "step": 121200 }, { "epoch": 0.11945059621856356, "grad_norm": 2.6462533473968506, "learning_rate": 9.985947094221743e-06, "loss": 3.3941, "step": 121250 }, { "epoch": 0.11949985419638565, "grad_norm": 2.2422988414764404, "learning_rate": 9.985935497582567e-06, "loss": 3.3592, "step": 121300 }, { "epoch": 0.11954911217420773, "grad_norm": 2.5982553958892822, "learning_rate": 9.985923896167257e-06, "loss": 3.3838, "step": 121350 }, { "epoch": 0.11959837015202983, "grad_norm": 2.4534506797790527, "learning_rate": 9.98591228997582e-06, "loss": 3.3209, "step": 121400 }, { "epoch": 0.11964762812985191, "grad_norm": 2.539276361465454, "learning_rate": 9.985900679008272e-06, "loss": 3.3532, "step": 121450 }, { "epoch": 0.119696886107674, "grad_norm": 2.7114434242248535, "learning_rate": 9.98588906326462e-06, "loss": 3.3845, "step": 121500 }, { "epoch": 0.11974614408549608, "grad_norm": 2.4282960891723633, "learning_rate": 9.985877442744876e-06, "loss": 3.387, "step": 121550 }, { "epoch": 
0.11979540206331818, "grad_norm": 2.381016492843628, "learning_rate": 9.985865817449054e-06, "loss": 3.3797, "step": 121600 }, { "epoch": 0.11984466004114026, "grad_norm": 2.7338695526123047, "learning_rate": 9.985854187377162e-06, "loss": 3.4115, "step": 121650 }, { "epoch": 0.11989391801896235, "grad_norm": 2.306929111480713, "learning_rate": 9.98584255252921e-06, "loss": 3.308, "step": 121700 }, { "epoch": 0.11994317599678445, "grad_norm": 2.5156071186065674, "learning_rate": 9.985830912905214e-06, "loss": 3.3536, "step": 121750 }, { "epoch": 0.11999243397460653, "grad_norm": 2.9159653186798096, "learning_rate": 9.98581926850518e-06, "loss": 3.3338, "step": 121800 }, { "epoch": 0.12004169195242861, "grad_norm": 2.4436471462249756, "learning_rate": 9.985807619329125e-06, "loss": 3.3451, "step": 121850 }, { "epoch": 0.1200909499302507, "grad_norm": 2.936408281326294, "learning_rate": 9.985795965377055e-06, "loss": 3.409, "step": 121900 }, { "epoch": 0.1201402079080728, "grad_norm": 2.6326308250427246, "learning_rate": 9.985784306648983e-06, "loss": 3.3498, "step": 121950 }, { "epoch": 0.12018946588589488, "grad_norm": 2.7693350315093994, "learning_rate": 9.985772643144919e-06, "loss": 3.4038, "step": 122000 }, { "epoch": 0.12023872386371696, "grad_norm": 2.5912423133850098, "learning_rate": 9.985760974864877e-06, "loss": 3.3555, "step": 122050 }, { "epoch": 0.12028798184153905, "grad_norm": 2.664357900619507, "learning_rate": 9.985749301808865e-06, "loss": 3.4534, "step": 122100 }, { "epoch": 0.12033723981936115, "grad_norm": 2.665661573410034, "learning_rate": 9.985737623976896e-06, "loss": 3.4263, "step": 122150 }, { "epoch": 0.12038649779718323, "grad_norm": 2.4949114322662354, "learning_rate": 9.985725941368982e-06, "loss": 3.3545, "step": 122200 }, { "epoch": 0.12043575577500532, "grad_norm": 2.5493500232696533, "learning_rate": 9.98571425398513e-06, "loss": 3.3484, "step": 122250 }, { "epoch": 0.12048501375282741, "grad_norm": 2.5368669033050537, 
"learning_rate": 9.985702561825357e-06, "loss": 3.3268, "step": 122300 }, { "epoch": 0.1205342717306495, "grad_norm": 2.389108180999756, "learning_rate": 9.98569086488967e-06, "loss": 3.3515, "step": 122350 }, { "epoch": 0.12058352970847158, "grad_norm": 2.8298611640930176, "learning_rate": 9.985679163178081e-06, "loss": 3.3818, "step": 122400 }, { "epoch": 0.12063278768629367, "grad_norm": 2.499889373779297, "learning_rate": 9.985667456690603e-06, "loss": 3.3564, "step": 122450 }, { "epoch": 0.12068204566411576, "grad_norm": 2.4032227993011475, "learning_rate": 9.985655745427246e-06, "loss": 3.3711, "step": 122500 }, { "epoch": 0.12073130364193785, "grad_norm": 2.599724292755127, "learning_rate": 9.985644029388022e-06, "loss": 3.2988, "step": 122550 }, { "epoch": 0.12078056161975993, "grad_norm": 2.9838271141052246, "learning_rate": 9.98563230857294e-06, "loss": 3.3695, "step": 122600 }, { "epoch": 0.12082981959758203, "grad_norm": 2.3377504348754883, "learning_rate": 9.985620582982011e-06, "loss": 3.3895, "step": 122650 }, { "epoch": 0.12087907757540411, "grad_norm": 2.69498610496521, "learning_rate": 9.985608852615252e-06, "loss": 3.3382, "step": 122700 }, { "epoch": 0.1209283355532262, "grad_norm": 3.016826868057251, "learning_rate": 9.985597117472667e-06, "loss": 3.3315, "step": 122750 }, { "epoch": 0.12097759353104828, "grad_norm": 2.6515443325042725, "learning_rate": 9.985585377554272e-06, "loss": 3.4221, "step": 122800 }, { "epoch": 0.12102685150887038, "grad_norm": 2.4090754985809326, "learning_rate": 9.985573632860075e-06, "loss": 3.3556, "step": 122850 }, { "epoch": 0.12107610948669247, "grad_norm": 2.5005359649658203, "learning_rate": 9.98556188339009e-06, "loss": 3.3568, "step": 122900 }, { "epoch": 0.12112536746451455, "grad_norm": 2.7486608028411865, "learning_rate": 9.985550129144326e-06, "loss": 3.3633, "step": 122950 }, { "epoch": 0.12117462544233665, "grad_norm": 2.646340847015381, "learning_rate": 9.985538370122796e-06, "loss": 3.4326, "step": 
123000 }, { "epoch": 0.12122388342015873, "grad_norm": 2.5295801162719727, "learning_rate": 9.985526606325512e-06, "loss": 3.4044, "step": 123050 }, { "epoch": 0.12127314139798082, "grad_norm": 2.5088963508605957, "learning_rate": 9.985514837752483e-06, "loss": 3.3727, "step": 123100 }, { "epoch": 0.1213223993758029, "grad_norm": 2.481874465942383, "learning_rate": 9.985503064403721e-06, "loss": 3.3439, "step": 123150 }, { "epoch": 0.121371657353625, "grad_norm": 2.664064645767212, "learning_rate": 9.985491286279238e-06, "loss": 3.3924, "step": 123200 }, { "epoch": 0.12142091533144708, "grad_norm": 2.445732593536377, "learning_rate": 9.985479503379043e-06, "loss": 3.3778, "step": 123250 }, { "epoch": 0.12147017330926917, "grad_norm": 2.5974678993225098, "learning_rate": 9.985467715703151e-06, "loss": 3.37, "step": 123300 }, { "epoch": 0.12151943128709125, "grad_norm": 2.5868520736694336, "learning_rate": 9.98545592325157e-06, "loss": 3.3612, "step": 123350 }, { "epoch": 0.12156868926491335, "grad_norm": 2.523359537124634, "learning_rate": 9.985444126024313e-06, "loss": 3.356, "step": 123400 }, { "epoch": 0.12161794724273543, "grad_norm": 2.445063829421997, "learning_rate": 9.985432324021392e-06, "loss": 3.3267, "step": 123450 }, { "epoch": 0.12166720522055752, "grad_norm": 2.591714859008789, "learning_rate": 9.985420517242816e-06, "loss": 3.4095, "step": 123500 }, { "epoch": 0.12171646319837962, "grad_norm": 2.659104347229004, "learning_rate": 9.985408705688598e-06, "loss": 3.4179, "step": 123550 }, { "epoch": 0.1217657211762017, "grad_norm": 2.5036659240722656, "learning_rate": 9.985396889358748e-06, "loss": 3.3967, "step": 123600 }, { "epoch": 0.12181497915402378, "grad_norm": 2.3203115463256836, "learning_rate": 9.98538506825328e-06, "loss": 3.4213, "step": 123650 }, { "epoch": 0.12186423713184587, "grad_norm": 2.3608579635620117, "learning_rate": 9.985373242372202e-06, "loss": 3.3931, "step": 123700 }, { "epoch": 0.12191349510966797, "grad_norm": 
2.6284995079040527, "learning_rate": 9.985361411715527e-06, "loss": 3.3248, "step": 123750 }, { "epoch": 0.12196275308749005, "grad_norm": 2.335951089859009, "learning_rate": 9.985349576283267e-06, "loss": 3.4039, "step": 123800 }, { "epoch": 0.12201201106531213, "grad_norm": 2.4036426544189453, "learning_rate": 9.985337736075431e-06, "loss": 3.3789, "step": 123850 }, { "epoch": 0.12206126904313423, "grad_norm": 2.565584897994995, "learning_rate": 9.985325891092033e-06, "loss": 3.4236, "step": 123900 }, { "epoch": 0.12211052702095632, "grad_norm": 2.6063807010650635, "learning_rate": 9.985314041333083e-06, "loss": 3.4226, "step": 123950 }, { "epoch": 0.1221597849987784, "grad_norm": 2.611232280731201, "learning_rate": 9.985302186798593e-06, "loss": 3.3117, "step": 124000 }, { "epoch": 0.12220904297660048, "grad_norm": 2.4905476570129395, "learning_rate": 9.985290327488572e-06, "loss": 3.365, "step": 124050 }, { "epoch": 0.12225830095442258, "grad_norm": 2.5467653274536133, "learning_rate": 9.985278463403034e-06, "loss": 3.3794, "step": 124100 }, { "epoch": 0.12230755893224467, "grad_norm": 2.6284213066101074, "learning_rate": 9.985266594541991e-06, "loss": 3.3888, "step": 124150 }, { "epoch": 0.12235681691006675, "grad_norm": 2.4711623191833496, "learning_rate": 9.985254720905454e-06, "loss": 3.3722, "step": 124200 }, { "epoch": 0.12240607488788885, "grad_norm": 3.0786995887756348, "learning_rate": 9.98524284249343e-06, "loss": 3.4041, "step": 124250 }, { "epoch": 0.12245533286571093, "grad_norm": 2.5038070678710938, "learning_rate": 9.985230959305935e-06, "loss": 3.3604, "step": 124300 }, { "epoch": 0.12250459084353302, "grad_norm": 2.5674710273742676, "learning_rate": 9.98521907134298e-06, "loss": 3.4048, "step": 124350 }, { "epoch": 0.1225538488213551, "grad_norm": 2.59773325920105, "learning_rate": 9.985207178604574e-06, "loss": 3.4055, "step": 124400 }, { "epoch": 0.1226031067991772, "grad_norm": 2.373234272003174, "learning_rate": 9.98519528109073e-06, 
"loss": 3.3459, "step": 124450 }, { "epoch": 0.12265236477699928, "grad_norm": 2.448775291442871, "learning_rate": 9.985183378801461e-06, "loss": 3.3651, "step": 124500 }, { "epoch": 0.12270162275482137, "grad_norm": 2.51942777633667, "learning_rate": 9.985171471736775e-06, "loss": 3.4245, "step": 124550 }, { "epoch": 0.12275088073264345, "grad_norm": 2.5790319442749023, "learning_rate": 9.985159559896686e-06, "loss": 3.4078, "step": 124600 }, { "epoch": 0.12280013871046555, "grad_norm": 2.4631943702697754, "learning_rate": 9.985147643281205e-06, "loss": 3.3464, "step": 124650 }, { "epoch": 0.12284939668828763, "grad_norm": 2.5700247287750244, "learning_rate": 9.985135721890342e-06, "loss": 3.3293, "step": 124700 }, { "epoch": 0.12289865466610972, "grad_norm": 2.6977152824401855, "learning_rate": 9.98512379572411e-06, "loss": 3.286, "step": 124750 }, { "epoch": 0.12294791264393182, "grad_norm": 2.402130365371704, "learning_rate": 9.98511186478252e-06, "loss": 3.4421, "step": 124800 }, { "epoch": 0.1229971706217539, "grad_norm": 2.6723873615264893, "learning_rate": 9.985099929065582e-06, "loss": 3.3605, "step": 124850 }, { "epoch": 0.12304642859957599, "grad_norm": 2.8979437351226807, "learning_rate": 9.98508798857331e-06, "loss": 3.3561, "step": 124900 }, { "epoch": 0.12309568657739807, "grad_norm": 2.2580835819244385, "learning_rate": 9.985076043305713e-06, "loss": 3.3198, "step": 124950 }, { "epoch": 0.12314494455522017, "grad_norm": 2.860356330871582, "learning_rate": 9.985064093262803e-06, "loss": 3.325, "step": 125000 }, { "epoch": 0.12319420253304225, "grad_norm": 2.4417760372161865, "learning_rate": 9.985052138444593e-06, "loss": 3.3606, "step": 125050 }, { "epoch": 0.12324346051086434, "grad_norm": 2.25171160697937, "learning_rate": 9.985040178851093e-06, "loss": 3.3449, "step": 125100 }, { "epoch": 0.12329271848868643, "grad_norm": 2.885326623916626, "learning_rate": 9.985028214482315e-06, "loss": 3.4114, "step": 125150 }, { "epoch": 0.12334197646650852, 
"grad_norm": 2.3991408348083496, "learning_rate": 9.98501624533827e-06, "loss": 3.3933, "step": 125200 }, { "epoch": 0.1233912344443306, "grad_norm": 2.696899890899658, "learning_rate": 9.98500427141897e-06, "loss": 3.348, "step": 125250 }, { "epoch": 0.12344049242215269, "grad_norm": 2.531999111175537, "learning_rate": 9.984992292724427e-06, "loss": 3.4239, "step": 125300 }, { "epoch": 0.12348975039997478, "grad_norm": 2.6619131565093994, "learning_rate": 9.98498030925465e-06, "loss": 3.3471, "step": 125350 }, { "epoch": 0.12353900837779687, "grad_norm": 2.84726881980896, "learning_rate": 9.984968321009654e-06, "loss": 3.3269, "step": 125400 }, { "epoch": 0.12358826635561895, "grad_norm": 2.5031566619873047, "learning_rate": 9.984956327989448e-06, "loss": 3.3903, "step": 125450 }, { "epoch": 0.12363752433344104, "grad_norm": 2.4966518878936768, "learning_rate": 9.984944330194045e-06, "loss": 3.345, "step": 125500 }, { "epoch": 0.12368678231126314, "grad_norm": 2.308844804763794, "learning_rate": 9.984932327623454e-06, "loss": 3.4259, "step": 125550 }, { "epoch": 0.12373604028908522, "grad_norm": 2.545069694519043, "learning_rate": 9.984920320277689e-06, "loss": 3.4161, "step": 125600 }, { "epoch": 0.1237852982669073, "grad_norm": 2.506284713745117, "learning_rate": 9.984908308156763e-06, "loss": 3.3083, "step": 125650 }, { "epoch": 0.1238345562447294, "grad_norm": 2.846367835998535, "learning_rate": 9.984896291260682e-06, "loss": 3.4033, "step": 125700 }, { "epoch": 0.12388381422255149, "grad_norm": 2.473738670349121, "learning_rate": 9.984884269589462e-06, "loss": 3.3572, "step": 125750 }, { "epoch": 0.12393307220037357, "grad_norm": 2.783252716064453, "learning_rate": 9.984872243143111e-06, "loss": 3.3345, "step": 125800 }, { "epoch": 0.12398233017819565, "grad_norm": 2.513139486312866, "learning_rate": 9.984860211921645e-06, "loss": 3.3149, "step": 125850 }, { "epoch": 0.12403158815601775, "grad_norm": 2.4643876552581787, "learning_rate": 9.984848175925073e-06, 
"loss": 3.3681, "step": 125900 }, { "epoch": 0.12408084613383984, "grad_norm": 2.3363003730773926, "learning_rate": 9.984836135153409e-06, "loss": 3.414, "step": 125950 }, { "epoch": 0.12413010411166192, "grad_norm": 2.3982656002044678, "learning_rate": 9.984824089606659e-06, "loss": 3.311, "step": 126000 }, { "epoch": 0.12417936208948402, "grad_norm": 2.3449435234069824, "learning_rate": 9.984812039284838e-06, "loss": 3.3731, "step": 126050 }, { "epoch": 0.1242286200673061, "grad_norm": 2.5280776023864746, "learning_rate": 9.984799984187958e-06, "loss": 3.3904, "step": 126100 }, { "epoch": 0.12427787804512819, "grad_norm": 2.6861987113952637, "learning_rate": 9.984787924316033e-06, "loss": 3.3425, "step": 126150 }, { "epoch": 0.12432713602295027, "grad_norm": 2.347813367843628, "learning_rate": 9.984775859669067e-06, "loss": 3.4089, "step": 126200 }, { "epoch": 0.12437639400077237, "grad_norm": 2.3460347652435303, "learning_rate": 9.984763790247078e-06, "loss": 3.3517, "step": 126250 }, { "epoch": 0.12442565197859445, "grad_norm": 2.53999400138855, "learning_rate": 9.984751716050077e-06, "loss": 3.2911, "step": 126300 }, { "epoch": 0.12447490995641654, "grad_norm": 2.590735673904419, "learning_rate": 9.984739637078072e-06, "loss": 3.3965, "step": 126350 }, { "epoch": 0.12452416793423864, "grad_norm": 2.4318368434906006, "learning_rate": 9.984727553331078e-06, "loss": 3.3027, "step": 126400 }, { "epoch": 0.12457342591206072, "grad_norm": 2.2616639137268066, "learning_rate": 9.984715464809103e-06, "loss": 3.396, "step": 126450 }, { "epoch": 0.1246226838898828, "grad_norm": 2.5016982555389404, "learning_rate": 9.984703371512163e-06, "loss": 3.3572, "step": 126500 }, { "epoch": 0.12467194186770489, "grad_norm": 2.3482229709625244, "learning_rate": 9.984691273440269e-06, "loss": 3.3767, "step": 126550 }, { "epoch": 0.12472119984552699, "grad_norm": 2.5039920806884766, "learning_rate": 9.984679170593427e-06, "loss": 3.3509, "step": 126600 }, { "epoch": 
0.12477045782334907, "grad_norm": 2.7355797290802, "learning_rate": 9.984667062971656e-06, "loss": 3.3252, "step": 126650 }, { "epoch": 0.12481971580117115, "grad_norm": 2.572892904281616, "learning_rate": 9.984654950574963e-06, "loss": 3.3435, "step": 126700 }, { "epoch": 0.12486897377899324, "grad_norm": 2.4872069358825684, "learning_rate": 9.984642833403361e-06, "loss": 3.354, "step": 126750 }, { "epoch": 0.12491823175681534, "grad_norm": 2.4385619163513184, "learning_rate": 9.984630711456862e-06, "loss": 3.372, "step": 126800 }, { "epoch": 0.12496748973463742, "grad_norm": 2.4161319732666016, "learning_rate": 9.984618584735477e-06, "loss": 3.4139, "step": 126850 }, { "epoch": 0.12501674771245952, "grad_norm": 2.577685594558716, "learning_rate": 9.984606453239218e-06, "loss": 3.375, "step": 126900 }, { "epoch": 0.1250660056902816, "grad_norm": 2.304260730743408, "learning_rate": 9.984594316968095e-06, "loss": 3.3388, "step": 126950 }, { "epoch": 0.1251152636681037, "grad_norm": 2.5747199058532715, "learning_rate": 9.984582175922121e-06, "loss": 3.3446, "step": 127000 }, { "epoch": 0.12516452164592579, "grad_norm": 2.859485149383545, "learning_rate": 9.98457003010131e-06, "loss": 3.3462, "step": 127050 }, { "epoch": 0.12521377962374786, "grad_norm": 2.40120267868042, "learning_rate": 9.98455787950567e-06, "loss": 3.3476, "step": 127100 }, { "epoch": 0.12526303760156995, "grad_norm": 2.2962496280670166, "learning_rate": 9.984545724135213e-06, "loss": 3.3969, "step": 127150 }, { "epoch": 0.12531229557939202, "grad_norm": 3.085458517074585, "learning_rate": 9.984533563989952e-06, "loss": 3.3507, "step": 127200 }, { "epoch": 0.12536155355721412, "grad_norm": 2.4457104206085205, "learning_rate": 9.984521399069898e-06, "loss": 3.3291, "step": 127250 }, { "epoch": 0.12541081153503622, "grad_norm": 2.6180813312530518, "learning_rate": 9.984509229375064e-06, "loss": 3.404, "step": 127300 }, { "epoch": 0.1254600695128583, "grad_norm": 2.5495731830596924, "learning_rate": 
9.984497054905459e-06, "loss": 3.3685, "step": 127350 }, { "epoch": 0.1255093274906804, "grad_norm": 2.4389400482177734, "learning_rate": 9.984484875661098e-06, "loss": 3.3128, "step": 127400 }, { "epoch": 0.1255585854685025, "grad_norm": 2.565800666809082, "learning_rate": 9.984472691641989e-06, "loss": 3.2781, "step": 127450 }, { "epoch": 0.12560784344632456, "grad_norm": 2.2861902713775635, "learning_rate": 9.984460502848146e-06, "loss": 3.3634, "step": 127500 }, { "epoch": 0.12565710142414666, "grad_norm": 2.599116086959839, "learning_rate": 9.98444830927958e-06, "loss": 3.333, "step": 127550 }, { "epoch": 0.12570635940196875, "grad_norm": 2.475360155105591, "learning_rate": 9.984436110936302e-06, "loss": 3.4194, "step": 127600 }, { "epoch": 0.12575561737979082, "grad_norm": 2.8991916179656982, "learning_rate": 9.984423907818325e-06, "loss": 3.3118, "step": 127650 }, { "epoch": 0.12580487535761292, "grad_norm": 2.756822347640991, "learning_rate": 9.984411699925663e-06, "loss": 3.4449, "step": 127700 }, { "epoch": 0.12585413333543502, "grad_norm": 2.6148695945739746, "learning_rate": 9.984399487258321e-06, "loss": 3.3703, "step": 127750 }, { "epoch": 0.1259033913132571, "grad_norm": 2.5003671646118164, "learning_rate": 9.984387269816316e-06, "loss": 3.3664, "step": 127800 }, { "epoch": 0.1259526492910792, "grad_norm": 2.6351938247680664, "learning_rate": 9.984375047599659e-06, "loss": 3.2768, "step": 127850 }, { "epoch": 0.12600190726890126, "grad_norm": 2.361804246902466, "learning_rate": 9.98436282060836e-06, "loss": 3.3174, "step": 127900 }, { "epoch": 0.12605116524672336, "grad_norm": 2.313678026199341, "learning_rate": 9.984350588842433e-06, "loss": 3.3497, "step": 127950 }, { "epoch": 0.12610042322454545, "grad_norm": 2.446336030960083, "learning_rate": 9.984338352301888e-06, "loss": 3.3676, "step": 128000 }, { "epoch": 0.12614968120236753, "grad_norm": 2.4840900897979736, "learning_rate": 9.984326110986737e-06, "loss": 3.352, "step": 128050 }, { "epoch": 
0.12619893918018962, "grad_norm": 2.4800477027893066, "learning_rate": 9.98431386489699e-06, "loss": 3.3736, "step": 128100 }, { "epoch": 0.12624819715801172, "grad_norm": 2.34621262550354, "learning_rate": 9.984301614032664e-06, "loss": 3.3201, "step": 128150 }, { "epoch": 0.1262974551358338, "grad_norm": 2.4865853786468506, "learning_rate": 9.984289358393765e-06, "loss": 3.394, "step": 128200 }, { "epoch": 0.1263467131136559, "grad_norm": 2.546816110610962, "learning_rate": 9.984277097980308e-06, "loss": 3.3659, "step": 128250 }, { "epoch": 0.126395971091478, "grad_norm": 2.4290060997009277, "learning_rate": 9.984264832792303e-06, "loss": 3.2442, "step": 128300 }, { "epoch": 0.12644522906930006, "grad_norm": 2.3757212162017822, "learning_rate": 9.984252562829764e-06, "loss": 3.4223, "step": 128350 }, { "epoch": 0.12649448704712216, "grad_norm": 2.5061349868774414, "learning_rate": 9.9842402880927e-06, "loss": 3.3722, "step": 128400 }, { "epoch": 0.12654374502494423, "grad_norm": 2.4515717029571533, "learning_rate": 9.984228008581125e-06, "loss": 3.3824, "step": 128450 }, { "epoch": 0.12659300300276632, "grad_norm": 2.509746551513672, "learning_rate": 9.98421572429505e-06, "loss": 3.4066, "step": 128500 }, { "epoch": 0.12664226098058842, "grad_norm": 2.514237403869629, "learning_rate": 9.984203435234485e-06, "loss": 3.3711, "step": 128550 }, { "epoch": 0.1266915189584105, "grad_norm": 2.3045802116394043, "learning_rate": 9.984191141399447e-06, "loss": 3.3603, "step": 128600 }, { "epoch": 0.1267407769362326, "grad_norm": 2.6559059619903564, "learning_rate": 9.98417884278994e-06, "loss": 3.3484, "step": 128650 }, { "epoch": 0.1267900349140547, "grad_norm": 2.2892990112304688, "learning_rate": 9.984166539405982e-06, "loss": 3.3772, "step": 128700 }, { "epoch": 0.12683929289187676, "grad_norm": 2.599705934524536, "learning_rate": 9.984154231247583e-06, "loss": 3.3043, "step": 128750 }, { "epoch": 0.12688855086969886, "grad_norm": 2.4101521968841553, "learning_rate": 
9.984141918314753e-06, "loss": 3.3553, "step": 128800 }, { "epoch": 0.12693780884752096, "grad_norm": 2.4606688022613525, "learning_rate": 9.984129600607508e-06, "loss": 3.3417, "step": 128850 }, { "epoch": 0.12698706682534303, "grad_norm": 2.3904287815093994, "learning_rate": 9.984117278125855e-06, "loss": 3.3756, "step": 128900 }, { "epoch": 0.12703632480316512, "grad_norm": 2.428767204284668, "learning_rate": 9.984104950869809e-06, "loss": 3.3664, "step": 128950 }, { "epoch": 0.12708558278098722, "grad_norm": 2.3415589332580566, "learning_rate": 9.984092618839379e-06, "loss": 3.3881, "step": 129000 }, { "epoch": 0.1271348407588093, "grad_norm": 2.3242931365966797, "learning_rate": 9.98408028203458e-06, "loss": 3.4072, "step": 129050 }, { "epoch": 0.1271840987366314, "grad_norm": 2.567004442214966, "learning_rate": 9.984067940455423e-06, "loss": 3.3348, "step": 129100 }, { "epoch": 0.12723335671445346, "grad_norm": 2.415694236755371, "learning_rate": 9.984055594101918e-06, "loss": 3.4026, "step": 129150 }, { "epoch": 0.12728261469227556, "grad_norm": 2.474930763244629, "learning_rate": 9.984043242974078e-06, "loss": 3.3146, "step": 129200 }, { "epoch": 0.12733187267009766, "grad_norm": 2.440046787261963, "learning_rate": 9.984030887071915e-06, "loss": 3.3101, "step": 129250 }, { "epoch": 0.12738113064791973, "grad_norm": 2.488495111465454, "learning_rate": 9.98401852639544e-06, "loss": 3.3929, "step": 129300 }, { "epoch": 0.12743038862574183, "grad_norm": 2.4777398109436035, "learning_rate": 9.984006160944667e-06, "loss": 3.4003, "step": 129350 }, { "epoch": 0.12747964660356392, "grad_norm": 2.858377695083618, "learning_rate": 9.983993790719605e-06, "loss": 3.4004, "step": 129400 }, { "epoch": 0.127528904581386, "grad_norm": 2.243535280227661, "learning_rate": 9.98398141572027e-06, "loss": 3.3571, "step": 129450 }, { "epoch": 0.1275781625592081, "grad_norm": 2.4967029094696045, "learning_rate": 9.983969035946669e-06, "loss": 3.4008, "step": 129500 }, { "epoch": 
0.1276274205370302, "grad_norm": 2.518839120864868, "learning_rate": 9.983956651398815e-06, "loss": 3.4043, "step": 129550 }, { "epoch": 0.12767667851485226, "grad_norm": 2.799847364425659, "learning_rate": 9.983944262076722e-06, "loss": 3.3867, "step": 129600 }, { "epoch": 0.12772593649267436, "grad_norm": 2.2840347290039062, "learning_rate": 9.983931867980402e-06, "loss": 3.3585, "step": 129650 }, { "epoch": 0.12777519447049643, "grad_norm": 2.584991216659546, "learning_rate": 9.983919469109863e-06, "loss": 3.3536, "step": 129700 }, { "epoch": 0.12782445244831853, "grad_norm": 2.4160144329071045, "learning_rate": 9.98390706546512e-06, "loss": 3.4074, "step": 129750 }, { "epoch": 0.12787371042614062, "grad_norm": 2.4027867317199707, "learning_rate": 9.983894657046184e-06, "loss": 3.3337, "step": 129800 }, { "epoch": 0.1279229684039627, "grad_norm": 2.308525800704956, "learning_rate": 9.98388224385307e-06, "loss": 3.3701, "step": 129850 }, { "epoch": 0.1279722263817848, "grad_norm": 2.4812049865722656, "learning_rate": 9.983869825885783e-06, "loss": 3.3853, "step": 129900 }, { "epoch": 0.1280214843596069, "grad_norm": 2.2166528701782227, "learning_rate": 9.983857403144342e-06, "loss": 3.2372, "step": 129950 }, { "epoch": 0.12807074233742896, "grad_norm": 2.6954221725463867, "learning_rate": 9.983844975628755e-06, "loss": 3.2424, "step": 130000 }, { "epoch": 0.12812000031525106, "grad_norm": 2.6420326232910156, "learning_rate": 9.983832543339035e-06, "loss": 3.3877, "step": 130050 }, { "epoch": 0.12816925829307316, "grad_norm": 2.455517292022705, "learning_rate": 9.983820106275193e-06, "loss": 3.3032, "step": 130100 }, { "epoch": 0.12821851627089523, "grad_norm": 2.4757537841796875, "learning_rate": 9.983807664437242e-06, "loss": 3.3668, "step": 130150 }, { "epoch": 0.12826777424871733, "grad_norm": 2.6717588901519775, "learning_rate": 9.983795217825194e-06, "loss": 3.3114, "step": 130200 }, { "epoch": 0.12831703222653942, "grad_norm": 2.5731923580169678, 
"learning_rate": 9.98378276643906e-06, "loss": 3.3721, "step": 130250 }, { "epoch": 0.1283662902043615, "grad_norm": 2.5594942569732666, "learning_rate": 9.983770310278852e-06, "loss": 3.3748, "step": 130300 }, { "epoch": 0.1284155481821836, "grad_norm": 2.729557752609253, "learning_rate": 9.983757849344581e-06, "loss": 3.3027, "step": 130350 }, { "epoch": 0.12846480616000566, "grad_norm": 2.6334259510040283, "learning_rate": 9.983745383636264e-06, "loss": 3.3768, "step": 130400 }, { "epoch": 0.12851406413782776, "grad_norm": 2.390185832977295, "learning_rate": 9.983732913153908e-06, "loss": 3.3419, "step": 130450 }, { "epoch": 0.12856332211564986, "grad_norm": 2.517690420150757, "learning_rate": 9.983720437897523e-06, "loss": 3.3187, "step": 130500 }, { "epoch": 0.12861258009347193, "grad_norm": 2.3586184978485107, "learning_rate": 9.983707957867127e-06, "loss": 3.3218, "step": 130550 }, { "epoch": 0.12866183807129403, "grad_norm": 2.5505306720733643, "learning_rate": 9.98369547306273e-06, "loss": 3.3987, "step": 130600 }, { "epoch": 0.12871109604911612, "grad_norm": 2.8113210201263428, "learning_rate": 9.983682983484339e-06, "loss": 3.3257, "step": 130650 }, { "epoch": 0.1287603540269382, "grad_norm": 2.822786331176758, "learning_rate": 9.983670489131974e-06, "loss": 3.3675, "step": 130700 }, { "epoch": 0.1288096120047603, "grad_norm": 2.6194067001342773, "learning_rate": 9.983657990005639e-06, "loss": 3.3179, "step": 130750 }, { "epoch": 0.1288588699825824, "grad_norm": 2.5005898475646973, "learning_rate": 9.983645486105353e-06, "loss": 3.3272, "step": 130800 }, { "epoch": 0.12890812796040446, "grad_norm": 2.4400925636291504, "learning_rate": 9.983632977431123e-06, "loss": 3.3259, "step": 130850 }, { "epoch": 0.12895738593822656, "grad_norm": 2.3297295570373535, "learning_rate": 9.983620463982965e-06, "loss": 3.3491, "step": 130900 }, { "epoch": 0.12900664391604863, "grad_norm": 2.541872262954712, "learning_rate": 9.983607945760886e-06, "loss": 3.3252, "step": 
130950 }, { "epoch": 0.12905590189387073, "grad_norm": 2.4682912826538086, "learning_rate": 9.983595422764905e-06, "loss": 3.3592, "step": 131000 }, { "epoch": 0.12910515987169283, "grad_norm": 2.2377772331237793, "learning_rate": 9.983582894995027e-06, "loss": 3.3833, "step": 131050 }, { "epoch": 0.1291544178495149, "grad_norm": 2.6657533645629883, "learning_rate": 9.983570362451266e-06, "loss": 3.3513, "step": 131100 }, { "epoch": 0.129203675827337, "grad_norm": 2.37954044342041, "learning_rate": 9.983557825133637e-06, "loss": 3.3383, "step": 131150 }, { "epoch": 0.1292529338051591, "grad_norm": 2.6916258335113525, "learning_rate": 9.98354528304215e-06, "loss": 3.3272, "step": 131200 }, { "epoch": 0.12930219178298116, "grad_norm": 2.580704927444458, "learning_rate": 9.983532736176813e-06, "loss": 3.365, "step": 131250 }, { "epoch": 0.12935144976080326, "grad_norm": 2.2672581672668457, "learning_rate": 9.983520184537645e-06, "loss": 3.344, "step": 131300 }, { "epoch": 0.12940070773862536, "grad_norm": 2.870878219604492, "learning_rate": 9.983507628124656e-06, "loss": 3.3709, "step": 131350 }, { "epoch": 0.12944996571644743, "grad_norm": 2.403296709060669, "learning_rate": 9.983495066937854e-06, "loss": 3.2934, "step": 131400 }, { "epoch": 0.12949922369426953, "grad_norm": 2.4617629051208496, "learning_rate": 9.983482500977256e-06, "loss": 3.3159, "step": 131450 }, { "epoch": 0.12954848167209163, "grad_norm": 2.588036298751831, "learning_rate": 9.983469930242872e-06, "loss": 3.3195, "step": 131500 }, { "epoch": 0.1295977396499137, "grad_norm": 2.6028099060058594, "learning_rate": 9.983457354734714e-06, "loss": 3.3944, "step": 131550 }, { "epoch": 0.1296469976277358, "grad_norm": 3.217900037765503, "learning_rate": 9.983444774452793e-06, "loss": 3.3862, "step": 131600 }, { "epoch": 0.12969625560555786, "grad_norm": 2.891491651535034, "learning_rate": 9.983432189397122e-06, "loss": 3.3248, "step": 131650 }, { "epoch": 0.12974551358337996, "grad_norm": 
2.4643173217773438, "learning_rate": 9.983419599567713e-06, "loss": 3.3493, "step": 131700 }, { "epoch": 0.12979477156120206, "grad_norm": 2.7450125217437744, "learning_rate": 9.983407004964581e-06, "loss": 3.3022, "step": 131750 }, { "epoch": 0.12984402953902413, "grad_norm": 2.49796199798584, "learning_rate": 9.983394405587732e-06, "loss": 3.4453, "step": 131800 }, { "epoch": 0.12989328751684623, "grad_norm": 2.4183664321899414, "learning_rate": 9.983381801437184e-06, "loss": 3.2929, "step": 131850 }, { "epoch": 0.12994254549466833, "grad_norm": 2.501638889312744, "learning_rate": 9.983369192512945e-06, "loss": 3.4204, "step": 131900 }, { "epoch": 0.1299918034724904, "grad_norm": 2.937954902648926, "learning_rate": 9.98335657881503e-06, "loss": 3.3738, "step": 131950 }, { "epoch": 0.1300410614503125, "grad_norm": 2.5155978202819824, "learning_rate": 9.983343960343447e-06, "loss": 3.3179, "step": 132000 }, { "epoch": 0.1300903194281346, "grad_norm": 2.299220323562622, "learning_rate": 9.983331337098213e-06, "loss": 3.3936, "step": 132050 }, { "epoch": 0.13013957740595666, "grad_norm": 2.628406286239624, "learning_rate": 9.983318709079336e-06, "loss": 3.3773, "step": 132100 }, { "epoch": 0.13018883538377876, "grad_norm": 2.6008517742156982, "learning_rate": 9.983306076286831e-06, "loss": 3.3366, "step": 132150 }, { "epoch": 0.13023809336160083, "grad_norm": 2.501002311706543, "learning_rate": 9.983293438720709e-06, "loss": 3.3073, "step": 132200 }, { "epoch": 0.13028735133942293, "grad_norm": 2.3524646759033203, "learning_rate": 9.983280796380984e-06, "loss": 3.2445, "step": 132250 }, { "epoch": 0.13033660931724503, "grad_norm": 2.5214614868164062, "learning_rate": 9.983268149267663e-06, "loss": 3.3661, "step": 132300 }, { "epoch": 0.1303858672950671, "grad_norm": 2.677401542663574, "learning_rate": 9.983255497380762e-06, "loss": 3.3722, "step": 132350 }, { "epoch": 0.1304351252728892, "grad_norm": 2.203618049621582, "learning_rate": 9.983242840720293e-06, "loss": 
3.3688, "step": 132400 }, { "epoch": 0.1304843832507113, "grad_norm": 2.5540359020233154, "learning_rate": 9.983230179286268e-06, "loss": 3.3171, "step": 132450 }, { "epoch": 0.13053364122853336, "grad_norm": 2.3545377254486084, "learning_rate": 9.983217513078698e-06, "loss": 3.2852, "step": 132500 }, { "epoch": 0.13058289920635546, "grad_norm": 2.4534616470336914, "learning_rate": 9.983204842097597e-06, "loss": 3.3513, "step": 132550 }, { "epoch": 0.13063215718417756, "grad_norm": 2.4522480964660645, "learning_rate": 9.983192166342974e-06, "loss": 3.2542, "step": 132600 }, { "epoch": 0.13068141516199963, "grad_norm": 2.426788568496704, "learning_rate": 9.983179485814844e-06, "loss": 3.3133, "step": 132650 }, { "epoch": 0.13073067313982173, "grad_norm": 2.192673683166504, "learning_rate": 9.98316680051322e-06, "loss": 3.358, "step": 132700 }, { "epoch": 0.13077993111764383, "grad_norm": 2.5046803951263428, "learning_rate": 9.983154110438111e-06, "loss": 3.4099, "step": 132750 }, { "epoch": 0.1308291890954659, "grad_norm": 2.310718536376953, "learning_rate": 9.983141415589531e-06, "loss": 3.383, "step": 132800 }, { "epoch": 0.130878447073288, "grad_norm": 2.536156415939331, "learning_rate": 9.98312871596749e-06, "loss": 3.298, "step": 132850 }, { "epoch": 0.13092770505111007, "grad_norm": 2.384089946746826, "learning_rate": 9.983116011572005e-06, "loss": 3.331, "step": 132900 }, { "epoch": 0.13097696302893216, "grad_norm": 2.5198779106140137, "learning_rate": 9.983103302403082e-06, "loss": 3.3225, "step": 132950 }, { "epoch": 0.13102622100675426, "grad_norm": 2.49294376373291, "learning_rate": 9.983090588460738e-06, "loss": 3.2792, "step": 133000 }, { "epoch": 0.13107547898457633, "grad_norm": 2.8805341720581055, "learning_rate": 9.983077869744982e-06, "loss": 3.3651, "step": 133050 }, { "epoch": 0.13112473696239843, "grad_norm": 2.5350656509399414, "learning_rate": 9.983065146255829e-06, "loss": 3.311, "step": 133100 }, { "epoch": 0.13117399494022053, "grad_norm": 
2.5297765731811523, "learning_rate": 9.983052417993288e-06, "loss": 3.3358, "step": 133150 }, { "epoch": 0.1312232529180426, "grad_norm": 2.3900156021118164, "learning_rate": 9.983039684957376e-06, "loss": 3.3739, "step": 133200 }, { "epoch": 0.1312725108958647, "grad_norm": 2.492724895477295, "learning_rate": 9.9830269471481e-06, "loss": 3.3252, "step": 133250 }, { "epoch": 0.1313217688736868, "grad_norm": 2.492194890975952, "learning_rate": 9.983014204565476e-06, "loss": 3.3908, "step": 133300 }, { "epoch": 0.13137102685150887, "grad_norm": 2.5742130279541016, "learning_rate": 9.983001457209513e-06, "loss": 3.2843, "step": 133350 }, { "epoch": 0.13142028482933096, "grad_norm": 2.5196924209594727, "learning_rate": 9.982988705080225e-06, "loss": 3.3609, "step": 133400 }, { "epoch": 0.13146954280715303, "grad_norm": 2.7247695922851562, "learning_rate": 9.982975948177625e-06, "loss": 3.3113, "step": 133450 }, { "epoch": 0.13151880078497513, "grad_norm": 2.998905658721924, "learning_rate": 9.982963186501723e-06, "loss": 3.3593, "step": 133500 }, { "epoch": 0.13156805876279723, "grad_norm": 2.5062928199768066, "learning_rate": 9.982950420052533e-06, "loss": 3.432, "step": 133550 }, { "epoch": 0.1316173167406193, "grad_norm": 2.3104939460754395, "learning_rate": 9.982937648830067e-06, "loss": 3.3176, "step": 133600 }, { "epoch": 0.1316665747184414, "grad_norm": 2.659116744995117, "learning_rate": 9.982924872834336e-06, "loss": 3.2904, "step": 133650 }, { "epoch": 0.1317158326962635, "grad_norm": 2.500839948654175, "learning_rate": 9.982912092065353e-06, "loss": 3.3102, "step": 133700 }, { "epoch": 0.13176509067408557, "grad_norm": 2.4487648010253906, "learning_rate": 9.982899306523132e-06, "loss": 3.2999, "step": 133750 }, { "epoch": 0.13181434865190766, "grad_norm": 2.499160051345825, "learning_rate": 9.982886516207684e-06, "loss": 3.32, "step": 133800 }, { "epoch": 0.13186360662972976, "grad_norm": 2.2931950092315674, "learning_rate": 9.98287372111902e-06, "loss": 
3.3765, "step": 133850 }, { "epoch": 0.13191286460755183, "grad_norm": 2.695383310317993, "learning_rate": 9.982860921257151e-06, "loss": 3.3323, "step": 133900 }, { "epoch": 0.13196212258537393, "grad_norm": 2.518108606338501, "learning_rate": 9.982848116622095e-06, "loss": 3.2461, "step": 133950 }, { "epoch": 0.13201138056319603, "grad_norm": 2.49371075630188, "learning_rate": 9.982835307213859e-06, "loss": 3.3342, "step": 134000 }, { "epoch": 0.1320606385410181, "grad_norm": 2.5274276733398438, "learning_rate": 9.982822493032456e-06, "loss": 3.3699, "step": 134050 }, { "epoch": 0.1321098965188402, "grad_norm": 2.755802869796753, "learning_rate": 9.9828096740779e-06, "loss": 3.307, "step": 134100 }, { "epoch": 0.13215915449666227, "grad_norm": 2.741234302520752, "learning_rate": 9.982796850350202e-06, "loss": 3.3371, "step": 134150 }, { "epoch": 0.13220841247448437, "grad_norm": 2.401212215423584, "learning_rate": 9.982784021849376e-06, "loss": 3.3105, "step": 134200 }, { "epoch": 0.13225767045230646, "grad_norm": 2.590445041656494, "learning_rate": 9.982771188575432e-06, "loss": 3.354, "step": 134250 }, { "epoch": 0.13230692843012853, "grad_norm": 2.3939108848571777, "learning_rate": 9.982758350528385e-06, "loss": 3.3233, "step": 134300 }, { "epoch": 0.13235618640795063, "grad_norm": 2.8216092586517334, "learning_rate": 9.982745507708245e-06, "loss": 3.3169, "step": 134350 }, { "epoch": 0.13240544438577273, "grad_norm": 2.461487293243408, "learning_rate": 9.982732660115025e-06, "loss": 3.3017, "step": 134400 }, { "epoch": 0.1324547023635948, "grad_norm": 2.415942668914795, "learning_rate": 9.982719807748737e-06, "loss": 3.3261, "step": 134450 }, { "epoch": 0.1325039603414169, "grad_norm": 2.4388933181762695, "learning_rate": 9.982706950609395e-06, "loss": 3.3266, "step": 134500 }, { "epoch": 0.132553218319239, "grad_norm": 2.363949775695801, "learning_rate": 9.982694088697006e-06, "loss": 3.2988, "step": 134550 }, { "epoch": 0.13260247629706107, "grad_norm": 
2.2934038639068604, "learning_rate": 9.982681222011589e-06, "loss": 3.3184, "step": 134600 }, { "epoch": 0.13265173427488317, "grad_norm": 2.3930673599243164, "learning_rate": 9.982668350553155e-06, "loss": 3.3232, "step": 134650 }, { "epoch": 0.13270099225270524, "grad_norm": 2.5029680728912354, "learning_rate": 9.982655474321713e-06, "loss": 3.4058, "step": 134700 }, { "epoch": 0.13275025023052733, "grad_norm": 2.4833905696868896, "learning_rate": 9.982642593317277e-06, "loss": 3.3072, "step": 134750 }, { "epoch": 0.13279950820834943, "grad_norm": 2.5127246379852295, "learning_rate": 9.982629707539861e-06, "loss": 3.3105, "step": 134800 }, { "epoch": 0.1328487661861715, "grad_norm": 2.600982427597046, "learning_rate": 9.982616816989475e-06, "loss": 3.3695, "step": 134850 }, { "epoch": 0.1328980241639936, "grad_norm": 2.790300130844116, "learning_rate": 9.982603921666131e-06, "loss": 3.2848, "step": 134900 }, { "epoch": 0.1329472821418157, "grad_norm": 2.61690354347229, "learning_rate": 9.982591021569845e-06, "loss": 3.3811, "step": 134950 }, { "epoch": 0.13299654011963777, "grad_norm": 2.4919846057891846, "learning_rate": 9.982578116700625e-06, "loss": 3.3484, "step": 135000 }, { "epoch": 0.13304579809745987, "grad_norm": 3.043800115585327, "learning_rate": 9.982565207058487e-06, "loss": 3.2603, "step": 135050 }, { "epoch": 0.13309505607528196, "grad_norm": 2.5260419845581055, "learning_rate": 9.982552292643441e-06, "loss": 3.3228, "step": 135100 }, { "epoch": 0.13314431405310403, "grad_norm": 2.420222520828247, "learning_rate": 9.9825393734555e-06, "loss": 3.3619, "step": 135150 }, { "epoch": 0.13319357203092613, "grad_norm": 2.51693058013916, "learning_rate": 9.982526449494677e-06, "loss": 3.3347, "step": 135200 }, { "epoch": 0.1332428300087482, "grad_norm": 2.514651298522949, "learning_rate": 9.982513520760983e-06, "loss": 3.3382, "step": 135250 }, { "epoch": 0.1332920879865703, "grad_norm": 2.1774184703826904, "learning_rate": 9.98250058725443e-06, "loss": 
3.3766, "step": 135300 }, { "epoch": 0.1333413459643924, "grad_norm": 2.3952198028564453, "learning_rate": 9.982487648975034e-06, "loss": 3.2743, "step": 135350 }, { "epoch": 0.13339060394221447, "grad_norm": 2.4841527938842773, "learning_rate": 9.982474705922805e-06, "loss": 3.331, "step": 135400 }, { "epoch": 0.13343986192003657, "grad_norm": 2.3666741847991943, "learning_rate": 9.982461758097752e-06, "loss": 3.3645, "step": 135450 }, { "epoch": 0.13348911989785867, "grad_norm": 2.5795412063598633, "learning_rate": 9.982448805499894e-06, "loss": 3.324, "step": 135500 }, { "epoch": 0.13353837787568074, "grad_norm": 2.449688673019409, "learning_rate": 9.98243584812924e-06, "loss": 3.2925, "step": 135550 }, { "epoch": 0.13358763585350283, "grad_norm": 2.411428213119507, "learning_rate": 9.982422885985802e-06, "loss": 3.2568, "step": 135600 }, { "epoch": 0.13363689383132493, "grad_norm": 2.6189448833465576, "learning_rate": 9.982409919069594e-06, "loss": 3.3406, "step": 135650 }, { "epoch": 0.133686151809147, "grad_norm": 2.38584303855896, "learning_rate": 9.982396947380626e-06, "loss": 3.3161, "step": 135700 }, { "epoch": 0.1337354097869691, "grad_norm": 2.3299152851104736, "learning_rate": 9.982383970918912e-06, "loss": 3.3126, "step": 135750 }, { "epoch": 0.1337846677647912, "grad_norm": 2.528596878051758, "learning_rate": 9.982370989684465e-06, "loss": 3.3046, "step": 135800 }, { "epoch": 0.13383392574261327, "grad_norm": 2.4794840812683105, "learning_rate": 9.982358003677297e-06, "loss": 3.3107, "step": 135850 }, { "epoch": 0.13388318372043537, "grad_norm": 2.376394271850586, "learning_rate": 9.982345012897419e-06, "loss": 3.349, "step": 135900 }, { "epoch": 0.13393244169825744, "grad_norm": 2.5136525630950928, "learning_rate": 9.982332017344845e-06, "loss": 3.3232, "step": 135950 }, { "epoch": 0.13398169967607954, "grad_norm": 2.6303584575653076, "learning_rate": 9.982319017019587e-06, "loss": 3.3295, "step": 136000 }, { "epoch": 0.13403095765390163, 
"grad_norm": 2.5867836475372314, "learning_rate": 9.982306011921659e-06, "loss": 3.3331, "step": 136050 }, { "epoch": 0.1340802156317237, "grad_norm": 2.4861154556274414, "learning_rate": 9.982293002051068e-06, "loss": 3.3347, "step": 136100 }, { "epoch": 0.1341294736095458, "grad_norm": 2.449524402618408, "learning_rate": 9.982279987407835e-06, "loss": 3.2921, "step": 136150 }, { "epoch": 0.1341787315873679, "grad_norm": 2.6459333896636963, "learning_rate": 9.982266967991966e-06, "loss": 3.3242, "step": 136200 }, { "epoch": 0.13422798956518997, "grad_norm": 2.4930968284606934, "learning_rate": 9.982253943803476e-06, "loss": 3.3387, "step": 136250 }, { "epoch": 0.13427724754301207, "grad_norm": 2.3944690227508545, "learning_rate": 9.982240914842377e-06, "loss": 3.3327, "step": 136300 }, { "epoch": 0.13432650552083417, "grad_norm": 2.52659273147583, "learning_rate": 9.982227881108679e-06, "loss": 3.3072, "step": 136350 }, { "epoch": 0.13437576349865624, "grad_norm": 2.3399899005889893, "learning_rate": 9.9822148426024e-06, "loss": 3.3135, "step": 136400 }, { "epoch": 0.13442502147647833, "grad_norm": 2.50480055809021, "learning_rate": 9.982201799323547e-06, "loss": 3.3475, "step": 136450 }, { "epoch": 0.1344742794543004, "grad_norm": 2.542318105697632, "learning_rate": 9.982188751272137e-06, "loss": 3.307, "step": 136500 }, { "epoch": 0.1345235374321225, "grad_norm": 2.481058359146118, "learning_rate": 9.982175698448178e-06, "loss": 3.3891, "step": 136550 }, { "epoch": 0.1345727954099446, "grad_norm": 2.457432508468628, "learning_rate": 9.982162640851687e-06, "loss": 3.2939, "step": 136600 }, { "epoch": 0.13462205338776667, "grad_norm": 2.557265520095825, "learning_rate": 9.982149578482672e-06, "loss": 3.3961, "step": 136650 }, { "epoch": 0.13467131136558877, "grad_norm": 2.4533510208129883, "learning_rate": 9.98213651134115e-06, "loss": 3.3834, "step": 136700 }, { "epoch": 0.13472056934341087, "grad_norm": 2.3988099098205566, "learning_rate": 9.98212343942713e-06, 
"loss": 3.3672, "step": 136750 }, { "epoch": 0.13476982732123294, "grad_norm": 2.5094661712646484, "learning_rate": 9.982110362740628e-06, "loss": 3.4218, "step": 136800 }, { "epoch": 0.13481908529905504, "grad_norm": 2.4124655723571777, "learning_rate": 9.982097281281651e-06, "loss": 3.2796, "step": 136850 }, { "epoch": 0.13486834327687713, "grad_norm": 2.3302738666534424, "learning_rate": 9.982084195050217e-06, "loss": 3.3458, "step": 136900 }, { "epoch": 0.1349176012546992, "grad_norm": 2.357494354248047, "learning_rate": 9.982071104046335e-06, "loss": 3.305, "step": 136950 }, { "epoch": 0.1349668592325213, "grad_norm": 2.5255463123321533, "learning_rate": 9.982058008270022e-06, "loss": 3.3301, "step": 137000 }, { "epoch": 0.1350161172103434, "grad_norm": 2.3327341079711914, "learning_rate": 9.982044907721285e-06, "loss": 3.3674, "step": 137050 }, { "epoch": 0.13506537518816547, "grad_norm": 2.4407548904418945, "learning_rate": 9.982031802400139e-06, "loss": 3.3199, "step": 137100 }, { "epoch": 0.13511463316598757, "grad_norm": 2.3638646602630615, "learning_rate": 9.982018692306599e-06, "loss": 3.4032, "step": 137150 }, { "epoch": 0.13516389114380964, "grad_norm": 2.269465684890747, "learning_rate": 9.982005577440672e-06, "loss": 3.3223, "step": 137200 }, { "epoch": 0.13521314912163174, "grad_norm": 2.482313632965088, "learning_rate": 9.981992457802377e-06, "loss": 3.2915, "step": 137250 }, { "epoch": 0.13526240709945384, "grad_norm": 2.482332944869995, "learning_rate": 9.98197933339172e-06, "loss": 3.2595, "step": 137300 }, { "epoch": 0.1353116650772759, "grad_norm": 2.4373786449432373, "learning_rate": 9.98196620420872e-06, "loss": 3.3754, "step": 137350 }, { "epoch": 0.135360923055098, "grad_norm": 2.510951280593872, "learning_rate": 9.981953070253385e-06, "loss": 3.3194, "step": 137400 }, { "epoch": 0.1354101810329201, "grad_norm": 2.49735164642334, "learning_rate": 9.981939931525729e-06, "loss": 3.352, "step": 137450 }, { "epoch": 0.13545943901074217, 
"grad_norm": 2.3950843811035156, "learning_rate": 9.981926788025765e-06, "loss": 3.279, "step": 137500 }, { "epoch": 0.13550869698856427, "grad_norm": 2.4150116443634033, "learning_rate": 9.981913639753506e-06, "loss": 3.3117, "step": 137550 }, { "epoch": 0.13555795496638637, "grad_norm": 2.2711334228515625, "learning_rate": 9.981900486708962e-06, "loss": 3.2994, "step": 137600 }, { "epoch": 0.13560721294420844, "grad_norm": 2.7237069606781006, "learning_rate": 9.98188732889215e-06, "loss": 3.3311, "step": 137650 }, { "epoch": 0.13565647092203054, "grad_norm": 2.4380433559417725, "learning_rate": 9.981874166303078e-06, "loss": 3.3437, "step": 137700 }, { "epoch": 0.1357057288998526, "grad_norm": 2.4385933876037598, "learning_rate": 9.981860998941761e-06, "loss": 3.3122, "step": 137750 }, { "epoch": 0.1357549868776747, "grad_norm": 2.4435009956359863, "learning_rate": 9.981847826808211e-06, "loss": 3.3869, "step": 137800 }, { "epoch": 0.1358042448554968, "grad_norm": 2.4125919342041016, "learning_rate": 9.981834649902442e-06, "loss": 3.3299, "step": 137850 }, { "epoch": 0.13585350283331887, "grad_norm": 2.591780662536621, "learning_rate": 9.981821468224464e-06, "loss": 3.3554, "step": 137900 }, { "epoch": 0.13590276081114097, "grad_norm": 2.4262301921844482, "learning_rate": 9.981808281774292e-06, "loss": 3.2731, "step": 137950 }, { "epoch": 0.13595201878896307, "grad_norm": 2.5718352794647217, "learning_rate": 9.981795090551938e-06, "loss": 3.3008, "step": 138000 }, { "epoch": 0.13600127676678514, "grad_norm": 2.5085978507995605, "learning_rate": 9.981781894557414e-06, "loss": 3.2593, "step": 138050 }, { "epoch": 0.13605053474460724, "grad_norm": 2.4398982524871826, "learning_rate": 9.981768693790734e-06, "loss": 3.3507, "step": 138100 }, { "epoch": 0.13609979272242934, "grad_norm": 2.4069535732269287, "learning_rate": 9.98175548825191e-06, "loss": 3.347, "step": 138150 }, { "epoch": 0.1361490507002514, "grad_norm": 3.339010238647461, "learning_rate": 
9.981742277940952e-06, "loss": 3.3073, "step": 138200 }, { "epoch": 0.1361983086780735, "grad_norm": 2.544299602508545, "learning_rate": 9.981729062857877e-06, "loss": 3.2752, "step": 138250 }, { "epoch": 0.1362475666558956, "grad_norm": 2.288745641708374, "learning_rate": 9.981715843002696e-06, "loss": 3.3364, "step": 138300 }, { "epoch": 0.13629682463371767, "grad_norm": 2.608415365219116, "learning_rate": 9.981702618375419e-06, "loss": 3.328, "step": 138350 }, { "epoch": 0.13634608261153977, "grad_norm": 2.364572763442993, "learning_rate": 9.981689388976062e-06, "loss": 3.3116, "step": 138400 }, { "epoch": 0.13639534058936184, "grad_norm": 2.633402109146118, "learning_rate": 9.981676154804637e-06, "loss": 3.3377, "step": 138450 }, { "epoch": 0.13644459856718394, "grad_norm": 2.5236153602600098, "learning_rate": 9.981662915861156e-06, "loss": 3.3366, "step": 138500 }, { "epoch": 0.13649385654500604, "grad_norm": 2.477294921875, "learning_rate": 9.981649672145633e-06, "loss": 3.3233, "step": 138550 }, { "epoch": 0.1365431145228281, "grad_norm": 2.413985013961792, "learning_rate": 9.981636423658078e-06, "loss": 3.3572, "step": 138600 }, { "epoch": 0.1365923725006502, "grad_norm": 2.4874064922332764, "learning_rate": 9.981623170398506e-06, "loss": 3.2786, "step": 138650 }, { "epoch": 0.1366416304784723, "grad_norm": 2.2424540519714355, "learning_rate": 9.98160991236693e-06, "loss": 3.3565, "step": 138700 }, { "epoch": 0.13669088845629437, "grad_norm": 2.5076889991760254, "learning_rate": 9.981596649563362e-06, "loss": 3.3086, "step": 138750 }, { "epoch": 0.13674014643411647, "grad_norm": 2.5143632888793945, "learning_rate": 9.981583381987813e-06, "loss": 3.2996, "step": 138800 }, { "epoch": 0.13678940441193857, "grad_norm": 2.3328802585601807, "learning_rate": 9.9815701096403e-06, "loss": 3.3422, "step": 138850 }, { "epoch": 0.13683866238976064, "grad_norm": 2.7330501079559326, "learning_rate": 9.98155683252083e-06, "loss": 3.2874, "step": 138900 }, { "epoch": 
0.13688792036758274, "grad_norm": 2.7586309909820557, "learning_rate": 9.98154355062942e-06, "loss": 3.311, "step": 138950 }, { "epoch": 0.1369371783454048, "grad_norm": 2.735335350036621, "learning_rate": 9.981530263966081e-06, "loss": 3.3583, "step": 139000 }, { "epoch": 0.1369864363232269, "grad_norm": 2.2423620223999023, "learning_rate": 9.981516972530826e-06, "loss": 3.3061, "step": 139050 }, { "epoch": 0.137035694301049, "grad_norm": 2.4437153339385986, "learning_rate": 9.981503676323668e-06, "loss": 3.3654, "step": 139100 }, { "epoch": 0.13708495227887108, "grad_norm": 2.4011268615722656, "learning_rate": 9.981490375344619e-06, "loss": 3.3963, "step": 139150 }, { "epoch": 0.13713421025669317, "grad_norm": 2.522320508956909, "learning_rate": 9.981477069593692e-06, "loss": 3.3289, "step": 139200 }, { "epoch": 0.13718346823451527, "grad_norm": 2.4019522666931152, "learning_rate": 9.981463759070901e-06, "loss": 3.3304, "step": 139250 }, { "epoch": 0.13723272621233734, "grad_norm": 2.5387160778045654, "learning_rate": 9.981450443776259e-06, "loss": 3.2708, "step": 139300 }, { "epoch": 0.13728198419015944, "grad_norm": 2.7194929122924805, "learning_rate": 9.981437123709775e-06, "loss": 3.4019, "step": 139350 }, { "epoch": 0.13733124216798154, "grad_norm": 2.411836624145508, "learning_rate": 9.981423798871466e-06, "loss": 3.3217, "step": 139400 }, { "epoch": 0.1373805001458036, "grad_norm": 2.504457950592041, "learning_rate": 9.981410469261343e-06, "loss": 3.3431, "step": 139450 }, { "epoch": 0.1374297581236257, "grad_norm": 2.446812629699707, "learning_rate": 9.981397134879418e-06, "loss": 3.3536, "step": 139500 }, { "epoch": 0.1374790161014478, "grad_norm": 2.2976324558258057, "learning_rate": 9.981383795725706e-06, "loss": 3.2978, "step": 139550 }, { "epoch": 0.13752827407926987, "grad_norm": 2.5022976398468018, "learning_rate": 9.981370451800217e-06, "loss": 3.2924, "step": 139600 }, { "epoch": 0.13757753205709197, "grad_norm": 2.5271923542022705, 
"learning_rate": 9.981357103102966e-06, "loss": 3.3034, "step": 139650 }, { "epoch": 0.13762679003491404, "grad_norm": 2.452100992202759, "learning_rate": 9.981343749633963e-06, "loss": 3.3335, "step": 139700 }, { "epoch": 0.13767604801273614, "grad_norm": 2.3178234100341797, "learning_rate": 9.981330391393224e-06, "loss": 3.402, "step": 139750 }, { "epoch": 0.13772530599055824, "grad_norm": 2.3183038234710693, "learning_rate": 9.98131702838076e-06, "loss": 3.3484, "step": 139800 }, { "epoch": 0.1377745639683803, "grad_norm": 2.401075839996338, "learning_rate": 9.981303660596588e-06, "loss": 3.3119, "step": 139850 }, { "epoch": 0.1378238219462024, "grad_norm": 2.5254030227661133, "learning_rate": 9.981290288040714e-06, "loss": 3.2726, "step": 139900 }, { "epoch": 0.1378730799240245, "grad_norm": 2.428209066390991, "learning_rate": 9.981276910713153e-06, "loss": 3.3335, "step": 139950 }, { "epoch": 0.13792233790184658, "grad_norm": 2.383347749710083, "learning_rate": 9.981263528613922e-06, "loss": 3.3597, "step": 140000 }, { "epoch": 0.13797159587966867, "grad_norm": 2.399376392364502, "learning_rate": 9.981250141743029e-06, "loss": 3.3078, "step": 140050 }, { "epoch": 0.13802085385749077, "grad_norm": 2.3909976482391357, "learning_rate": 9.981236750100488e-06, "loss": 3.2902, "step": 140100 }, { "epoch": 0.13807011183531284, "grad_norm": 3.0182032585144043, "learning_rate": 9.981223353686312e-06, "loss": 3.2781, "step": 140150 }, { "epoch": 0.13811936981313494, "grad_norm": 2.3577322959899902, "learning_rate": 9.981209952500514e-06, "loss": 3.3139, "step": 140200 }, { "epoch": 0.138168627790957, "grad_norm": 2.508078098297119, "learning_rate": 9.981196546543108e-06, "loss": 3.3224, "step": 140250 }, { "epoch": 0.1382178857687791, "grad_norm": 2.4417014122009277, "learning_rate": 9.981183135814105e-06, "loss": 3.3467, "step": 140300 }, { "epoch": 0.1382671437466012, "grad_norm": 2.4924166202545166, "learning_rate": 9.981169720313519e-06, "loss": 3.3427, "step": 
140350 }, { "epoch": 0.13831640172442328, "grad_norm": 2.6018028259277344, "learning_rate": 9.981156300041362e-06, "loss": 3.3043, "step": 140400 }, { "epoch": 0.13836565970224537, "grad_norm": 2.520836114883423, "learning_rate": 9.981142874997648e-06, "loss": 3.2591, "step": 140450 }, { "epoch": 0.13841491768006747, "grad_norm": 2.571235418319702, "learning_rate": 9.98112944518239e-06, "loss": 3.3571, "step": 140500 }, { "epoch": 0.13846417565788954, "grad_norm": 2.361311674118042, "learning_rate": 9.981116010595597e-06, "loss": 3.3328, "step": 140550 }, { "epoch": 0.13851343363571164, "grad_norm": 2.5676746368408203, "learning_rate": 9.981102571237287e-06, "loss": 3.3541, "step": 140600 }, { "epoch": 0.13856269161353374, "grad_norm": 3.442856550216675, "learning_rate": 9.98108912710747e-06, "loss": 3.2382, "step": 140650 }, { "epoch": 0.1386119495913558, "grad_norm": 2.6859328746795654, "learning_rate": 9.981075678206162e-06, "loss": 3.261, "step": 140700 }, { "epoch": 0.1386612075691779, "grad_norm": 2.3391921520233154, "learning_rate": 9.981062224533372e-06, "loss": 3.3058, "step": 140750 }, { "epoch": 0.138710465547, "grad_norm": 2.2615370750427246, "learning_rate": 9.981048766089115e-06, "loss": 3.3154, "step": 140800 }, { "epoch": 0.13875972352482208, "grad_norm": 2.5623269081115723, "learning_rate": 9.9810353028734e-06, "loss": 3.2701, "step": 140850 }, { "epoch": 0.13880898150264417, "grad_norm": 2.3537538051605225, "learning_rate": 9.981021834886246e-06, "loss": 3.2571, "step": 140900 }, { "epoch": 0.13885823948046624, "grad_norm": 2.3050129413604736, "learning_rate": 9.981008362127664e-06, "loss": 3.4048, "step": 140950 }, { "epoch": 0.13890749745828834, "grad_norm": 2.6156623363494873, "learning_rate": 9.980994884597666e-06, "loss": 3.2993, "step": 141000 }, { "epoch": 0.13895675543611044, "grad_norm": 2.460026741027832, "learning_rate": 9.980981402296263e-06, "loss": 3.3134, "step": 141050 }, { "epoch": 0.1390060134139325, "grad_norm": 
2.410754680633545, "learning_rate": 9.980967915223471e-06, "loss": 3.3067, "step": 141100 }, { "epoch": 0.1390552713917546, "grad_norm": 2.6898746490478516, "learning_rate": 9.9809544233793e-06, "loss": 3.3313, "step": 141150 }, { "epoch": 0.1391045293695767, "grad_norm": 2.347651958465576, "learning_rate": 9.980940926763766e-06, "loss": 3.3558, "step": 141200 }, { "epoch": 0.13915378734739878, "grad_norm": 2.3405380249023438, "learning_rate": 9.980927425376883e-06, "loss": 3.3223, "step": 141250 }, { "epoch": 0.13920304532522088, "grad_norm": 2.6139001846313477, "learning_rate": 9.980913919218659e-06, "loss": 3.3155, "step": 141300 }, { "epoch": 0.13925230330304297, "grad_norm": 2.2898507118225098, "learning_rate": 9.98090040828911e-06, "loss": 3.2795, "step": 141350 }, { "epoch": 0.13930156128086504, "grad_norm": 2.4258646965026855, "learning_rate": 9.980886892588249e-06, "loss": 3.3546, "step": 141400 }, { "epoch": 0.13935081925868714, "grad_norm": 2.526224374771118, "learning_rate": 9.980873372116086e-06, "loss": 3.3914, "step": 141450 }, { "epoch": 0.1394000772365092, "grad_norm": 2.792062282562256, "learning_rate": 9.980859846872639e-06, "loss": 3.2804, "step": 141500 }, { "epoch": 0.1394493352143313, "grad_norm": 2.583101272583008, "learning_rate": 9.980846316857917e-06, "loss": 3.3249, "step": 141550 }, { "epoch": 0.1394985931921534, "grad_norm": 2.4814393520355225, "learning_rate": 9.980832782071936e-06, "loss": 3.3686, "step": 141600 }, { "epoch": 0.13954785116997548, "grad_norm": 2.41717791557312, "learning_rate": 9.980819242514706e-06, "loss": 3.3066, "step": 141650 }, { "epoch": 0.13959710914779758, "grad_norm": 2.666429281234741, "learning_rate": 9.98080569818624e-06, "loss": 3.3273, "step": 141700 }, { "epoch": 0.13964636712561967, "grad_norm": 2.4969065189361572, "learning_rate": 9.980792149086553e-06, "loss": 3.2931, "step": 141750 }, { "epoch": 0.13969562510344175, "grad_norm": 2.339334726333618, "learning_rate": 9.980778595215659e-06, "loss": 
3.2778, "step": 141800 }, { "epoch": 0.13974488308126384, "grad_norm": 2.5912668704986572, "learning_rate": 9.980765036573568e-06, "loss": 3.2935, "step": 141850 }, { "epoch": 0.13979414105908594, "grad_norm": 2.5058205127716064, "learning_rate": 9.980751473160292e-06, "loss": 3.2877, "step": 141900 }, { "epoch": 0.139843399036908, "grad_norm": 2.9673266410827637, "learning_rate": 9.980737904975847e-06, "loss": 3.3279, "step": 141950 }, { "epoch": 0.1398926570147301, "grad_norm": 2.7983357906341553, "learning_rate": 9.980724332020247e-06, "loss": 3.2946, "step": 142000 }, { "epoch": 0.13994191499255218, "grad_norm": 2.388613700866699, "learning_rate": 9.980710754293502e-06, "loss": 3.3299, "step": 142050 }, { "epoch": 0.13999117297037428, "grad_norm": 2.5358829498291016, "learning_rate": 9.980697171795627e-06, "loss": 3.2597, "step": 142100 }, { "epoch": 0.14004043094819638, "grad_norm": 2.5365257263183594, "learning_rate": 9.980683584526633e-06, "loss": 3.3348, "step": 142150 }, { "epoch": 0.14008968892601845, "grad_norm": 2.57611346244812, "learning_rate": 9.980669992486534e-06, "loss": 3.31, "step": 142200 }, { "epoch": 0.14013894690384054, "grad_norm": 2.3117446899414062, "learning_rate": 9.980656395675343e-06, "loss": 3.251, "step": 142250 }, { "epoch": 0.14018820488166264, "grad_norm": 2.5046188831329346, "learning_rate": 9.980642794093075e-06, "loss": 3.3143, "step": 142300 }, { "epoch": 0.1402374628594847, "grad_norm": 2.4090585708618164, "learning_rate": 9.98062918773974e-06, "loss": 3.3309, "step": 142350 }, { "epoch": 0.1402867208373068, "grad_norm": 2.466533899307251, "learning_rate": 9.980615576615353e-06, "loss": 3.2988, "step": 142400 }, { "epoch": 0.1403359788151289, "grad_norm": 2.5477700233459473, "learning_rate": 9.980601960719924e-06, "loss": 3.299, "step": 142450 }, { "epoch": 0.14038523679295098, "grad_norm": 2.5420734882354736, "learning_rate": 9.98058834005347e-06, "loss": 3.3358, "step": 142500 }, { "epoch": 0.14043449477077308, 
"grad_norm": 2.5244507789611816, "learning_rate": 9.980574714616003e-06, "loss": 3.3163, "step": 142550 }, { "epoch": 0.14048375274859518, "grad_norm": 2.596418619155884, "learning_rate": 9.980561084407533e-06, "loss": 3.3001, "step": 142600 }, { "epoch": 0.14053301072641725, "grad_norm": 2.7721688747406006, "learning_rate": 9.98054744942808e-06, "loss": 3.2385, "step": 142650 }, { "epoch": 0.14058226870423934, "grad_norm": 2.2778868675231934, "learning_rate": 9.980533809677648e-06, "loss": 3.2198, "step": 142700 }, { "epoch": 0.14063152668206141, "grad_norm": 2.211996078491211, "learning_rate": 9.980520165156258e-06, "loss": 3.2215, "step": 142750 }, { "epoch": 0.1406807846598835, "grad_norm": 2.358234405517578, "learning_rate": 9.980506515863917e-06, "loss": 3.3586, "step": 142800 }, { "epoch": 0.1407300426377056, "grad_norm": 3.623809576034546, "learning_rate": 9.980492861800642e-06, "loss": 3.3484, "step": 142850 }, { "epoch": 0.14077930061552768, "grad_norm": 2.1086010932922363, "learning_rate": 9.980479202966444e-06, "loss": 3.2874, "step": 142900 }, { "epoch": 0.14082855859334978, "grad_norm": 2.375112533569336, "learning_rate": 9.980465539361338e-06, "loss": 3.2871, "step": 142950 }, { "epoch": 0.14087781657117188, "grad_norm": 2.394282817840576, "learning_rate": 9.980451870985336e-06, "loss": 3.3133, "step": 143000 }, { "epoch": 0.14092707454899395, "grad_norm": 2.338268756866455, "learning_rate": 9.98043819783845e-06, "loss": 3.28, "step": 143050 }, { "epoch": 0.14097633252681604, "grad_norm": 2.3822615146636963, "learning_rate": 9.980424519920696e-06, "loss": 3.3147, "step": 143100 }, { "epoch": 0.14102559050463814, "grad_norm": 2.5267529487609863, "learning_rate": 9.980410837232084e-06, "loss": 3.3271, "step": 143150 }, { "epoch": 0.1410748484824602, "grad_norm": 2.6227684020996094, "learning_rate": 9.980397149772629e-06, "loss": 3.3035, "step": 143200 }, { "epoch": 0.1411241064602823, "grad_norm": 2.37418794631958, "learning_rate": 
9.980383457542341e-06, "loss": 3.3356, "step": 143250 }, { "epoch": 0.14117336443810438, "grad_norm": 2.325625419616699, "learning_rate": 9.98036976054124e-06, "loss": 3.3008, "step": 143300 }, { "epoch": 0.14122262241592648, "grad_norm": 2.4494218826293945, "learning_rate": 9.980356058769331e-06, "loss": 3.2898, "step": 143350 }, { "epoch": 0.14127188039374858, "grad_norm": 2.671747922897339, "learning_rate": 9.980342352226633e-06, "loss": 3.2971, "step": 143400 }, { "epoch": 0.14132113837157065, "grad_norm": 2.265096426010132, "learning_rate": 9.980328640913158e-06, "loss": 3.3082, "step": 143450 }, { "epoch": 0.14137039634939275, "grad_norm": 2.483980894088745, "learning_rate": 9.980314924828915e-06, "loss": 3.296, "step": 143500 }, { "epoch": 0.14141965432721484, "grad_norm": 2.399890899658203, "learning_rate": 9.980301203973924e-06, "loss": 3.3342, "step": 143550 }, { "epoch": 0.14146891230503691, "grad_norm": 2.6143224239349365, "learning_rate": 9.980287478348192e-06, "loss": 3.2991, "step": 143600 }, { "epoch": 0.141518170282859, "grad_norm": 2.608013153076172, "learning_rate": 9.980273747951734e-06, "loss": 3.3001, "step": 143650 }, { "epoch": 0.1415674282606811, "grad_norm": 2.6112849712371826, "learning_rate": 9.980260012784565e-06, "loss": 3.3152, "step": 143700 }, { "epoch": 0.14161668623850318, "grad_norm": 2.4555299282073975, "learning_rate": 9.980246272846696e-06, "loss": 3.2798, "step": 143750 }, { "epoch": 0.14166594421632528, "grad_norm": 2.323209524154663, "learning_rate": 9.980232528138143e-06, "loss": 3.321, "step": 143800 }, { "epoch": 0.14171520219414738, "grad_norm": 2.4593558311462402, "learning_rate": 9.980218778658914e-06, "loss": 3.2553, "step": 143850 }, { "epoch": 0.14176446017196945, "grad_norm": 2.4153497219085693, "learning_rate": 9.980205024409028e-06, "loss": 3.3317, "step": 143900 }, { "epoch": 0.14181371814979155, "grad_norm": 2.6605224609375, "learning_rate": 9.980191265388494e-06, "loss": 3.3507, "step": 143950 }, { "epoch": 
0.14186297612761362, "grad_norm": 2.712503671646118, "learning_rate": 9.98017750159733e-06, "loss": 3.3041, "step": 144000 }, { "epoch": 0.14191223410543571, "grad_norm": 2.367201089859009, "learning_rate": 9.980163733035543e-06, "loss": 3.2922, "step": 144050 }, { "epoch": 0.1419614920832578, "grad_norm": 2.682112216949463, "learning_rate": 9.98014995970315e-06, "loss": 3.3373, "step": 144100 }, { "epoch": 0.14201075006107988, "grad_norm": 2.3647031784057617, "learning_rate": 9.980136181600162e-06, "loss": 3.3124, "step": 144150 }, { "epoch": 0.14206000803890198, "grad_norm": 2.3073883056640625, "learning_rate": 9.980122398726596e-06, "loss": 3.3142, "step": 144200 }, { "epoch": 0.14210926601672408, "grad_norm": 2.36147403717041, "learning_rate": 9.98010861108246e-06, "loss": 3.3034, "step": 144250 }, { "epoch": 0.14215852399454615, "grad_norm": 2.4521796703338623, "learning_rate": 9.980094818667772e-06, "loss": 3.2424, "step": 144300 }, { "epoch": 0.14220778197236825, "grad_norm": 2.544224739074707, "learning_rate": 9.980081021482542e-06, "loss": 3.3594, "step": 144350 }, { "epoch": 0.14225703995019034, "grad_norm": 2.52839732170105, "learning_rate": 9.980067219526784e-06, "loss": 3.316, "step": 144400 }, { "epoch": 0.14230629792801242, "grad_norm": 2.4467718601226807, "learning_rate": 9.980053412800512e-06, "loss": 3.298, "step": 144450 }, { "epoch": 0.1423555559058345, "grad_norm": 2.5469326972961426, "learning_rate": 9.980039601303739e-06, "loss": 3.3457, "step": 144500 }, { "epoch": 0.14240481388365658, "grad_norm": 2.413027048110962, "learning_rate": 9.980025785036477e-06, "loss": 3.3194, "step": 144550 }, { "epoch": 0.14245407186147868, "grad_norm": 2.489638090133667, "learning_rate": 9.980011963998742e-06, "loss": 3.2625, "step": 144600 }, { "epoch": 0.14250332983930078, "grad_norm": 2.381376266479492, "learning_rate": 9.979998138190546e-06, "loss": 3.2212, "step": 144650 }, { "epoch": 0.14255258781712285, "grad_norm": 2.418457508087158, "learning_rate": 
9.979984307611901e-06, "loss": 3.3328, "step": 144700 }, { "epoch": 0.14260184579494495, "grad_norm": 2.3505351543426514, "learning_rate": 9.97997047226282e-06, "loss": 3.2762, "step": 144750 }, { "epoch": 0.14265110377276705, "grad_norm": 2.9312517642974854, "learning_rate": 9.979956632143317e-06, "loss": 3.2973, "step": 144800 }, { "epoch": 0.14270036175058912, "grad_norm": 2.4469308853149414, "learning_rate": 9.979942787253408e-06, "loss": 3.2861, "step": 144850 }, { "epoch": 0.14274961972841121, "grad_norm": 2.610560417175293, "learning_rate": 9.979928937593101e-06, "loss": 3.2806, "step": 144900 }, { "epoch": 0.1427988777062333, "grad_norm": 2.4830546379089355, "learning_rate": 9.979915083162413e-06, "loss": 3.3391, "step": 144950 }, { "epoch": 0.14284813568405538, "grad_norm": 2.3902671337127686, "learning_rate": 9.979901223961358e-06, "loss": 3.3256, "step": 145000 }, { "epoch": 0.14289739366187748, "grad_norm": 2.420818328857422, "learning_rate": 9.979887359989945e-06, "loss": 3.3159, "step": 145050 }, { "epoch": 0.14294665163969958, "grad_norm": 2.5970120429992676, "learning_rate": 9.979873491248191e-06, "loss": 3.3146, "step": 145100 }, { "epoch": 0.14299590961752165, "grad_norm": 2.414000988006592, "learning_rate": 9.979859617736108e-06, "loss": 3.2436, "step": 145150 }, { "epoch": 0.14304516759534375, "grad_norm": 2.2196333408355713, "learning_rate": 9.979845739453709e-06, "loss": 3.2837, "step": 145200 }, { "epoch": 0.14309442557316582, "grad_norm": 2.356818914413452, "learning_rate": 9.979831856401008e-06, "loss": 3.2604, "step": 145250 }, { "epoch": 0.14314368355098792, "grad_norm": 2.592839241027832, "learning_rate": 9.979817968578018e-06, "loss": 3.1863, "step": 145300 }, { "epoch": 0.14319294152881, "grad_norm": 2.6258652210235596, "learning_rate": 9.979804075984754e-06, "loss": 3.3675, "step": 145350 }, { "epoch": 0.14324219950663208, "grad_norm": 2.339650869369507, "learning_rate": 9.979790178621225e-06, "loss": 3.3662, "step": 145400 }, { 
"epoch": 0.14329145748445418, "grad_norm": 2.4103870391845703, "learning_rate": 9.97977627648745e-06, "loss": 3.2871, "step": 145450 }, { "epoch": 0.14334071546227628, "grad_norm": 2.3108596801757812, "learning_rate": 9.979762369583436e-06, "loss": 3.3147, "step": 145500 }, { "epoch": 0.14338997344009835, "grad_norm": 2.426852226257324, "learning_rate": 9.979748457909201e-06, "loss": 3.2642, "step": 145550 }, { "epoch": 0.14343923141792045, "grad_norm": 2.2711663246154785, "learning_rate": 9.979734541464758e-06, "loss": 3.3544, "step": 145600 }, { "epoch": 0.14348848939574255, "grad_norm": 2.4099419116973877, "learning_rate": 9.979720620250118e-06, "loss": 3.2756, "step": 145650 }, { "epoch": 0.14353774737356462, "grad_norm": 2.439168930053711, "learning_rate": 9.979706694265295e-06, "loss": 3.2948, "step": 145700 }, { "epoch": 0.14358700535138672, "grad_norm": 2.604386806488037, "learning_rate": 9.979692763510305e-06, "loss": 3.2835, "step": 145750 }, { "epoch": 0.14363626332920879, "grad_norm": 2.438323736190796, "learning_rate": 9.979678827985157e-06, "loss": 3.3061, "step": 145800 }, { "epoch": 0.14368552130703088, "grad_norm": 2.368328094482422, "learning_rate": 9.979664887689868e-06, "loss": 3.3573, "step": 145850 }, { "epoch": 0.14373477928485298, "grad_norm": 2.4357810020446777, "learning_rate": 9.979650942624451e-06, "loss": 3.2772, "step": 145900 }, { "epoch": 0.14378403726267505, "grad_norm": 2.2624030113220215, "learning_rate": 9.979636992788915e-06, "loss": 3.3247, "step": 145950 }, { "epoch": 0.14383329524049715, "grad_norm": 2.5204734802246094, "learning_rate": 9.97962303818328e-06, "loss": 3.2395, "step": 146000 }, { "epoch": 0.14388255321831925, "grad_norm": 2.54004168510437, "learning_rate": 9.979609078807556e-06, "loss": 3.2424, "step": 146050 }, { "epoch": 0.14393181119614132, "grad_norm": 2.509308099746704, "learning_rate": 9.979595114661754e-06, "loss": 3.3464, "step": 146100 }, { "epoch": 0.14398106917396342, "grad_norm": 2.679945468902588, 
"learning_rate": 9.979581145745891e-06, "loss": 3.246, "step": 146150 }, { "epoch": 0.14403032715178551, "grad_norm": 2.6270086765289307, "learning_rate": 9.97956717205998e-06, "loss": 3.2905, "step": 146200 }, { "epoch": 0.14407958512960758, "grad_norm": 2.493008613586426, "learning_rate": 9.979553193604032e-06, "loss": 3.2916, "step": 146250 }, { "epoch": 0.14412884310742968, "grad_norm": 2.4219274520874023, "learning_rate": 9.979539210378063e-06, "loss": 3.2347, "step": 146300 }, { "epoch": 0.14417810108525178, "grad_norm": 3.127563953399658, "learning_rate": 9.979525222382085e-06, "loss": 3.3334, "step": 146350 }, { "epoch": 0.14422735906307385, "grad_norm": 2.3080618381500244, "learning_rate": 9.979511229616112e-06, "loss": 3.3062, "step": 146400 }, { "epoch": 0.14427661704089595, "grad_norm": 2.4787940979003906, "learning_rate": 9.979497232080156e-06, "loss": 3.2242, "step": 146450 }, { "epoch": 0.14432587501871802, "grad_norm": 2.26639986038208, "learning_rate": 9.979483229774235e-06, "loss": 3.2811, "step": 146500 }, { "epoch": 0.14437513299654012, "grad_norm": 2.4112660884857178, "learning_rate": 9.979469222698355e-06, "loss": 3.2658, "step": 146550 }, { "epoch": 0.14442439097436222, "grad_norm": 2.349797248840332, "learning_rate": 9.979455210852535e-06, "loss": 3.3028, "step": 146600 }, { "epoch": 0.14447364895218429, "grad_norm": 2.6167349815368652, "learning_rate": 9.979441194236788e-06, "loss": 3.3002, "step": 146650 }, { "epoch": 0.14452290693000638, "grad_norm": 2.5947835445404053, "learning_rate": 9.979427172851124e-06, "loss": 3.2312, "step": 146700 }, { "epoch": 0.14457216490782848, "grad_norm": 2.3929131031036377, "learning_rate": 9.97941314669556e-06, "loss": 3.2819, "step": 146750 }, { "epoch": 0.14462142288565055, "grad_norm": 2.3144378662109375, "learning_rate": 9.979399115770107e-06, "loss": 3.2715, "step": 146800 }, { "epoch": 0.14467068086347265, "grad_norm": 2.5739693641662598, "learning_rate": 9.979385080074782e-06, "loss": 3.3078, 
"step": 146850 }, { "epoch": 0.14471993884129475, "grad_norm": 2.396960735321045, "learning_rate": 9.979371039609594e-06, "loss": 3.2647, "step": 146900 }, { "epoch": 0.14476919681911682, "grad_norm": 2.5673041343688965, "learning_rate": 9.979356994374558e-06, "loss": 3.2826, "step": 146950 }, { "epoch": 0.14481845479693892, "grad_norm": 2.5432779788970947, "learning_rate": 9.97934294436969e-06, "loss": 3.2812, "step": 147000 }, { "epoch": 0.144867712774761, "grad_norm": 2.698669910430908, "learning_rate": 9.979328889595e-06, "loss": 3.278, "step": 147050 }, { "epoch": 0.14491697075258309, "grad_norm": 2.3214752674102783, "learning_rate": 9.979314830050504e-06, "loss": 3.34, "step": 147100 }, { "epoch": 0.14496622873040518, "grad_norm": 2.4732139110565186, "learning_rate": 9.979300765736214e-06, "loss": 3.2034, "step": 147150 }, { "epoch": 0.14501548670822725, "grad_norm": 2.45590877532959, "learning_rate": 9.979286696652142e-06, "loss": 3.3291, "step": 147200 }, { "epoch": 0.14506474468604935, "grad_norm": 2.368332624435425, "learning_rate": 9.979272622798304e-06, "loss": 3.2576, "step": 147250 }, { "epoch": 0.14511400266387145, "grad_norm": 2.514828681945801, "learning_rate": 9.979258544174714e-06, "loss": 3.2614, "step": 147300 }, { "epoch": 0.14516326064169352, "grad_norm": 2.501635789871216, "learning_rate": 9.979244460781383e-06, "loss": 3.306, "step": 147350 }, { "epoch": 0.14521251861951562, "grad_norm": 2.827171802520752, "learning_rate": 9.979230372618328e-06, "loss": 3.2906, "step": 147400 }, { "epoch": 0.14526177659733772, "grad_norm": 2.3729114532470703, "learning_rate": 9.979216279685558e-06, "loss": 3.3687, "step": 147450 }, { "epoch": 0.1453110345751598, "grad_norm": 2.6385669708251953, "learning_rate": 9.979202181983087e-06, "loss": 3.2612, "step": 147500 }, { "epoch": 0.14536029255298188, "grad_norm": 2.4721758365631104, "learning_rate": 9.979188079510933e-06, "loss": 3.2914, "step": 147550 }, { "epoch": 0.14540955053080398, "grad_norm": 
2.2799479961395264, "learning_rate": 9.979173972269107e-06, "loss": 3.2835, "step": 147600 }, { "epoch": 0.14545880850862605, "grad_norm": 2.340970277786255, "learning_rate": 9.97915986025762e-06, "loss": 3.2837, "step": 147650 }, { "epoch": 0.14550806648644815, "grad_norm": 2.490550994873047, "learning_rate": 9.979145743476489e-06, "loss": 3.2892, "step": 147700 }, { "epoch": 0.14555732446427022, "grad_norm": 2.597238063812256, "learning_rate": 9.979131621925726e-06, "loss": 3.305, "step": 147750 }, { "epoch": 0.14560658244209232, "grad_norm": 2.574496269226074, "learning_rate": 9.979117495605346e-06, "loss": 3.2265, "step": 147800 }, { "epoch": 0.14565584041991442, "grad_norm": 2.226213216781616, "learning_rate": 9.979103364515361e-06, "loss": 3.3109, "step": 147850 }, { "epoch": 0.1457050983977365, "grad_norm": 2.3243770599365234, "learning_rate": 9.979089228655784e-06, "loss": 3.3261, "step": 147900 }, { "epoch": 0.14575435637555859, "grad_norm": 2.465961456298828, "learning_rate": 9.97907508802663e-06, "loss": 3.289, "step": 147950 }, { "epoch": 0.14580361435338068, "grad_norm": 2.276695966720581, "learning_rate": 9.979060942627911e-06, "loss": 3.3005, "step": 148000 }, { "epoch": 0.14585287233120275, "grad_norm": 2.5324413776397705, "learning_rate": 9.979046792459642e-06, "loss": 3.287, "step": 148050 }, { "epoch": 0.14590213030902485, "grad_norm": 2.4110751152038574, "learning_rate": 9.979032637521838e-06, "loss": 3.3059, "step": 148100 }, { "epoch": 0.14595138828684695, "grad_norm": 2.535853624343872, "learning_rate": 9.979018477814507e-06, "loss": 3.326, "step": 148150 }, { "epoch": 0.14600064626466902, "grad_norm": 2.435337543487549, "learning_rate": 9.979004313337668e-06, "loss": 3.2113, "step": 148200 }, { "epoch": 0.14604990424249112, "grad_norm": 2.5442047119140625, "learning_rate": 9.978990144091334e-06, "loss": 3.2839, "step": 148250 }, { "epoch": 0.1460991622203132, "grad_norm": 2.5221338272094727, "learning_rate": 9.978975970075515e-06, "loss": 
3.3352, "step": 148300 }, { "epoch": 0.1461484201981353, "grad_norm": 2.4341981410980225, "learning_rate": 9.97896179129023e-06, "loss": 3.3155, "step": 148350 }, { "epoch": 0.14619767817595739, "grad_norm": 2.3526546955108643, "learning_rate": 9.978947607735485e-06, "loss": 3.3023, "step": 148400 }, { "epoch": 0.14624693615377946, "grad_norm": 2.400481700897217, "learning_rate": 9.978933419411302e-06, "loss": 3.2909, "step": 148450 }, { "epoch": 0.14629619413160155, "grad_norm": 2.4070205688476562, "learning_rate": 9.978919226317689e-06, "loss": 3.2199, "step": 148500 }, { "epoch": 0.14634545210942365, "grad_norm": 2.480579376220703, "learning_rate": 9.978905028454661e-06, "loss": 3.2443, "step": 148550 }, { "epoch": 0.14639471008724572, "grad_norm": 2.4894566535949707, "learning_rate": 9.978890825822232e-06, "loss": 3.2814, "step": 148600 }, { "epoch": 0.14644396806506782, "grad_norm": 2.521941900253296, "learning_rate": 9.978876618420416e-06, "loss": 3.3343, "step": 148650 }, { "epoch": 0.14649322604288992, "grad_norm": 2.449779748916626, "learning_rate": 9.978862406249227e-06, "loss": 3.3327, "step": 148700 }, { "epoch": 0.146542484020712, "grad_norm": 2.2670562267303467, "learning_rate": 9.978848189308677e-06, "loss": 3.3424, "step": 148750 }, { "epoch": 0.1465917419985341, "grad_norm": 2.377495527267456, "learning_rate": 9.97883396759878e-06, "loss": 3.2847, "step": 148800 }, { "epoch": 0.14664099997635618, "grad_norm": 2.3170268535614014, "learning_rate": 9.97881974111955e-06, "loss": 3.2765, "step": 148850 }, { "epoch": 0.14669025795417825, "grad_norm": 2.2896487712860107, "learning_rate": 9.978805509870999e-06, "loss": 3.288, "step": 148900 }, { "epoch": 0.14673951593200035, "grad_norm": 2.5009007453918457, "learning_rate": 9.978791273853145e-06, "loss": 3.2615, "step": 148950 }, { "epoch": 0.14678877390982242, "grad_norm": 3.0417041778564453, "learning_rate": 9.978777033065997e-06, "loss": 3.2558, "step": 149000 }, { "epoch": 0.14683803188764452, 
"grad_norm": 2.407058000564575, "learning_rate": 9.97876278750957e-06, "loss": 3.2533, "step": 149050 }, { "epoch": 0.14688728986546662, "grad_norm": 2.4789116382598877, "learning_rate": 9.97874853718388e-06, "loss": 3.3029, "step": 149100 }, { "epoch": 0.1469365478432887, "grad_norm": 2.379430055618286, "learning_rate": 9.978734282088937e-06, "loss": 3.3172, "step": 149150 }, { "epoch": 0.1469858058211108, "grad_norm": 2.349109172821045, "learning_rate": 9.978720022224758e-06, "loss": 3.2582, "step": 149200 }, { "epoch": 0.14703506379893289, "grad_norm": 2.450807571411133, "learning_rate": 9.978705757591354e-06, "loss": 3.2481, "step": 149250 }, { "epoch": 0.14708432177675496, "grad_norm": 2.4101028442382812, "learning_rate": 9.97869148818874e-06, "loss": 3.2536, "step": 149300 }, { "epoch": 0.14713357975457705, "grad_norm": 2.4175539016723633, "learning_rate": 9.97867721401693e-06, "loss": 3.3054, "step": 149350 }, { "epoch": 0.14718283773239915, "grad_norm": 2.233492136001587, "learning_rate": 9.978662935075936e-06, "loss": 3.3238, "step": 149400 }, { "epoch": 0.14723209571022122, "grad_norm": 2.4466259479522705, "learning_rate": 9.978648651365774e-06, "loss": 3.3276, "step": 149450 }, { "epoch": 0.14728135368804332, "grad_norm": 2.45829439163208, "learning_rate": 9.978634362886456e-06, "loss": 3.3413, "step": 149500 }, { "epoch": 0.1473306116658654, "grad_norm": 2.4345948696136475, "learning_rate": 9.978620069637997e-06, "loss": 3.3234, "step": 149550 }, { "epoch": 0.1473798696436875, "grad_norm": 2.7117388248443604, "learning_rate": 9.978605771620408e-06, "loss": 3.2672, "step": 149600 }, { "epoch": 0.1474291276215096, "grad_norm": 2.7042853832244873, "learning_rate": 9.978591468833705e-06, "loss": 3.3143, "step": 149650 }, { "epoch": 0.14747838559933166, "grad_norm": 2.5825064182281494, "learning_rate": 9.9785771612779e-06, "loss": 3.3432, "step": 149700 }, { "epoch": 0.14752764357715376, "grad_norm": 2.7188947200775146, "learning_rate": 9.97856284895301e-06, 
"loss": 3.3423, "step": 149750 }, { "epoch": 0.14757690155497585, "grad_norm": 2.5379478931427, "learning_rate": 9.978548531859047e-06, "loss": 3.3949, "step": 149800 }, { "epoch": 0.14762615953279792, "grad_norm": 2.414306640625, "learning_rate": 9.978534209996023e-06, "loss": 3.2833, "step": 149850 }, { "epoch": 0.14767541751062002, "grad_norm": 2.649200439453125, "learning_rate": 9.978519883363955e-06, "loss": 3.2816, "step": 149900 }, { "epoch": 0.14772467548844212, "grad_norm": 2.369764566421509, "learning_rate": 9.978505551962853e-06, "loss": 3.2828, "step": 149950 }, { "epoch": 0.1477739334662642, "grad_norm": 2.336366653442383, "learning_rate": 9.978491215792732e-06, "loss": 3.3031, "step": 150000 }, { "epoch": 0.1478231914440863, "grad_norm": 2.4197518825531006, "learning_rate": 9.97847687485361e-06, "loss": 3.2846, "step": 150050 }, { "epoch": 0.14787244942190836, "grad_norm": 2.4224517345428467, "learning_rate": 9.978462529145492e-06, "loss": 3.3006, "step": 150100 }, { "epoch": 0.14792170739973046, "grad_norm": 2.4161429405212402, "learning_rate": 9.978448178668399e-06, "loss": 3.3157, "step": 150150 }, { "epoch": 0.14797096537755255, "grad_norm": 2.299060344696045, "learning_rate": 9.978433823422342e-06, "loss": 3.293, "step": 150200 }, { "epoch": 0.14802022335537462, "grad_norm": 2.337463140487671, "learning_rate": 9.978419463407336e-06, "loss": 3.336, "step": 150250 }, { "epoch": 0.14806948133319672, "grad_norm": 2.403433084487915, "learning_rate": 9.978405098623393e-06, "loss": 3.2861, "step": 150300 }, { "epoch": 0.14811873931101882, "grad_norm": 2.5146632194519043, "learning_rate": 9.97839072907053e-06, "loss": 3.3833, "step": 150350 }, { "epoch": 0.1481679972888409, "grad_norm": 2.423896074295044, "learning_rate": 9.978376354748756e-06, "loss": 3.271, "step": 150400 }, { "epoch": 0.148217255266663, "grad_norm": 2.517756700515747, "learning_rate": 9.978361975658088e-06, "loss": 3.3072, "step": 150450 }, { "epoch": 0.1482665132444851, "grad_norm": 
2.423351287841797, "learning_rate": 9.978347591798541e-06, "loss": 3.2788, "step": 150500 }, { "epoch": 0.14831577122230716, "grad_norm": 2.2556159496307373, "learning_rate": 9.978333203170123e-06, "loss": 3.3211, "step": 150550 }, { "epoch": 0.14836502920012926, "grad_norm": 2.3965258598327637, "learning_rate": 9.978318809772854e-06, "loss": 3.3346, "step": 150600 }, { "epoch": 0.14841428717795135, "grad_norm": 2.3876726627349854, "learning_rate": 9.978304411606745e-06, "loss": 3.2629, "step": 150650 }, { "epoch": 0.14846354515577342, "grad_norm": 2.68920636177063, "learning_rate": 9.978290008671811e-06, "loss": 3.3458, "step": 150700 }, { "epoch": 0.14851280313359552, "grad_norm": 2.1873795986175537, "learning_rate": 9.978275600968064e-06, "loss": 3.3014, "step": 150750 }, { "epoch": 0.1485620611114176, "grad_norm": 2.2328312397003174, "learning_rate": 9.97826118849552e-06, "loss": 3.2473, "step": 150800 }, { "epoch": 0.1486113190892397, "grad_norm": 2.8184449672698975, "learning_rate": 9.978246771254191e-06, "loss": 3.3464, "step": 150850 }, { "epoch": 0.1486605770670618, "grad_norm": 2.5048348903656006, "learning_rate": 9.97823234924409e-06, "loss": 3.3421, "step": 150900 }, { "epoch": 0.14870983504488386, "grad_norm": 2.426356077194214, "learning_rate": 9.978217922465235e-06, "loss": 3.284, "step": 150950 }, { "epoch": 0.14875909302270596, "grad_norm": 2.207216262817383, "learning_rate": 9.978203490917635e-06, "loss": 3.2471, "step": 151000 }, { "epoch": 0.14880835100052806, "grad_norm": 2.3343188762664795, "learning_rate": 9.978189054601306e-06, "loss": 3.2607, "step": 151050 }, { "epoch": 0.14885760897835013, "grad_norm": 2.44858717918396, "learning_rate": 9.978174613516263e-06, "loss": 3.3335, "step": 151100 }, { "epoch": 0.14890686695617222, "grad_norm": 2.4627461433410645, "learning_rate": 9.978160167662517e-06, "loss": 3.2515, "step": 151150 }, { "epoch": 0.14895612493399432, "grad_norm": 2.604360342025757, "learning_rate": 9.978145717040085e-06, "loss": 
3.3334, "step": 151200 }, { "epoch": 0.1490053829118164, "grad_norm": 2.368360996246338, "learning_rate": 9.978131261648977e-06, "loss": 3.2695, "step": 151250 }, { "epoch": 0.1490546408896385, "grad_norm": 2.4292421340942383, "learning_rate": 9.978116801489211e-06, "loss": 3.3069, "step": 151300 }, { "epoch": 0.14910389886746056, "grad_norm": 2.3855268955230713, "learning_rate": 9.978102336560797e-06, "loss": 3.3683, "step": 151350 }, { "epoch": 0.14915315684528266, "grad_norm": 2.316948413848877, "learning_rate": 9.978087866863753e-06, "loss": 3.3332, "step": 151400 }, { "epoch": 0.14920241482310476, "grad_norm": 2.392646312713623, "learning_rate": 9.97807339239809e-06, "loss": 3.2646, "step": 151450 }, { "epoch": 0.14925167280092683, "grad_norm": 2.8090627193450928, "learning_rate": 9.978058913163822e-06, "loss": 3.3435, "step": 151500 }, { "epoch": 0.14930093077874892, "grad_norm": 2.486398220062256, "learning_rate": 9.978044429160963e-06, "loss": 3.3209, "step": 151550 }, { "epoch": 0.14935018875657102, "grad_norm": 2.2862391471862793, "learning_rate": 9.978029940389528e-06, "loss": 3.2569, "step": 151600 }, { "epoch": 0.1493994467343931, "grad_norm": 2.331777334213257, "learning_rate": 9.97801544684953e-06, "loss": 3.2582, "step": 151650 }, { "epoch": 0.1494487047122152, "grad_norm": 2.490126132965088, "learning_rate": 9.978000948540983e-06, "loss": 3.3346, "step": 151700 }, { "epoch": 0.1494979626900373, "grad_norm": 3.2594997882843018, "learning_rate": 9.9779864454639e-06, "loss": 3.2881, "step": 151750 }, { "epoch": 0.14954722066785936, "grad_norm": 2.4209935665130615, "learning_rate": 9.977971937618299e-06, "loss": 3.2753, "step": 151800 }, { "epoch": 0.14959647864568146, "grad_norm": 2.4318056106567383, "learning_rate": 9.977957425004188e-06, "loss": 3.2823, "step": 151850 }, { "epoch": 0.14964573662350356, "grad_norm": 2.597581624984741, "learning_rate": 9.977942907621583e-06, "loss": 3.289, "step": 151900 }, { "epoch": 0.14969499460132563, "grad_norm": 
2.473238229751587, "learning_rate": 9.9779283854705e-06, "loss": 3.2445, "step": 151950 }, { "epoch": 0.14974425257914772, "grad_norm": 2.4434046745300293, "learning_rate": 9.97791385855095e-06, "loss": 3.2827, "step": 152000 }, { "epoch": 0.1497935105569698, "grad_norm": 2.627157688140869, "learning_rate": 9.97789932686295e-06, "loss": 3.3365, "step": 152050 }, { "epoch": 0.1498427685347919, "grad_norm": 2.139827013015747, "learning_rate": 9.97788479040651e-06, "loss": 3.3235, "step": 152100 }, { "epoch": 0.149892026512614, "grad_norm": 2.7437503337860107, "learning_rate": 9.97787024918165e-06, "loss": 3.3048, "step": 152150 }, { "epoch": 0.14994128449043606, "grad_norm": 2.331782102584839, "learning_rate": 9.977855703188377e-06, "loss": 3.3181, "step": 152200 }, { "epoch": 0.14999054246825816, "grad_norm": 2.710099220275879, "learning_rate": 9.977841152426708e-06, "loss": 3.2854, "step": 152250 }, { "epoch": 0.15003980044608026, "grad_norm": 2.3837549686431885, "learning_rate": 9.977826596896658e-06, "loss": 3.2619, "step": 152300 }, { "epoch": 0.15008905842390233, "grad_norm": 2.5437936782836914, "learning_rate": 9.97781203659824e-06, "loss": 3.235, "step": 152350 }, { "epoch": 0.15013831640172443, "grad_norm": 3.37489914894104, "learning_rate": 9.977797471531468e-06, "loss": 3.2802, "step": 152400 }, { "epoch": 0.15018757437954652, "grad_norm": 2.6280834674835205, "learning_rate": 9.977782901696354e-06, "loss": 3.2462, "step": 152450 }, { "epoch": 0.1502368323573686, "grad_norm": 2.301314115524292, "learning_rate": 9.977768327092917e-06, "loss": 3.2525, "step": 152500 }, { "epoch": 0.1502860903351907, "grad_norm": 2.7387471199035645, "learning_rate": 9.977753747721164e-06, "loss": 3.2933, "step": 152550 }, { "epoch": 0.15033534831301276, "grad_norm": 2.5955018997192383, "learning_rate": 9.977739163581115e-06, "loss": 3.2982, "step": 152600 }, { "epoch": 0.15038460629083486, "grad_norm": 2.530768394470215, "learning_rate": 9.977724574672781e-06, "loss": 3.2864, 
"step": 152650 }, { "epoch": 0.15043386426865696, "grad_norm": 2.3504109382629395, "learning_rate": 9.977709980996177e-06, "loss": 3.289, "step": 152700 }, { "epoch": 0.15048312224647903, "grad_norm": 2.2651355266571045, "learning_rate": 9.977695382551317e-06, "loss": 3.2959, "step": 152750 }, { "epoch": 0.15053238022430113, "grad_norm": 2.5837149620056152, "learning_rate": 9.977680779338214e-06, "loss": 3.3368, "step": 152800 }, { "epoch": 0.15058163820212322, "grad_norm": 2.345137357711792, "learning_rate": 9.977666171356882e-06, "loss": 3.2261, "step": 152850 }, { "epoch": 0.1506308961799453, "grad_norm": 2.2083981037139893, "learning_rate": 9.977651558607336e-06, "loss": 3.2318, "step": 152900 }, { "epoch": 0.1506801541577674, "grad_norm": 2.286292314529419, "learning_rate": 9.97763694108959e-06, "loss": 3.2576, "step": 152950 }, { "epoch": 0.1507294121355895, "grad_norm": 2.2421185970306396, "learning_rate": 9.977622318803658e-06, "loss": 3.2813, "step": 153000 }, { "epoch": 0.15077867011341156, "grad_norm": 2.309407949447632, "learning_rate": 9.977607691749553e-06, "loss": 3.3366, "step": 153050 }, { "epoch": 0.15082792809123366, "grad_norm": 2.35783314704895, "learning_rate": 9.97759305992729e-06, "loss": 3.3906, "step": 153100 }, { "epoch": 0.15087718606905576, "grad_norm": 2.681591510772705, "learning_rate": 9.977578423336883e-06, "loss": 3.2705, "step": 153150 }, { "epoch": 0.15092644404687783, "grad_norm": 2.5046284198760986, "learning_rate": 9.977563781978343e-06, "loss": 3.2609, "step": 153200 }, { "epoch": 0.15097570202469993, "grad_norm": 2.347092866897583, "learning_rate": 9.977549135851689e-06, "loss": 3.2569, "step": 153250 }, { "epoch": 0.151024960002522, "grad_norm": 2.471653461456299, "learning_rate": 9.977534484956932e-06, "loss": 3.273, "step": 153300 }, { "epoch": 0.1510742179803441, "grad_norm": 2.446086883544922, "learning_rate": 9.977519829294088e-06, "loss": 3.2916, "step": 153350 }, { "epoch": 0.1511234759581662, "grad_norm": 
2.4059042930603027, "learning_rate": 9.977505168863168e-06, "loss": 3.3143, "step": 153400 }, { "epoch": 0.15117273393598826, "grad_norm": 2.59415864944458, "learning_rate": 9.97749050366419e-06, "loss": 3.3057, "step": 153450 }, { "epoch": 0.15122199191381036, "grad_norm": 2.501274347305298, "learning_rate": 9.977475833697163e-06, "loss": 3.2568, "step": 153500 }, { "epoch": 0.15127124989163246, "grad_norm": 2.516129970550537, "learning_rate": 9.977461158962106e-06, "loss": 3.3436, "step": 153550 }, { "epoch": 0.15132050786945453, "grad_norm": 2.435983896255493, "learning_rate": 9.97744647945903e-06, "loss": 3.2691, "step": 153600 }, { "epoch": 0.15136976584727663, "grad_norm": 2.6737070083618164, "learning_rate": 9.977431795187951e-06, "loss": 3.2848, "step": 153650 }, { "epoch": 0.15141902382509873, "grad_norm": 2.5116662979125977, "learning_rate": 9.977417106148882e-06, "loss": 3.3348, "step": 153700 }, { "epoch": 0.1514682818029208, "grad_norm": 2.5109643936157227, "learning_rate": 9.977402412341836e-06, "loss": 3.3156, "step": 153750 }, { "epoch": 0.1515175397807429, "grad_norm": 2.3035101890563965, "learning_rate": 9.97738771376683e-06, "loss": 3.2049, "step": 153800 }, { "epoch": 0.15156679775856496, "grad_norm": 2.3835597038269043, "learning_rate": 9.977373010423874e-06, "loss": 3.3418, "step": 153850 }, { "epoch": 0.15161605573638706, "grad_norm": 2.4332709312438965, "learning_rate": 9.977358302312986e-06, "loss": 3.2651, "step": 153900 }, { "epoch": 0.15166531371420916, "grad_norm": 2.4367105960845947, "learning_rate": 9.977343589434178e-06, "loss": 3.2972, "step": 153950 }, { "epoch": 0.15171457169203123, "grad_norm": 2.3251142501831055, "learning_rate": 9.977328871787467e-06, "loss": 3.3117, "step": 154000 }, { "epoch": 0.15176382966985333, "grad_norm": 2.31186842918396, "learning_rate": 9.977314149372861e-06, "loss": 3.3653, "step": 154050 }, { "epoch": 0.15181308764767543, "grad_norm": 2.3509304523468018, "learning_rate": 9.97729942219038e-06, 
"loss": 3.3388, "step": 154100 }, { "epoch": 0.1518623456254975, "grad_norm": 2.434520959854126, "learning_rate": 9.977284690240035e-06, "loss": 3.2536, "step": 154150 }, { "epoch": 0.1519116036033196, "grad_norm": 2.380880832672119, "learning_rate": 9.977269953521842e-06, "loss": 3.2944, "step": 154200 }, { "epoch": 0.1519608615811417, "grad_norm": 2.4723386764526367, "learning_rate": 9.977255212035812e-06, "loss": 3.2684, "step": 154250 }, { "epoch": 0.15201011955896376, "grad_norm": 2.5194687843322754, "learning_rate": 9.977240465781964e-06, "loss": 3.2823, "step": 154300 }, { "epoch": 0.15205937753678586, "grad_norm": 2.4119274616241455, "learning_rate": 9.977225714760308e-06, "loss": 3.2884, "step": 154350 }, { "epoch": 0.15210863551460796, "grad_norm": 2.4193413257598877, "learning_rate": 9.977210958970859e-06, "loss": 3.326, "step": 154400 }, { "epoch": 0.15215789349243003, "grad_norm": 2.2917275428771973, "learning_rate": 9.977196198413632e-06, "loss": 3.2287, "step": 154450 }, { "epoch": 0.15220715147025213, "grad_norm": 2.298164129257202, "learning_rate": 9.97718143308864e-06, "loss": 3.2516, "step": 154500 }, { "epoch": 0.1522564094480742, "grad_norm": 2.438342332839966, "learning_rate": 9.9771666629959e-06, "loss": 3.2583, "step": 154550 }, { "epoch": 0.1523056674258963, "grad_norm": 2.330731153488159, "learning_rate": 9.977151888135424e-06, "loss": 3.2195, "step": 154600 }, { "epoch": 0.1523549254037184, "grad_norm": 2.8021440505981445, "learning_rate": 9.977137108507224e-06, "loss": 3.3307, "step": 154650 }, { "epoch": 0.15240418338154046, "grad_norm": 2.300248622894287, "learning_rate": 9.977122324111316e-06, "loss": 3.2732, "step": 154700 }, { "epoch": 0.15245344135936256, "grad_norm": 2.3328301906585693, "learning_rate": 9.977107534947717e-06, "loss": 3.2772, "step": 154750 }, { "epoch": 0.15250269933718466, "grad_norm": 2.4833085536956787, "learning_rate": 9.977092741016438e-06, "loss": 3.2807, "step": 154800 }, { "epoch": 0.15255195731500673, 
"grad_norm": 2.4440841674804688, "learning_rate": 9.977077942317495e-06, "loss": 3.2053, "step": 154850 }, { "epoch": 0.15260121529282883, "grad_norm": 2.5936126708984375, "learning_rate": 9.9770631388509e-06, "loss": 3.3171, "step": 154900 }, { "epoch": 0.15265047327065093, "grad_norm": 2.4147815704345703, "learning_rate": 9.977048330616667e-06, "loss": 3.2485, "step": 154950 }, { "epoch": 0.152699731248473, "grad_norm": 3.1126468181610107, "learning_rate": 9.977033517614814e-06, "loss": 3.2606, "step": 155000 }, { "epoch": 0.1527489892262951, "grad_norm": 2.517970323562622, "learning_rate": 9.97701869984535e-06, "loss": 3.2546, "step": 155050 }, { "epoch": 0.15279824720411717, "grad_norm": 2.313511848449707, "learning_rate": 9.977003877308293e-06, "loss": 3.3184, "step": 155100 }, { "epoch": 0.15284750518193926, "grad_norm": 2.3873114585876465, "learning_rate": 9.976989050003656e-06, "loss": 3.2548, "step": 155150 }, { "epoch": 0.15289676315976136, "grad_norm": 2.8141448497772217, "learning_rate": 9.976974217931454e-06, "loss": 3.2496, "step": 155200 }, { "epoch": 0.15294602113758343, "grad_norm": 2.4146671295166016, "learning_rate": 9.9769593810917e-06, "loss": 3.2429, "step": 155250 }, { "epoch": 0.15299527911540553, "grad_norm": 2.491251230239868, "learning_rate": 9.976944539484408e-06, "loss": 3.3404, "step": 155300 }, { "epoch": 0.15304453709322763, "grad_norm": 2.4193804264068604, "learning_rate": 9.976929693109594e-06, "loss": 3.3084, "step": 155350 }, { "epoch": 0.1530937950710497, "grad_norm": 2.3924407958984375, "learning_rate": 9.97691484196727e-06, "loss": 3.2557, "step": 155400 }, { "epoch": 0.1531430530488718, "grad_norm": 2.333134174346924, "learning_rate": 9.97689998605745e-06, "loss": 3.2634, "step": 155450 }, { "epoch": 0.1531923110266939, "grad_norm": 2.806565284729004, "learning_rate": 9.976885125380151e-06, "loss": 3.2315, "step": 155500 }, { "epoch": 0.15324156900451597, "grad_norm": 2.208111047744751, "learning_rate": 9.976870259935388e-06, 
"loss": 3.2211, "step": 155550 }, { "epoch": 0.15329082698233806, "grad_norm": 2.738959312438965, "learning_rate": 9.97685538972317e-06, "loss": 3.2976, "step": 155600 }, { "epoch": 0.15334008496016016, "grad_norm": 2.4517202377319336, "learning_rate": 9.976840514743515e-06, "loss": 3.2367, "step": 155650 }, { "epoch": 0.15338934293798223, "grad_norm": 2.759275436401367, "learning_rate": 9.976825634996437e-06, "loss": 3.3007, "step": 155700 }, { "epoch": 0.15343860091580433, "grad_norm": 2.303468942642212, "learning_rate": 9.976810750481948e-06, "loss": 3.3229, "step": 155750 }, { "epoch": 0.1534878588936264, "grad_norm": 2.4701743125915527, "learning_rate": 9.976795861200067e-06, "loss": 3.2357, "step": 155800 }, { "epoch": 0.1535371168714485, "grad_norm": 2.5161702632904053, "learning_rate": 9.976780967150802e-06, "loss": 3.2847, "step": 155850 }, { "epoch": 0.1535863748492706, "grad_norm": 2.3651835918426514, "learning_rate": 9.976766068334174e-06, "loss": 3.2832, "step": 155900 }, { "epoch": 0.15363563282709267, "grad_norm": 2.305102825164795, "learning_rate": 9.976751164750192e-06, "loss": 3.2414, "step": 155950 }, { "epoch": 0.15368489080491476, "grad_norm": 2.340178966522217, "learning_rate": 9.976736256398871e-06, "loss": 3.2824, "step": 156000 }, { "epoch": 0.15373414878273686, "grad_norm": 2.452003002166748, "learning_rate": 9.976721343280229e-06, "loss": 3.2819, "step": 156050 }, { "epoch": 0.15378340676055893, "grad_norm": 2.4133219718933105, "learning_rate": 9.976706425394277e-06, "loss": 3.2682, "step": 156100 }, { "epoch": 0.15383266473838103, "grad_norm": 2.3131155967712402, "learning_rate": 9.976691502741026e-06, "loss": 3.2495, "step": 156150 }, { "epoch": 0.15388192271620313, "grad_norm": 2.45467209815979, "learning_rate": 9.976676575320499e-06, "loss": 3.2347, "step": 156200 }, { "epoch": 0.1539311806940252, "grad_norm": 2.390944719314575, "learning_rate": 9.976661643132703e-06, "loss": 3.2589, "step": 156250 }, { "epoch": 0.1539804386718473, 
"grad_norm": 2.56076717376709, "learning_rate": 9.976646706177658e-06, "loss": 3.2709, "step": 156300 }, { "epoch": 0.15402969664966937, "grad_norm": 2.5402331352233887, "learning_rate": 9.976631764455372e-06, "loss": 3.2341, "step": 156350 }, { "epoch": 0.15407895462749147, "grad_norm": 2.519803762435913, "learning_rate": 9.976616817965862e-06, "loss": 3.2426, "step": 156400 }, { "epoch": 0.15412821260531356, "grad_norm": 2.48053240776062, "learning_rate": 9.976601866709146e-06, "loss": 3.2325, "step": 156450 }, { "epoch": 0.15417747058313563, "grad_norm": 2.451362133026123, "learning_rate": 9.976586910685232e-06, "loss": 3.1817, "step": 156500 }, { "epoch": 0.15422672856095773, "grad_norm": 2.366558074951172, "learning_rate": 9.976571949894139e-06, "loss": 3.2684, "step": 156550 }, { "epoch": 0.15427598653877983, "grad_norm": 2.3876562118530273, "learning_rate": 9.97655698433588e-06, "loss": 3.2911, "step": 156600 }, { "epoch": 0.1543252445166019, "grad_norm": 2.2565135955810547, "learning_rate": 9.976542014010469e-06, "loss": 3.2218, "step": 156650 }, { "epoch": 0.154374502494424, "grad_norm": 2.590651512145996, "learning_rate": 9.97652703891792e-06, "loss": 3.2119, "step": 156700 }, { "epoch": 0.1544237604722461, "grad_norm": 2.3665263652801514, "learning_rate": 9.976512059058247e-06, "loss": 3.3024, "step": 156750 }, { "epoch": 0.15447301845006817, "grad_norm": 2.303668260574341, "learning_rate": 9.976497074431465e-06, "loss": 3.254, "step": 156800 }, { "epoch": 0.15452227642789026, "grad_norm": 2.3221631050109863, "learning_rate": 9.97648208503759e-06, "loss": 3.3035, "step": 156850 }, { "epoch": 0.15457153440571236, "grad_norm": 2.301067590713501, "learning_rate": 9.976467090876637e-06, "loss": 3.2774, "step": 156900 }, { "epoch": 0.15462079238353443, "grad_norm": 2.55307674407959, "learning_rate": 9.976452091948613e-06, "loss": 3.3182, "step": 156950 }, { "epoch": 0.15467005036135653, "grad_norm": 2.391819477081299, "learning_rate": 9.976437088253541e-06, 
"loss": 3.289, "step": 157000 }, { "epoch": 0.1547193083391786, "grad_norm": 2.4568984508514404, "learning_rate": 9.976422079791432e-06, "loss": 3.2943, "step": 157050 }, { "epoch": 0.1547685663170007, "grad_norm": 2.361287832260132, "learning_rate": 9.9764070665623e-06, "loss": 3.2702, "step": 157100 }, { "epoch": 0.1548178242948228, "grad_norm": 2.4821529388427734, "learning_rate": 9.976392048566158e-06, "loss": 3.2573, "step": 157150 }, { "epoch": 0.15486708227264487, "grad_norm": 2.383223533630371, "learning_rate": 9.976377025803023e-06, "loss": 3.2692, "step": 157200 }, { "epoch": 0.15491634025046697, "grad_norm": 2.370103597640991, "learning_rate": 9.97636199827291e-06, "loss": 3.2354, "step": 157250 }, { "epoch": 0.15496559822828906, "grad_norm": 2.358902931213379, "learning_rate": 9.97634696597583e-06, "loss": 3.2772, "step": 157300 }, { "epoch": 0.15501485620611113, "grad_norm": 2.5299248695373535, "learning_rate": 9.9763319289118e-06, "loss": 3.2773, "step": 157350 }, { "epoch": 0.15506411418393323, "grad_norm": 2.514885187149048, "learning_rate": 9.976316887080833e-06, "loss": 3.2201, "step": 157400 }, { "epoch": 0.15511337216175533, "grad_norm": 2.425171136856079, "learning_rate": 9.976301840482946e-06, "loss": 3.2306, "step": 157450 }, { "epoch": 0.1551626301395774, "grad_norm": 2.463677167892456, "learning_rate": 9.97628678911815e-06, "loss": 3.2793, "step": 157500 }, { "epoch": 0.1552118881173995, "grad_norm": 2.603466033935547, "learning_rate": 9.976271732986461e-06, "loss": 3.2586, "step": 157550 }, { "epoch": 0.15526114609522157, "grad_norm": 2.4229319095611572, "learning_rate": 9.976256672087893e-06, "loss": 3.3244, "step": 157600 }, { "epoch": 0.15531040407304367, "grad_norm": 2.3020153045654297, "learning_rate": 9.976241606422463e-06, "loss": 3.3079, "step": 157650 }, { "epoch": 0.15535966205086577, "grad_norm": 2.6308183670043945, "learning_rate": 9.976226535990181e-06, "loss": 3.2728, "step": 157700 }, { "epoch": 0.15540892002868784, 
"grad_norm": 2.3429410457611084, "learning_rate": 9.976211460791063e-06, "loss": 3.2405, "step": 157750 }, { "epoch": 0.15545817800650993, "grad_norm": 2.586076021194458, "learning_rate": 9.976196380825126e-06, "loss": 3.2596, "step": 157800 }, { "epoch": 0.15550743598433203, "grad_norm": 2.5446383953094482, "learning_rate": 9.976181296092382e-06, "loss": 3.2657, "step": 157850 }, { "epoch": 0.1555566939621541, "grad_norm": 2.7898037433624268, "learning_rate": 9.976166206592845e-06, "loss": 3.2571, "step": 157900 }, { "epoch": 0.1556059519399762, "grad_norm": 2.2260541915893555, "learning_rate": 9.976151112326532e-06, "loss": 3.3077, "step": 157950 }, { "epoch": 0.1556552099177983, "grad_norm": 2.329389810562134, "learning_rate": 9.976136013293455e-06, "loss": 3.32, "step": 158000 }, { "epoch": 0.15570446789562037, "grad_norm": 2.417072296142578, "learning_rate": 9.97612090949363e-06, "loss": 3.2907, "step": 158050 }, { "epoch": 0.15575372587344247, "grad_norm": 2.41607666015625, "learning_rate": 9.97610580092707e-06, "loss": 3.2577, "step": 158100 }, { "epoch": 0.15580298385126454, "grad_norm": 2.471095085144043, "learning_rate": 9.976090687593791e-06, "loss": 3.2543, "step": 158150 }, { "epoch": 0.15585224182908664, "grad_norm": 2.5231313705444336, "learning_rate": 9.976075569493806e-06, "loss": 3.2247, "step": 158200 }, { "epoch": 0.15590149980690873, "grad_norm": 2.5799996852874756, "learning_rate": 9.976060446627132e-06, "loss": 3.254, "step": 158250 }, { "epoch": 0.1559507577847308, "grad_norm": 2.439098834991455, "learning_rate": 9.976045318993779e-06, "loss": 3.247, "step": 158300 }, { "epoch": 0.1560000157625529, "grad_norm": 2.2421817779541016, "learning_rate": 9.976030186593767e-06, "loss": 3.2679, "step": 158350 }, { "epoch": 0.156049273740375, "grad_norm": 2.361891269683838, "learning_rate": 9.976015049427105e-06, "loss": 3.2192, "step": 158400 }, { "epoch": 0.15609853171819707, "grad_norm": 2.5467379093170166, "learning_rate": 9.975999907493811e-06, 
"loss": 3.2446, "step": 158450 }, { "epoch": 0.15614778969601917, "grad_norm": 2.68713116645813, "learning_rate": 9.975984760793899e-06, "loss": 3.286, "step": 158500 }, { "epoch": 0.15619704767384127, "grad_norm": 2.6045749187469482, "learning_rate": 9.975969609327385e-06, "loss": 3.2655, "step": 158550 }, { "epoch": 0.15624630565166334, "grad_norm": 2.1771633625030518, "learning_rate": 9.975954453094281e-06, "loss": 3.2546, "step": 158600 }, { "epoch": 0.15629556362948543, "grad_norm": 2.3881595134735107, "learning_rate": 9.975939292094601e-06, "loss": 3.193, "step": 158650 }, { "epoch": 0.15634482160730753, "grad_norm": 2.473228931427002, "learning_rate": 9.975924126328362e-06, "loss": 3.2561, "step": 158700 }, { "epoch": 0.1563940795851296, "grad_norm": 2.3984456062316895, "learning_rate": 9.975908955795577e-06, "loss": 3.3194, "step": 158750 }, { "epoch": 0.1564433375629517, "grad_norm": 2.685887336730957, "learning_rate": 9.97589378049626e-06, "loss": 3.3194, "step": 158800 }, { "epoch": 0.15649259554077377, "grad_norm": 2.4245779514312744, "learning_rate": 9.975878600430428e-06, "loss": 3.2909, "step": 158850 }, { "epoch": 0.15654185351859587, "grad_norm": 2.189951181411743, "learning_rate": 9.975863415598092e-06, "loss": 3.3106, "step": 158900 }, { "epoch": 0.15659111149641797, "grad_norm": 2.4818761348724365, "learning_rate": 9.97584822599927e-06, "loss": 3.2693, "step": 158950 }, { "epoch": 0.15664036947424004, "grad_norm": 2.373173236846924, "learning_rate": 9.975833031633974e-06, "loss": 3.2634, "step": 159000 }, { "epoch": 0.15668962745206214, "grad_norm": 2.3683037757873535, "learning_rate": 9.97581783250222e-06, "loss": 3.3133, "step": 159050 }, { "epoch": 0.15673888542988423, "grad_norm": 2.558323621749878, "learning_rate": 9.975802628604023e-06, "loss": 3.2526, "step": 159100 }, { "epoch": 0.1567881434077063, "grad_norm": 2.6029269695281982, "learning_rate": 9.975787419939396e-06, "loss": 3.3302, "step": 159150 }, { "epoch": 0.1568374013855284, 
"grad_norm": 2.276942253112793, "learning_rate": 9.975772206508354e-06, "loss": 3.2402, "step": 159200 }, { "epoch": 0.1568866593633505, "grad_norm": 2.692389726638794, "learning_rate": 9.975756988310912e-06, "loss": 3.2812, "step": 159250 }, { "epoch": 0.15693591734117257, "grad_norm": 2.3732175827026367, "learning_rate": 9.975741765347086e-06, "loss": 3.2499, "step": 159300 }, { "epoch": 0.15698517531899467, "grad_norm": 2.4103550910949707, "learning_rate": 9.975726537616888e-06, "loss": 3.3174, "step": 159350 }, { "epoch": 0.15703443329681674, "grad_norm": 2.1734941005706787, "learning_rate": 9.975711305120333e-06, "loss": 3.2564, "step": 159400 }, { "epoch": 0.15708369127463884, "grad_norm": 2.4341249465942383, "learning_rate": 9.975696067857436e-06, "loss": 3.3383, "step": 159450 }, { "epoch": 0.15713294925246094, "grad_norm": 2.3326504230499268, "learning_rate": 9.975680825828213e-06, "loss": 3.2316, "step": 159500 }, { "epoch": 0.157182207230283, "grad_norm": 2.4679226875305176, "learning_rate": 9.975665579032676e-06, "loss": 3.3058, "step": 159550 }, { "epoch": 0.1572314652081051, "grad_norm": 2.559429407119751, "learning_rate": 9.975650327470842e-06, "loss": 3.2462, "step": 159600 }, { "epoch": 0.1572807231859272, "grad_norm": 2.2775092124938965, "learning_rate": 9.975635071142725e-06, "loss": 3.279, "step": 159650 }, { "epoch": 0.15732998116374927, "grad_norm": 2.461078643798828, "learning_rate": 9.975619810048338e-06, "loss": 3.2095, "step": 159700 }, { "epoch": 0.15737923914157137, "grad_norm": 2.501760721206665, "learning_rate": 9.975604544187697e-06, "loss": 3.2993, "step": 159750 }, { "epoch": 0.15742849711939347, "grad_norm": 2.6077377796173096, "learning_rate": 9.975589273560816e-06, "loss": 3.2149, "step": 159800 }, { "epoch": 0.15747775509721554, "grad_norm": 2.513956308364868, "learning_rate": 9.97557399816771e-06, "loss": 3.2562, "step": 159850 }, { "epoch": 0.15752701307503764, "grad_norm": 2.3805456161499023, "learning_rate": 
9.975558718008396e-06, "loss": 3.2899, "step": 159900 }, { "epoch": 0.15757627105285973, "grad_norm": 2.356175184249878, "learning_rate": 9.975543433082886e-06, "loss": 3.2726, "step": 159950 }, { "epoch": 0.1576255290306818, "grad_norm": 2.30304217338562, "learning_rate": 9.975528143391193e-06, "loss": 3.2843, "step": 160000 }, { "epoch": 0.1576747870085039, "grad_norm": 2.495638847351074, "learning_rate": 9.975512848933336e-06, "loss": 3.3027, "step": 160050 }, { "epoch": 0.15772404498632597, "grad_norm": 2.4209985733032227, "learning_rate": 9.975497549709326e-06, "loss": 3.2801, "step": 160100 }, { "epoch": 0.15777330296414807, "grad_norm": 2.4127249717712402, "learning_rate": 9.97548224571918e-06, "loss": 3.2352, "step": 160150 }, { "epoch": 0.15782256094197017, "grad_norm": 2.3141283988952637, "learning_rate": 9.97546693696291e-06, "loss": 3.2455, "step": 160200 }, { "epoch": 0.15787181891979224, "grad_norm": 2.4455366134643555, "learning_rate": 9.975451623440533e-06, "loss": 3.2768, "step": 160250 }, { "epoch": 0.15792107689761434, "grad_norm": 2.34306001663208, "learning_rate": 9.975436305152062e-06, "loss": 3.2844, "step": 160300 }, { "epoch": 0.15797033487543644, "grad_norm": 2.4881815910339355, "learning_rate": 9.975420982097513e-06, "loss": 3.2288, "step": 160350 }, { "epoch": 0.1580195928532585, "grad_norm": 2.367155075073242, "learning_rate": 9.975405654276902e-06, "loss": 3.2429, "step": 160400 }, { "epoch": 0.1580688508310806, "grad_norm": 2.293860912322998, "learning_rate": 9.975390321690239e-06, "loss": 3.2559, "step": 160450 }, { "epoch": 0.1581181088089027, "grad_norm": 2.5184576511383057, "learning_rate": 9.975374984337545e-06, "loss": 3.1892, "step": 160500 }, { "epoch": 0.15816736678672477, "grad_norm": 2.3683156967163086, "learning_rate": 9.97535964221883e-06, "loss": 3.268, "step": 160550 }, { "epoch": 0.15821662476454687, "grad_norm": 2.399445056915283, "learning_rate": 9.975344295334108e-06, "loss": 3.261, "step": 160600 }, { "epoch": 
0.15826588274236894, "grad_norm": 2.7420685291290283, "learning_rate": 9.975328943683399e-06, "loss": 3.2957, "step": 160650 }, { "epoch": 0.15831514072019104, "grad_norm": 2.550940990447998, "learning_rate": 9.975313587266712e-06, "loss": 3.2946, "step": 160700 }, { "epoch": 0.15836439869801314, "grad_norm": 2.3411035537719727, "learning_rate": 9.975298226084066e-06, "loss": 3.2642, "step": 160750 }, { "epoch": 0.1584136566758352, "grad_norm": 2.8769683837890625, "learning_rate": 9.975282860135473e-06, "loss": 3.2598, "step": 160800 }, { "epoch": 0.1584629146536573, "grad_norm": 2.3809103965759277, "learning_rate": 9.975267489420948e-06, "loss": 3.2752, "step": 160850 }, { "epoch": 0.1585121726314794, "grad_norm": 2.5599234104156494, "learning_rate": 9.975252113940508e-06, "loss": 3.2836, "step": 160900 }, { "epoch": 0.15856143060930147, "grad_norm": 2.575878858566284, "learning_rate": 9.975236733694165e-06, "loss": 3.2309, "step": 160950 }, { "epoch": 0.15861068858712357, "grad_norm": 2.320521593093872, "learning_rate": 9.975221348681936e-06, "loss": 3.2435, "step": 161000 }, { "epoch": 0.15865994656494567, "grad_norm": 2.399873733520508, "learning_rate": 9.975205958903834e-06, "loss": 3.2466, "step": 161050 }, { "epoch": 0.15870920454276774, "grad_norm": 2.5881803035736084, "learning_rate": 9.975190564359874e-06, "loss": 3.2574, "step": 161100 }, { "epoch": 0.15875846252058984, "grad_norm": 2.3451972007751465, "learning_rate": 9.97517516505007e-06, "loss": 3.1979, "step": 161150 }, { "epoch": 0.15880772049841194, "grad_norm": 2.441636323928833, "learning_rate": 9.975159760974437e-06, "loss": 3.3197, "step": 161200 }, { "epoch": 0.158856978476234, "grad_norm": 2.2847506999969482, "learning_rate": 9.975144352132993e-06, "loss": 3.2379, "step": 161250 }, { "epoch": 0.1589062364540561, "grad_norm": 2.575822353363037, "learning_rate": 9.97512893852575e-06, "loss": 3.2633, "step": 161300 }, { "epoch": 0.15895549443187817, "grad_norm": 2.2760426998138428, 
"learning_rate": 9.975113520152722e-06, "loss": 3.329, "step": 161350 }, { "epoch": 0.15900475240970027, "grad_norm": 2.451220989227295, "learning_rate": 9.975098097013926e-06, "loss": 3.1977, "step": 161400 }, { "epoch": 0.15905401038752237, "grad_norm": 3.0568602085113525, "learning_rate": 9.975082669109375e-06, "loss": 3.2664, "step": 161450 }, { "epoch": 0.15910326836534444, "grad_norm": 2.5147864818573, "learning_rate": 9.975067236439085e-06, "loss": 3.2469, "step": 161500 }, { "epoch": 0.15915252634316654, "grad_norm": 2.6028192043304443, "learning_rate": 9.975051799003069e-06, "loss": 3.2653, "step": 161550 }, { "epoch": 0.15920178432098864, "grad_norm": 2.234363079071045, "learning_rate": 9.975036356801344e-06, "loss": 3.2553, "step": 161600 }, { "epoch": 0.1592510422988107, "grad_norm": 2.45231556892395, "learning_rate": 9.975020909833923e-06, "loss": 3.2516, "step": 161650 }, { "epoch": 0.1593003002766328, "grad_norm": 2.5472731590270996, "learning_rate": 9.975005458100821e-06, "loss": 3.2428, "step": 161700 }, { "epoch": 0.1593495582544549, "grad_norm": 2.3638665676116943, "learning_rate": 9.974990001602054e-06, "loss": 3.2315, "step": 161750 }, { "epoch": 0.15939881623227697, "grad_norm": 2.5635008811950684, "learning_rate": 9.974974540337636e-06, "loss": 3.2552, "step": 161800 }, { "epoch": 0.15944807421009907, "grad_norm": 2.347587823867798, "learning_rate": 9.974959074307583e-06, "loss": 3.2654, "step": 161850 }, { "epoch": 0.15949733218792114, "grad_norm": 2.297929048538208, "learning_rate": 9.97494360351191e-06, "loss": 3.2206, "step": 161900 }, { "epoch": 0.15954659016574324, "grad_norm": 2.5409791469573975, "learning_rate": 9.974928127950627e-06, "loss": 3.2447, "step": 161950 }, { "epoch": 0.15959584814356534, "grad_norm": 2.4456892013549805, "learning_rate": 9.974912647623754e-06, "loss": 3.2503, "step": 162000 }, { "epoch": 0.1596451061213874, "grad_norm": 2.463228464126587, "learning_rate": 9.974897162531304e-06, "loss": 3.3117, "step": 
162050 }, { "epoch": 0.1596943640992095, "grad_norm": 2.5095057487487793, "learning_rate": 9.974881672673293e-06, "loss": 3.2543, "step": 162100 }, { "epoch": 0.1597436220770316, "grad_norm": 2.442728042602539, "learning_rate": 9.974866178049734e-06, "loss": 3.218, "step": 162150 }, { "epoch": 0.15979288005485368, "grad_norm": 2.33260178565979, "learning_rate": 9.974850678660644e-06, "loss": 3.2395, "step": 162200 }, { "epoch": 0.15984213803267577, "grad_norm": 2.700822591781616, "learning_rate": 9.974835174506035e-06, "loss": 3.2238, "step": 162250 }, { "epoch": 0.15989139601049787, "grad_norm": 2.4309475421905518, "learning_rate": 9.974819665585923e-06, "loss": 3.2515, "step": 162300 }, { "epoch": 0.15994065398831994, "grad_norm": 2.7000277042388916, "learning_rate": 9.974804151900325e-06, "loss": 3.2912, "step": 162350 }, { "epoch": 0.15998991196614204, "grad_norm": 2.5332977771759033, "learning_rate": 9.974788633449255e-06, "loss": 3.2354, "step": 162400 }, { "epoch": 0.16003916994396414, "grad_norm": 2.52994966506958, "learning_rate": 9.974773110232725e-06, "loss": 3.2703, "step": 162450 }, { "epoch": 0.1600884279217862, "grad_norm": 2.4180970191955566, "learning_rate": 9.974757582250753e-06, "loss": 3.258, "step": 162500 }, { "epoch": 0.1601376858996083, "grad_norm": 2.184600830078125, "learning_rate": 9.974742049503353e-06, "loss": 3.2045, "step": 162550 }, { "epoch": 0.16018694387743038, "grad_norm": 2.323749303817749, "learning_rate": 9.974726511990538e-06, "loss": 3.2194, "step": 162600 }, { "epoch": 0.16023620185525247, "grad_norm": 2.2821106910705566, "learning_rate": 9.974710969712326e-06, "loss": 3.2347, "step": 162650 }, { "epoch": 0.16028545983307457, "grad_norm": 2.6876988410949707, "learning_rate": 9.97469542266873e-06, "loss": 3.1942, "step": 162700 }, { "epoch": 0.16033471781089664, "grad_norm": 2.5631306171417236, "learning_rate": 9.974679870859764e-06, "loss": 3.2672, "step": 162750 }, { "epoch": 0.16038397578871874, "grad_norm": 
2.278031826019287, "learning_rate": 9.974664314285447e-06, "loss": 3.2874, "step": 162800 }, { "epoch": 0.16043323376654084, "grad_norm": 2.370387554168701, "learning_rate": 9.97464875294579e-06, "loss": 3.2074, "step": 162850 }, { "epoch": 0.1604824917443629, "grad_norm": 2.5671796798706055, "learning_rate": 9.974633186840809e-06, "loss": 3.2763, "step": 162900 }, { "epoch": 0.160531749722185, "grad_norm": 2.334472894668579, "learning_rate": 9.974617615970519e-06, "loss": 3.2144, "step": 162950 }, { "epoch": 0.1605810077000071, "grad_norm": 2.337481737136841, "learning_rate": 9.974602040334935e-06, "loss": 3.1672, "step": 163000 }, { "epoch": 0.16063026567782918, "grad_norm": 2.287097454071045, "learning_rate": 9.97458645993407e-06, "loss": 3.2364, "step": 163050 }, { "epoch": 0.16067952365565127, "grad_norm": 2.4239590167999268, "learning_rate": 9.974570874767942e-06, "loss": 3.3133, "step": 163100 }, { "epoch": 0.16072878163347334, "grad_norm": 2.4097015857696533, "learning_rate": 9.974555284836566e-06, "loss": 3.2568, "step": 163150 }, { "epoch": 0.16077803961129544, "grad_norm": 2.3915295600891113, "learning_rate": 9.974539690139954e-06, "loss": 3.2289, "step": 163200 }, { "epoch": 0.16082729758911754, "grad_norm": 2.7900233268737793, "learning_rate": 9.974524090678123e-06, "loss": 3.2097, "step": 163250 }, { "epoch": 0.1608765555669396, "grad_norm": 2.655670404434204, "learning_rate": 9.974508486451087e-06, "loss": 3.2298, "step": 163300 }, { "epoch": 0.1609258135447617, "grad_norm": 2.619340419769287, "learning_rate": 9.974492877458863e-06, "loss": 3.2728, "step": 163350 }, { "epoch": 0.1609750715225838, "grad_norm": 2.752850294113159, "learning_rate": 9.974477263701461e-06, "loss": 3.2741, "step": 163400 }, { "epoch": 0.16102432950040588, "grad_norm": 2.250701427459717, "learning_rate": 9.974461645178903e-06, "loss": 3.287, "step": 163450 }, { "epoch": 0.16107358747822798, "grad_norm": 2.434692859649658, "learning_rate": 9.974446021891197e-06, "loss": 
3.2272, "step": 163500 }, { "epoch": 0.16112284545605007, "grad_norm": 2.444840908050537, "learning_rate": 9.974430393838363e-06, "loss": 3.2033, "step": 163550 }, { "epoch": 0.16117210343387214, "grad_norm": 2.3131494522094727, "learning_rate": 9.974414761020416e-06, "loss": 3.225, "step": 163600 }, { "epoch": 0.16122136141169424, "grad_norm": 2.7039568424224854, "learning_rate": 9.974399123437367e-06, "loss": 3.2521, "step": 163650 }, { "epoch": 0.16127061938951634, "grad_norm": 2.6730096340179443, "learning_rate": 9.974383481089234e-06, "loss": 3.2567, "step": 163700 }, { "epoch": 0.1613198773673384, "grad_norm": 2.6406149864196777, "learning_rate": 9.974367833976031e-06, "loss": 3.2566, "step": 163750 }, { "epoch": 0.1613691353451605, "grad_norm": 2.492889881134033, "learning_rate": 9.974352182097771e-06, "loss": 3.2304, "step": 163800 }, { "epoch": 0.16141839332298258, "grad_norm": 2.5191712379455566, "learning_rate": 9.974336525454474e-06, "loss": 3.235, "step": 163850 }, { "epoch": 0.16146765130080468, "grad_norm": 2.555583953857422, "learning_rate": 9.974320864046151e-06, "loss": 3.188, "step": 163900 }, { "epoch": 0.16151690927862677, "grad_norm": 2.5691208839416504, "learning_rate": 9.974305197872818e-06, "loss": 3.2567, "step": 163950 }, { "epoch": 0.16156616725644884, "grad_norm": 2.5336766242980957, "learning_rate": 9.97428952693449e-06, "loss": 3.1847, "step": 164000 }, { "epoch": 0.16161542523427094, "grad_norm": 2.3060240745544434, "learning_rate": 9.974273851231183e-06, "loss": 3.1885, "step": 164050 }, { "epoch": 0.16166468321209304, "grad_norm": 2.2539544105529785, "learning_rate": 9.974258170762912e-06, "loss": 3.2804, "step": 164100 }, { "epoch": 0.1617139411899151, "grad_norm": 2.66815185546875, "learning_rate": 9.974242485529689e-06, "loss": 3.1809, "step": 164150 }, { "epoch": 0.1617631991677372, "grad_norm": 2.692131757736206, "learning_rate": 9.974226795531533e-06, "loss": 3.2172, "step": 164200 }, { "epoch": 0.1618124571455593, 
"grad_norm": 2.503390073776245, "learning_rate": 9.974211100768456e-06, "loss": 3.2218, "step": 164250 }, { "epoch": 0.16186171512338138, "grad_norm": 2.422560453414917, "learning_rate": 9.974195401240474e-06, "loss": 3.1958, "step": 164300 }, { "epoch": 0.16191097310120348, "grad_norm": 2.437238931655884, "learning_rate": 9.974179696947604e-06, "loss": 3.298, "step": 164350 }, { "epoch": 0.16196023107902555, "grad_norm": 2.9633736610412598, "learning_rate": 9.974163987889858e-06, "loss": 3.2335, "step": 164400 }, { "epoch": 0.16200948905684764, "grad_norm": 2.436521530151367, "learning_rate": 9.974148274067252e-06, "loss": 3.2471, "step": 164450 }, { "epoch": 0.16205874703466974, "grad_norm": 2.5256268978118896, "learning_rate": 9.974132555479801e-06, "loss": 3.2861, "step": 164500 }, { "epoch": 0.1621080050124918, "grad_norm": 2.4144959449768066, "learning_rate": 9.974116832127523e-06, "loss": 3.2956, "step": 164550 }, { "epoch": 0.1621572629903139, "grad_norm": 2.510390043258667, "learning_rate": 9.97410110401043e-06, "loss": 3.3113, "step": 164600 }, { "epoch": 0.162206520968136, "grad_norm": 2.3978402614593506, "learning_rate": 9.974085371128535e-06, "loss": 3.2662, "step": 164650 }, { "epoch": 0.16225577894595808, "grad_norm": 2.358368396759033, "learning_rate": 9.974069633481859e-06, "loss": 3.2522, "step": 164700 }, { "epoch": 0.16230503692378018, "grad_norm": 2.4287681579589844, "learning_rate": 9.974053891070412e-06, "loss": 3.2661, "step": 164750 }, { "epoch": 0.16235429490160228, "grad_norm": 2.3747003078460693, "learning_rate": 9.974038143894212e-06, "loss": 3.3153, "step": 164800 }, { "epoch": 0.16240355287942435, "grad_norm": 2.2973694801330566, "learning_rate": 9.974022391953271e-06, "loss": 3.217, "step": 164850 }, { "epoch": 0.16245281085724644, "grad_norm": 2.376401901245117, "learning_rate": 9.974006635247608e-06, "loss": 3.2057, "step": 164900 }, { "epoch": 0.1625020688350685, "grad_norm": 2.4252877235412598, "learning_rate": 
9.973990873777236e-06, "loss": 3.2184, "step": 164950 }, { "epoch": 0.1625513268128906, "grad_norm": 2.3587210178375244, "learning_rate": 9.973975107542169e-06, "loss": 3.2529, "step": 165000 }, { "epoch": 0.1626005847907127, "grad_norm": 2.354672431945801, "learning_rate": 9.973959336542425e-06, "loss": 3.2774, "step": 165050 }, { "epoch": 0.16264984276853478, "grad_norm": 2.5408666133880615, "learning_rate": 9.973943560778016e-06, "loss": 3.2704, "step": 165100 }, { "epoch": 0.16269910074635688, "grad_norm": 2.3218014240264893, "learning_rate": 9.973927780248958e-06, "loss": 3.1652, "step": 165150 }, { "epoch": 0.16274835872417898, "grad_norm": 2.471491575241089, "learning_rate": 9.973911994955267e-06, "loss": 3.2896, "step": 165200 }, { "epoch": 0.16279761670200105, "grad_norm": 2.339040756225586, "learning_rate": 9.97389620489696e-06, "loss": 3.2435, "step": 165250 }, { "epoch": 0.16284687467982314, "grad_norm": 2.364563226699829, "learning_rate": 9.973880410074049e-06, "loss": 3.232, "step": 165300 }, { "epoch": 0.16289613265764524, "grad_norm": 2.2789087295532227, "learning_rate": 9.973864610486548e-06, "loss": 3.2473, "step": 165350 }, { "epoch": 0.1629453906354673, "grad_norm": 2.5302774906158447, "learning_rate": 9.973848806134477e-06, "loss": 3.2654, "step": 165400 }, { "epoch": 0.1629946486132894, "grad_norm": 2.4698848724365234, "learning_rate": 9.973832997017847e-06, "loss": 3.215, "step": 165450 }, { "epoch": 0.1630439065911115, "grad_norm": 2.4532675743103027, "learning_rate": 9.973817183136674e-06, "loss": 3.2503, "step": 165500 }, { "epoch": 0.16309316456893358, "grad_norm": 2.4959073066711426, "learning_rate": 9.973801364490973e-06, "loss": 3.2812, "step": 165550 }, { "epoch": 0.16314242254675568, "grad_norm": 2.468905448913574, "learning_rate": 9.973785541080762e-06, "loss": 3.1966, "step": 165600 }, { "epoch": 0.16319168052457775, "grad_norm": 2.474400758743286, "learning_rate": 9.973769712906054e-06, "loss": 3.3249, "step": 165650 }, { "epoch": 
0.16324093850239985, "grad_norm": 2.4777016639709473, "learning_rate": 9.973753879966861e-06, "loss": 3.2167, "step": 165700 }, { "epoch": 0.16329019648022194, "grad_norm": 2.321035861968994, "learning_rate": 9.973738042263203e-06, "loss": 3.2395, "step": 165750 }, { "epoch": 0.16333945445804401, "grad_norm": 2.303624391555786, "learning_rate": 9.973722199795095e-06, "loss": 3.2329, "step": 165800 }, { "epoch": 0.1633887124358661, "grad_norm": 2.3610687255859375, "learning_rate": 9.973706352562549e-06, "loss": 3.1736, "step": 165850 }, { "epoch": 0.1634379704136882, "grad_norm": 2.414618968963623, "learning_rate": 9.973690500565582e-06, "loss": 3.2749, "step": 165900 }, { "epoch": 0.16348722839151028, "grad_norm": 2.5510413646698, "learning_rate": 9.973674643804208e-06, "loss": 3.1891, "step": 165950 }, { "epoch": 0.16353648636933238, "grad_norm": 2.2854127883911133, "learning_rate": 9.973658782278445e-06, "loss": 3.2653, "step": 166000 }, { "epoch": 0.16358574434715448, "grad_norm": 2.2825701236724854, "learning_rate": 9.973642915988306e-06, "loss": 3.2607, "step": 166050 }, { "epoch": 0.16363500232497655, "grad_norm": 2.5323050022125244, "learning_rate": 9.973627044933806e-06, "loss": 3.2294, "step": 166100 }, { "epoch": 0.16368426030279865, "grad_norm": 2.3168442249298096, "learning_rate": 9.97361116911496e-06, "loss": 3.2706, "step": 166150 }, { "epoch": 0.16373351828062072, "grad_norm": 2.4082138538360596, "learning_rate": 9.973595288531785e-06, "loss": 3.2644, "step": 166200 }, { "epoch": 0.1637827762584428, "grad_norm": 2.4204163551330566, "learning_rate": 9.973579403184294e-06, "loss": 3.2273, "step": 166250 }, { "epoch": 0.1638320342362649, "grad_norm": 2.5267386436462402, "learning_rate": 9.973563513072506e-06, "loss": 3.2145, "step": 166300 }, { "epoch": 0.16388129221408698, "grad_norm": 2.5250329971313477, "learning_rate": 9.973547618196431e-06, "loss": 3.3321, "step": 166350 }, { "epoch": 0.16393055019190908, "grad_norm": 2.3203165531158447, 
"learning_rate": 9.97353171855609e-06, "loss": 3.253, "step": 166400 }, { "epoch": 0.16397980816973118, "grad_norm": 2.511007070541382, "learning_rate": 9.97351581415149e-06, "loss": 3.251, "step": 166450 }, { "epoch": 0.16402906614755325, "grad_norm": 2.4214298725128174, "learning_rate": 9.973499904982655e-06, "loss": 3.2116, "step": 166500 }, { "epoch": 0.16407832412537535, "grad_norm": 2.253725528717041, "learning_rate": 9.973483991049596e-06, "loss": 3.2826, "step": 166550 }, { "epoch": 0.16412758210319744, "grad_norm": 2.6337883472442627, "learning_rate": 9.973468072352327e-06, "loss": 3.261, "step": 166600 }, { "epoch": 0.16417684008101951, "grad_norm": 3.005277156829834, "learning_rate": 9.973452148890868e-06, "loss": 3.2677, "step": 166650 }, { "epoch": 0.1642260980588416, "grad_norm": 2.4036245346069336, "learning_rate": 9.97343622066523e-06, "loss": 3.245, "step": 166700 }, { "epoch": 0.1642753560366637, "grad_norm": 2.2724854946136475, "learning_rate": 9.97342028767543e-06, "loss": 3.2354, "step": 166750 }, { "epoch": 0.16432461401448578, "grad_norm": 2.4631028175354004, "learning_rate": 9.973404349921482e-06, "loss": 3.271, "step": 166800 }, { "epoch": 0.16437387199230788, "grad_norm": 2.475844621658325, "learning_rate": 9.973388407403402e-06, "loss": 3.2628, "step": 166850 }, { "epoch": 0.16442312997012995, "grad_norm": 2.3671717643737793, "learning_rate": 9.973372460121206e-06, "loss": 3.2566, "step": 166900 }, { "epoch": 0.16447238794795205, "grad_norm": 2.570286989212036, "learning_rate": 9.973356508074907e-06, "loss": 3.2782, "step": 166950 }, { "epoch": 0.16452164592577415, "grad_norm": 2.316213846206665, "learning_rate": 9.973340551264525e-06, "loss": 3.2138, "step": 167000 }, { "epoch": 0.16457090390359622, "grad_norm": 2.358488082885742, "learning_rate": 9.97332458969007e-06, "loss": 3.2498, "step": 167050 }, { "epoch": 0.16462016188141831, "grad_norm": 2.455321788787842, "learning_rate": 9.97330862335156e-06, "loss": 3.2599, "step": 167100 }, 
{ "epoch": 0.1646694198592404, "grad_norm": 2.295921564102173, "learning_rate": 9.97329265224901e-06, "loss": 3.2884, "step": 167150 }, { "epoch": 0.16471867783706248, "grad_norm": 2.2220418453216553, "learning_rate": 9.973276676382433e-06, "loss": 3.2416, "step": 167200 }, { "epoch": 0.16476793581488458, "grad_norm": 2.505247116088867, "learning_rate": 9.97326069575185e-06, "loss": 3.2174, "step": 167250 }, { "epoch": 0.16481719379270668, "grad_norm": 2.403751850128174, "learning_rate": 9.973244710357271e-06, "loss": 3.2767, "step": 167300 }, { "epoch": 0.16486645177052875, "grad_norm": 2.3857574462890625, "learning_rate": 9.973228720198713e-06, "loss": 3.2768, "step": 167350 }, { "epoch": 0.16491570974835085, "grad_norm": 2.4741947650909424, "learning_rate": 9.97321272527619e-06, "loss": 3.2976, "step": 167400 }, { "epoch": 0.16496496772617292, "grad_norm": 2.4477157592773438, "learning_rate": 9.97319672558972e-06, "loss": 3.2816, "step": 167450 }, { "epoch": 0.16501422570399502, "grad_norm": 2.4478719234466553, "learning_rate": 9.973180721139318e-06, "loss": 3.203, "step": 167500 }, { "epoch": 0.1650634836818171, "grad_norm": 2.3998541831970215, "learning_rate": 9.973164711924996e-06, "loss": 3.2041, "step": 167550 }, { "epoch": 0.16511274165963918, "grad_norm": 2.541332960128784, "learning_rate": 9.973148697946772e-06, "loss": 3.1557, "step": 167600 }, { "epoch": 0.16516199963746128, "grad_norm": 2.291151285171509, "learning_rate": 9.973132679204662e-06, "loss": 3.2109, "step": 167650 }, { "epoch": 0.16521125761528338, "grad_norm": 2.589271068572998, "learning_rate": 9.97311665569868e-06, "loss": 3.2819, "step": 167700 }, { "epoch": 0.16526051559310545, "grad_norm": 2.297220468521118, "learning_rate": 9.973100627428842e-06, "loss": 3.179, "step": 167750 }, { "epoch": 0.16530977357092755, "grad_norm": 2.5937283039093018, "learning_rate": 9.973084594395163e-06, "loss": 3.2171, "step": 167800 }, { "epoch": 0.16535903154874965, "grad_norm": 2.7462685108184814, 
"learning_rate": 9.973068556597659e-06, "loss": 3.1943, "step": 167850 }, { "epoch": 0.16540828952657172, "grad_norm": 2.5025150775909424, "learning_rate": 9.973052514036343e-06, "loss": 3.1982, "step": 167900 }, { "epoch": 0.16545754750439381, "grad_norm": 2.4069015979766846, "learning_rate": 9.973036466711232e-06, "loss": 3.3, "step": 167950 }, { "epoch": 0.1655068054822159, "grad_norm": 2.2212586402893066, "learning_rate": 9.973020414622344e-06, "loss": 3.2683, "step": 168000 }, { "epoch": 0.16555606346003798, "grad_norm": 2.361391067504883, "learning_rate": 9.973004357769688e-06, "loss": 3.2019, "step": 168050 }, { "epoch": 0.16560532143786008, "grad_norm": 2.3977742195129395, "learning_rate": 9.972988296153288e-06, "loss": 3.221, "step": 168100 }, { "epoch": 0.16565457941568215, "grad_norm": 2.4179306030273438, "learning_rate": 9.972972229773151e-06, "loss": 3.228, "step": 168150 }, { "epoch": 0.16570383739350425, "grad_norm": 2.4009392261505127, "learning_rate": 9.972956158629297e-06, "loss": 3.22, "step": 168200 }, { "epoch": 0.16575309537132635, "grad_norm": 2.5648458003997803, "learning_rate": 9.972940082721741e-06, "loss": 3.2073, "step": 168250 }, { "epoch": 0.16580235334914842, "grad_norm": 2.255211353302002, "learning_rate": 9.972924002050497e-06, "loss": 3.2738, "step": 168300 }, { "epoch": 0.16585161132697052, "grad_norm": 2.3374712467193604, "learning_rate": 9.972907916615582e-06, "loss": 3.337, "step": 168350 }, { "epoch": 0.16590086930479261, "grad_norm": 2.404176712036133, "learning_rate": 9.97289182641701e-06, "loss": 3.2574, "step": 168400 }, { "epoch": 0.16595012728261468, "grad_norm": 2.205357789993286, "learning_rate": 9.972875731454797e-06, "loss": 3.3072, "step": 168450 }, { "epoch": 0.16599938526043678, "grad_norm": 2.372446298599243, "learning_rate": 9.97285963172896e-06, "loss": 3.1796, "step": 168500 }, { "epoch": 0.16604864323825888, "grad_norm": 2.388124942779541, "learning_rate": 9.97284352723951e-06, "loss": 3.1837, "step": 168550 
}, { "epoch": 0.16609790121608095, "grad_norm": 3.4314990043640137, "learning_rate": 9.972827417986468e-06, "loss": 3.2411, "step": 168600 }, { "epoch": 0.16614715919390305, "grad_norm": 2.3243260383605957, "learning_rate": 9.972811303969846e-06, "loss": 3.2424, "step": 168650 }, { "epoch": 0.16619641717172512, "grad_norm": 2.891624927520752, "learning_rate": 9.97279518518966e-06, "loss": 3.1539, "step": 168700 }, { "epoch": 0.16624567514954722, "grad_norm": 2.4437644481658936, "learning_rate": 9.972779061645926e-06, "loss": 3.245, "step": 168750 }, { "epoch": 0.16629493312736932, "grad_norm": 2.3860108852386475, "learning_rate": 9.972762933338657e-06, "loss": 3.3329, "step": 168800 }, { "epoch": 0.16634419110519139, "grad_norm": 2.1905527114868164, "learning_rate": 9.972746800267873e-06, "loss": 3.2195, "step": 168850 }, { "epoch": 0.16639344908301348, "grad_norm": 2.473388433456421, "learning_rate": 9.972730662433586e-06, "loss": 3.2742, "step": 168900 }, { "epoch": 0.16644270706083558, "grad_norm": 2.5832114219665527, "learning_rate": 9.972714519835812e-06, "loss": 3.2479, "step": 168950 }, { "epoch": 0.16649196503865765, "grad_norm": 2.5272881984710693, "learning_rate": 9.972698372474568e-06, "loss": 3.2061, "step": 169000 }, { "epoch": 0.16654122301647975, "grad_norm": 2.3357207775115967, "learning_rate": 9.972682220349868e-06, "loss": 3.2282, "step": 169050 }, { "epoch": 0.16659048099430185, "grad_norm": 2.3276467323303223, "learning_rate": 9.972666063461727e-06, "loss": 3.231, "step": 169100 }, { "epoch": 0.16663973897212392, "grad_norm": 2.5087954998016357, "learning_rate": 9.972649901810162e-06, "loss": 3.243, "step": 169150 }, { "epoch": 0.16668899694994602, "grad_norm": 2.5327253341674805, "learning_rate": 9.972633735395188e-06, "loss": 3.2477, "step": 169200 }, { "epoch": 0.16673825492776811, "grad_norm": 2.278111219406128, "learning_rate": 9.97261756421682e-06, "loss": 3.2326, "step": 169250 }, { "epoch": 0.16678751290559019, "grad_norm": 
2.4280636310577393, "learning_rate": 9.972601388275074e-06, "loss": 3.1736, "step": 169300 }, { "epoch": 0.16683677088341228, "grad_norm": 2.5307745933532715, "learning_rate": 9.972585207569964e-06, "loss": 3.1667, "step": 169350 }, { "epoch": 0.16688602886123435, "grad_norm": 2.3883166313171387, "learning_rate": 9.972569022101509e-06, "loss": 3.262, "step": 169400 }, { "epoch": 0.16693528683905645, "grad_norm": 2.391712188720703, "learning_rate": 9.97255283186972e-06, "loss": 3.2528, "step": 169450 }, { "epoch": 0.16698454481687855, "grad_norm": 2.478403329849243, "learning_rate": 9.972536636874615e-06, "loss": 3.2707, "step": 169500 }, { "epoch": 0.16703380279470062, "grad_norm": 2.325885772705078, "learning_rate": 9.97252043711621e-06, "loss": 3.3226, "step": 169550 }, { "epoch": 0.16708306077252272, "grad_norm": 2.742633819580078, "learning_rate": 9.97250423259452e-06, "loss": 3.248, "step": 169600 }, { "epoch": 0.16713231875034482, "grad_norm": 2.2895331382751465, "learning_rate": 9.97248802330956e-06, "loss": 3.2303, "step": 169650 }, { "epoch": 0.1671815767281669, "grad_norm": 2.2983646392822266, "learning_rate": 9.972471809261346e-06, "loss": 3.2391, "step": 169700 }, { "epoch": 0.16723083470598898, "grad_norm": 2.427401065826416, "learning_rate": 9.972455590449893e-06, "loss": 3.2274, "step": 169750 }, { "epoch": 0.16728009268381108, "grad_norm": 2.306028127670288, "learning_rate": 9.972439366875217e-06, "loss": 3.2631, "step": 169800 }, { "epoch": 0.16732935066163315, "grad_norm": 2.405992031097412, "learning_rate": 9.972423138537335e-06, "loss": 3.2077, "step": 169850 }, { "epoch": 0.16737860863945525, "grad_norm": 2.325310707092285, "learning_rate": 9.97240690543626e-06, "loss": 3.2439, "step": 169900 }, { "epoch": 0.16742786661727732, "grad_norm": 2.519519567489624, "learning_rate": 9.972390667572008e-06, "loss": 3.2298, "step": 169950 }, { "epoch": 0.16747712459509942, "grad_norm": 2.4212136268615723, "learning_rate": 9.972374424944595e-06, "loss": 
3.1672, "step": 170000 }, { "epoch": 0.16752638257292152, "grad_norm": 2.4617903232574463, "learning_rate": 9.972358177554036e-06, "loss": 3.2561, "step": 170050 }, { "epoch": 0.1675756405507436, "grad_norm": 2.2961173057556152, "learning_rate": 9.972341925400349e-06, "loss": 3.2511, "step": 170100 }, { "epoch": 0.16762489852856569, "grad_norm": 2.411914825439453, "learning_rate": 9.972325668483546e-06, "loss": 3.1909, "step": 170150 }, { "epoch": 0.16767415650638778, "grad_norm": 2.7543323040008545, "learning_rate": 9.972309406803647e-06, "loss": 3.2086, "step": 170200 }, { "epoch": 0.16772341448420985, "grad_norm": 2.360491991043091, "learning_rate": 9.972293140360663e-06, "loss": 3.205, "step": 170250 }, { "epoch": 0.16777267246203195, "grad_norm": 2.433039665222168, "learning_rate": 9.972276869154612e-06, "loss": 3.2174, "step": 170300 }, { "epoch": 0.16782193043985405, "grad_norm": 2.3675684928894043, "learning_rate": 9.97226059318551e-06, "loss": 3.2572, "step": 170350 }, { "epoch": 0.16787118841767612, "grad_norm": 2.5406646728515625, "learning_rate": 9.972244312453369e-06, "loss": 3.281, "step": 170400 }, { "epoch": 0.16792044639549822, "grad_norm": 2.286390542984009, "learning_rate": 9.97222802695821e-06, "loss": 3.1883, "step": 170450 }, { "epoch": 0.16796970437332032, "grad_norm": 2.4245963096618652, "learning_rate": 9.972211736700046e-06, "loss": 3.2696, "step": 170500 }, { "epoch": 0.1680189623511424, "grad_norm": 2.3237249851226807, "learning_rate": 9.972195441678892e-06, "loss": 3.2409, "step": 170550 }, { "epoch": 0.16806822032896448, "grad_norm": 2.3382763862609863, "learning_rate": 9.972179141894763e-06, "loss": 3.2172, "step": 170600 }, { "epoch": 0.16811747830678656, "grad_norm": 2.44527268409729, "learning_rate": 9.972162837347676e-06, "loss": 3.2409, "step": 170650 }, { "epoch": 0.16816673628460865, "grad_norm": 2.413429021835327, "learning_rate": 9.972146528037647e-06, "loss": 3.1964, "step": 170700 }, { "epoch": 0.16821599426243075, 
"grad_norm": 2.869380235671997, "learning_rate": 9.972130213964691e-06, "loss": 3.1853, "step": 170750 }, { "epoch": 0.16826525224025282, "grad_norm": 2.30489444732666, "learning_rate": 9.972113895128823e-06, "loss": 3.1096, "step": 170800 }, { "epoch": 0.16831451021807492, "grad_norm": 2.1066465377807617, "learning_rate": 9.97209757153006e-06, "loss": 3.2576, "step": 170850 }, { "epoch": 0.16836376819589702, "grad_norm": 2.743096113204956, "learning_rate": 9.972081243168416e-06, "loss": 3.2896, "step": 170900 }, { "epoch": 0.1684130261737191, "grad_norm": 2.644369602203369, "learning_rate": 9.972064910043908e-06, "loss": 3.2405, "step": 170950 }, { "epoch": 0.1684622841515412, "grad_norm": 2.267193555831909, "learning_rate": 9.972048572156551e-06, "loss": 3.2235, "step": 171000 }, { "epoch": 0.16851154212936328, "grad_norm": 2.5574398040771484, "learning_rate": 9.972032229506362e-06, "loss": 3.2429, "step": 171050 }, { "epoch": 0.16856080010718535, "grad_norm": 2.498792886734009, "learning_rate": 9.972015882093353e-06, "loss": 3.2312, "step": 171100 }, { "epoch": 0.16861005808500745, "grad_norm": 2.3705992698669434, "learning_rate": 9.971999529917545e-06, "loss": 3.2427, "step": 171150 }, { "epoch": 0.16865931606282952, "grad_norm": 2.2677624225616455, "learning_rate": 9.971983172978948e-06, "loss": 3.1684, "step": 171200 }, { "epoch": 0.16870857404065162, "grad_norm": 2.2330195903778076, "learning_rate": 9.971966811277583e-06, "loss": 3.2471, "step": 171250 }, { "epoch": 0.16875783201847372, "grad_norm": 2.7185702323913574, "learning_rate": 9.97195044481346e-06, "loss": 3.2494, "step": 171300 }, { "epoch": 0.1688070899962958, "grad_norm": 2.5487043857574463, "learning_rate": 9.9719340735866e-06, "loss": 3.2224, "step": 171350 }, { "epoch": 0.1688563479741179, "grad_norm": 2.3917417526245117, "learning_rate": 9.971917697597016e-06, "loss": 3.1809, "step": 171400 }, { "epoch": 0.16890560595193999, "grad_norm": 2.423706531524658, "learning_rate": 
9.971901316844725e-06, "loss": 3.2835, "step": 171450 }, { "epoch": 0.16895486392976206, "grad_norm": 2.3908448219299316, "learning_rate": 9.97188493132974e-06, "loss": 3.2845, "step": 171500 }, { "epoch": 0.16900412190758415, "grad_norm": 2.406096935272217, "learning_rate": 9.971868541052081e-06, "loss": 3.288, "step": 171550 }, { "epoch": 0.16905337988540625, "grad_norm": 2.4399733543395996, "learning_rate": 9.97185214601176e-06, "loss": 3.1303, "step": 171600 }, { "epoch": 0.16910263786322832, "grad_norm": 2.4967050552368164, "learning_rate": 9.971835746208795e-06, "loss": 3.2488, "step": 171650 }, { "epoch": 0.16915189584105042, "grad_norm": 2.177173137664795, "learning_rate": 9.9718193416432e-06, "loss": 3.2137, "step": 171700 }, { "epoch": 0.16920115381887252, "grad_norm": 2.3297157287597656, "learning_rate": 9.97180293231499e-06, "loss": 3.1859, "step": 171750 }, { "epoch": 0.1692504117966946, "grad_norm": 2.5912609100341797, "learning_rate": 9.971786518224183e-06, "loss": 3.2549, "step": 171800 }, { "epoch": 0.1692996697745167, "grad_norm": 2.3210597038269043, "learning_rate": 9.971770099370795e-06, "loss": 3.2719, "step": 171850 }, { "epoch": 0.16934892775233876, "grad_norm": 2.3501977920532227, "learning_rate": 9.97175367575484e-06, "loss": 3.25, "step": 171900 }, { "epoch": 0.16939818573016086, "grad_norm": 2.4376003742218018, "learning_rate": 9.971737247376332e-06, "loss": 3.2787, "step": 171950 }, { "epoch": 0.16944744370798295, "grad_norm": 2.7095160484313965, "learning_rate": 9.971720814235292e-06, "loss": 3.2282, "step": 172000 }, { "epoch": 0.16949670168580502, "grad_norm": 2.4647998809814453, "learning_rate": 9.971704376331732e-06, "loss": 3.2155, "step": 172050 }, { "epoch": 0.16954595966362712, "grad_norm": 2.3711771965026855, "learning_rate": 9.971687933665669e-06, "loss": 3.2271, "step": 172100 }, { "epoch": 0.16959521764144922, "grad_norm": 2.3051702976226807, "learning_rate": 9.971671486237117e-06, "loss": 3.2281, "step": 172150 }, { 
"epoch": 0.1696444756192713, "grad_norm": 2.2700905799865723, "learning_rate": 9.971655034046093e-06, "loss": 3.2093, "step": 172200 }, { "epoch": 0.1696937335970934, "grad_norm": 2.195577383041382, "learning_rate": 9.971638577092614e-06, "loss": 3.2203, "step": 172250 }, { "epoch": 0.16974299157491549, "grad_norm": 2.416840076446533, "learning_rate": 9.971622115376694e-06, "loss": 3.2358, "step": 172300 }, { "epoch": 0.16979224955273756, "grad_norm": 2.474241018295288, "learning_rate": 9.97160564889835e-06, "loss": 3.2703, "step": 172350 }, { "epoch": 0.16984150753055965, "grad_norm": 2.5900232791900635, "learning_rate": 9.971589177657596e-06, "loss": 3.2078, "step": 172400 }, { "epoch": 0.16989076550838172, "grad_norm": 2.2069942951202393, "learning_rate": 9.971572701654448e-06, "loss": 3.2314, "step": 172450 }, { "epoch": 0.16994002348620382, "grad_norm": 2.443166732788086, "learning_rate": 9.971556220888924e-06, "loss": 3.2652, "step": 172500 }, { "epoch": 0.16998928146402592, "grad_norm": 2.646296977996826, "learning_rate": 9.97153973536104e-06, "loss": 3.2193, "step": 172550 }, { "epoch": 0.170038539441848, "grad_norm": 2.4543447494506836, "learning_rate": 9.971523245070808e-06, "loss": 3.2515, "step": 172600 }, { "epoch": 0.1700877974196701, "grad_norm": 2.4075093269348145, "learning_rate": 9.971506750018246e-06, "loss": 3.2378, "step": 172650 }, { "epoch": 0.1701370553974922, "grad_norm": 2.411224365234375, "learning_rate": 9.97149025020337e-06, "loss": 3.2964, "step": 172700 }, { "epoch": 0.17018631337531426, "grad_norm": 2.372020721435547, "learning_rate": 9.971473745626197e-06, "loss": 3.2409, "step": 172750 }, { "epoch": 0.17023557135313636, "grad_norm": 2.430980682373047, "learning_rate": 9.97145723628674e-06, "loss": 3.2, "step": 172800 }, { "epoch": 0.17028482933095845, "grad_norm": 2.405514717102051, "learning_rate": 9.971440722185017e-06, "loss": 3.1711, "step": 172850 }, { "epoch": 0.17033408730878052, "grad_norm": 2.162614583969116, 
"learning_rate": 9.971424203321043e-06, "loss": 3.2052, "step": 172900 }, { "epoch": 0.17038334528660262, "grad_norm": 2.4492428302764893, "learning_rate": 9.971407679694834e-06, "loss": 3.259, "step": 172950 }, { "epoch": 0.1704326032644247, "grad_norm": 2.3386659622192383, "learning_rate": 9.971391151306406e-06, "loss": 3.1712, "step": 173000 }, { "epoch": 0.1704818612422468, "grad_norm": 2.463663101196289, "learning_rate": 9.971374618155773e-06, "loss": 3.2603, "step": 173050 }, { "epoch": 0.1705311192200689, "grad_norm": 2.4989259243011475, "learning_rate": 9.971358080242954e-06, "loss": 3.2787, "step": 173100 }, { "epoch": 0.17058037719789096, "grad_norm": 2.508099317550659, "learning_rate": 9.971341537567962e-06, "loss": 3.2207, "step": 173150 }, { "epoch": 0.17062963517571306, "grad_norm": 2.3569538593292236, "learning_rate": 9.971324990130817e-06, "loss": 3.2078, "step": 173200 }, { "epoch": 0.17067889315353516, "grad_norm": 2.2047271728515625, "learning_rate": 9.971308437931528e-06, "loss": 3.2143, "step": 173250 }, { "epoch": 0.17072815113135723, "grad_norm": 2.886317253112793, "learning_rate": 9.971291880970118e-06, "loss": 3.1974, "step": 173300 }, { "epoch": 0.17077740910917932, "grad_norm": 2.2679877281188965, "learning_rate": 9.971275319246598e-06, "loss": 3.2848, "step": 173350 }, { "epoch": 0.17082666708700142, "grad_norm": 2.631575584411621, "learning_rate": 9.971258752760986e-06, "loss": 3.2026, "step": 173400 }, { "epoch": 0.1708759250648235, "grad_norm": 2.408205986022949, "learning_rate": 9.971242181513296e-06, "loss": 3.1683, "step": 173450 }, { "epoch": 0.1709251830426456, "grad_norm": 2.502215623855591, "learning_rate": 9.971225605503547e-06, "loss": 3.2891, "step": 173500 }, { "epoch": 0.1709744410204677, "grad_norm": 2.265805244445801, "learning_rate": 9.971209024731753e-06, "loss": 3.2223, "step": 173550 }, { "epoch": 0.17102369899828976, "grad_norm": 2.2860562801361084, "learning_rate": 9.97119243919793e-06, "loss": 3.2067, "step": 
173600 }, { "epoch": 0.17107295697611186, "grad_norm": 2.3920576572418213, "learning_rate": 9.971175848902093e-06, "loss": 3.2394, "step": 173650 }, { "epoch": 0.17112221495393393, "grad_norm": 2.372739791870117, "learning_rate": 9.97115925384426e-06, "loss": 3.1576, "step": 173700 }, { "epoch": 0.17117147293175602, "grad_norm": 2.5407564640045166, "learning_rate": 9.971142654024445e-06, "loss": 3.2554, "step": 173750 }, { "epoch": 0.17122073090957812, "grad_norm": 2.4803049564361572, "learning_rate": 9.971126049442665e-06, "loss": 3.2712, "step": 173800 }, { "epoch": 0.1712699888874002, "grad_norm": 2.1661553382873535, "learning_rate": 9.971109440098934e-06, "loss": 3.2123, "step": 173850 }, { "epoch": 0.1713192468652223, "grad_norm": 2.424905776977539, "learning_rate": 9.971092825993272e-06, "loss": 3.2324, "step": 173900 }, { "epoch": 0.1713685048430444, "grad_norm": 2.721795082092285, "learning_rate": 9.971076207125691e-06, "loss": 3.1998, "step": 173950 }, { "epoch": 0.17141776282086646, "grad_norm": 2.3044917583465576, "learning_rate": 9.971059583496208e-06, "loss": 3.201, "step": 174000 }, { "epoch": 0.17146702079868856, "grad_norm": 2.494201898574829, "learning_rate": 9.971042955104839e-06, "loss": 3.1993, "step": 174050 }, { "epoch": 0.17151627877651066, "grad_norm": 2.578601598739624, "learning_rate": 9.9710263219516e-06, "loss": 3.2697, "step": 174100 }, { "epoch": 0.17156553675433273, "grad_norm": 2.9186549186706543, "learning_rate": 9.971009684036508e-06, "loss": 3.2119, "step": 174150 }, { "epoch": 0.17161479473215482, "grad_norm": 2.5611016750335693, "learning_rate": 9.970993041359578e-06, "loss": 3.2495, "step": 174200 }, { "epoch": 0.1716640527099769, "grad_norm": 2.428990364074707, "learning_rate": 9.970976393920827e-06, "loss": 3.2782, "step": 174250 }, { "epoch": 0.171713310687799, "grad_norm": 2.3071579933166504, "learning_rate": 9.970959741720268e-06, "loss": 3.2636, "step": 174300 }, { "epoch": 0.1717625686656211, "grad_norm": 
2.4201884269714355, "learning_rate": 9.970943084757919e-06, "loss": 3.1933, "step": 174350 }, { "epoch": 0.17181182664344316, "grad_norm": 2.4167661666870117, "learning_rate": 9.970926423033796e-06, "loss": 3.2427, "step": 174400 }, { "epoch": 0.17186108462126526, "grad_norm": 2.4131710529327393, "learning_rate": 9.970909756547913e-06, "loss": 3.1972, "step": 174450 }, { "epoch": 0.17191034259908736, "grad_norm": 2.4808688163757324, "learning_rate": 9.970893085300291e-06, "loss": 3.1644, "step": 174500 }, { "epoch": 0.17195960057690943, "grad_norm": 2.653364896774292, "learning_rate": 9.97087640929094e-06, "loss": 3.1963, "step": 174550 }, { "epoch": 0.17200885855473153, "grad_norm": 2.542815923690796, "learning_rate": 9.970859728519881e-06, "loss": 3.2296, "step": 174600 }, { "epoch": 0.17205811653255362, "grad_norm": 2.347154378890991, "learning_rate": 9.970843042987126e-06, "loss": 3.2584, "step": 174650 }, { "epoch": 0.1721073745103757, "grad_norm": 2.307035446166992, "learning_rate": 9.970826352692694e-06, "loss": 3.19, "step": 174700 }, { "epoch": 0.1721566324881978, "grad_norm": 2.4512135982513428, "learning_rate": 9.970809657636598e-06, "loss": 3.2042, "step": 174750 }, { "epoch": 0.1722058904660199, "grad_norm": 2.3631134033203125, "learning_rate": 9.970792957818856e-06, "loss": 3.2662, "step": 174800 }, { "epoch": 0.17225514844384196, "grad_norm": 2.411156415939331, "learning_rate": 9.970776253239484e-06, "loss": 3.2078, "step": 174850 }, { "epoch": 0.17230440642166406, "grad_norm": 2.5532026290893555, "learning_rate": 9.970759543898496e-06, "loss": 3.2319, "step": 174900 }, { "epoch": 0.17235366439948613, "grad_norm": 2.4078595638275146, "learning_rate": 9.970742829795912e-06, "loss": 3.286, "step": 174950 }, { "epoch": 0.17240292237730823, "grad_norm": 2.187547206878662, "learning_rate": 9.970726110931745e-06, "loss": 3.173, "step": 175000 }, { "epoch": 0.17245218035513032, "grad_norm": 2.5770387649536133, "learning_rate": 9.97070938730601e-06, "loss": 
3.2262, "step": 175050 }, { "epoch": 0.1725014383329524, "grad_norm": 2.3505170345306396, "learning_rate": 9.970692658918726e-06, "loss": 3.2261, "step": 175100 }, { "epoch": 0.1725506963107745, "grad_norm": 2.297321081161499, "learning_rate": 9.970675925769909e-06, "loss": 3.1537, "step": 175150 }, { "epoch": 0.1725999542885966, "grad_norm": 2.5150694847106934, "learning_rate": 9.970659187859572e-06, "loss": 3.234, "step": 175200 }, { "epoch": 0.17264921226641866, "grad_norm": 2.377392053604126, "learning_rate": 9.970642445187733e-06, "loss": 3.224, "step": 175250 }, { "epoch": 0.17269847024424076, "grad_norm": 2.3507533073425293, "learning_rate": 9.970625697754408e-06, "loss": 3.2202, "step": 175300 }, { "epoch": 0.17274772822206286, "grad_norm": 2.198974609375, "learning_rate": 9.970608945559612e-06, "loss": 3.2034, "step": 175350 }, { "epoch": 0.17279698619988493, "grad_norm": 2.4746742248535156, "learning_rate": 9.970592188603364e-06, "loss": 3.1561, "step": 175400 }, { "epoch": 0.17284624417770703, "grad_norm": 2.1418864727020264, "learning_rate": 9.970575426885675e-06, "loss": 3.2814, "step": 175450 }, { "epoch": 0.1728955021555291, "grad_norm": 2.5270841121673584, "learning_rate": 9.970558660406565e-06, "loss": 3.1718, "step": 175500 }, { "epoch": 0.1729447601333512, "grad_norm": 2.8102076053619385, "learning_rate": 9.97054188916605e-06, "loss": 3.2014, "step": 175550 }, { "epoch": 0.1729940181111733, "grad_norm": 2.409632682800293, "learning_rate": 9.970525113164144e-06, "loss": 3.2273, "step": 175600 }, { "epoch": 0.17304327608899536, "grad_norm": 2.176866054534912, "learning_rate": 9.970508332400864e-06, "loss": 3.2491, "step": 175650 }, { "epoch": 0.17309253406681746, "grad_norm": 2.5835020542144775, "learning_rate": 9.970491546876227e-06, "loss": 3.2277, "step": 175700 }, { "epoch": 0.17314179204463956, "grad_norm": 2.4079623222351074, "learning_rate": 9.97047475659025e-06, "loss": 3.2639, "step": 175750 }, { "epoch": 0.17319105002246163, "grad_norm": 
2.363938808441162, "learning_rate": 9.970457961542945e-06, "loss": 3.1784, "step": 175800 }, { "epoch": 0.17324030800028373, "grad_norm": 2.384348154067993, "learning_rate": 9.97044116173433e-06, "loss": 3.2732, "step": 175850 }, { "epoch": 0.17328956597810583, "grad_norm": 2.3531157970428467, "learning_rate": 9.970424357164423e-06, "loss": 3.2295, "step": 175900 }, { "epoch": 0.1733388239559279, "grad_norm": 2.329005241394043, "learning_rate": 9.970407547833239e-06, "loss": 3.1685, "step": 175950 }, { "epoch": 0.17338808193375, "grad_norm": 2.4844858646392822, "learning_rate": 9.970390733740793e-06, "loss": 3.2319, "step": 176000 }, { "epoch": 0.1734373399115721, "grad_norm": 2.6110851764678955, "learning_rate": 9.970373914887101e-06, "loss": 3.2685, "step": 176050 }, { "epoch": 0.17348659788939416, "grad_norm": 2.810617685317993, "learning_rate": 9.970357091272182e-06, "loss": 3.1943, "step": 176100 }, { "epoch": 0.17353585586721626, "grad_norm": 2.3159124851226807, "learning_rate": 9.970340262896048e-06, "loss": 3.2544, "step": 176150 }, { "epoch": 0.17358511384503833, "grad_norm": 2.3879196643829346, "learning_rate": 9.97032342975872e-06, "loss": 3.2139, "step": 176200 }, { "epoch": 0.17363437182286043, "grad_norm": 2.4451446533203125, "learning_rate": 9.970306591860208e-06, "loss": 3.1877, "step": 176250 }, { "epoch": 0.17368362980068253, "grad_norm": 2.2923426628112793, "learning_rate": 9.970289749200535e-06, "loss": 3.2136, "step": 176300 }, { "epoch": 0.1737328877785046, "grad_norm": 2.2988176345825195, "learning_rate": 9.970272901779712e-06, "loss": 3.1644, "step": 176350 }, { "epoch": 0.1737821457563267, "grad_norm": 2.6148171424865723, "learning_rate": 9.970256049597755e-06, "loss": 3.2128, "step": 176400 }, { "epoch": 0.1738314037341488, "grad_norm": 2.283486843109131, "learning_rate": 9.970239192654684e-06, "loss": 3.245, "step": 176450 }, { "epoch": 0.17388066171197086, "grad_norm": 2.37431001663208, "learning_rate": 9.970222330950513e-06, "loss": 
3.1857, "step": 176500 }, { "epoch": 0.17392991968979296, "grad_norm": 2.206726312637329, "learning_rate": 9.970205464485259e-06, "loss": 3.2206, "step": 176550 }, { "epoch": 0.17397917766761506, "grad_norm": 2.302769184112549, "learning_rate": 9.970188593258934e-06, "loss": 3.1743, "step": 176600 }, { "epoch": 0.17402843564543713, "grad_norm": 2.218107223510742, "learning_rate": 9.970171717271561e-06, "loss": 3.1986, "step": 176650 }, { "epoch": 0.17407769362325923, "grad_norm": 2.3868567943573, "learning_rate": 9.970154836523151e-06, "loss": 3.2332, "step": 176700 }, { "epoch": 0.1741269516010813, "grad_norm": 2.3658347129821777, "learning_rate": 9.970137951013723e-06, "loss": 3.2347, "step": 176750 }, { "epoch": 0.1741762095789034, "grad_norm": 2.488234519958496, "learning_rate": 9.970121060743291e-06, "loss": 3.3221, "step": 176800 }, { "epoch": 0.1742254675567255, "grad_norm": 2.5332071781158447, "learning_rate": 9.970104165711871e-06, "loss": 3.1868, "step": 176850 }, { "epoch": 0.17427472553454756, "grad_norm": 2.3807594776153564, "learning_rate": 9.970087265919484e-06, "loss": 3.1987, "step": 176900 }, { "epoch": 0.17432398351236966, "grad_norm": 2.5645101070404053, "learning_rate": 9.97007036136614e-06, "loss": 3.2314, "step": 176950 }, { "epoch": 0.17437324149019176, "grad_norm": 2.656818389892578, "learning_rate": 9.970053452051858e-06, "loss": 3.2458, "step": 177000 }, { "epoch": 0.17442249946801383, "grad_norm": 2.353442907333374, "learning_rate": 9.970036537976654e-06, "loss": 3.2499, "step": 177050 }, { "epoch": 0.17447175744583593, "grad_norm": 2.27417254447937, "learning_rate": 9.970019619140545e-06, "loss": 3.2293, "step": 177100 }, { "epoch": 0.17452101542365803, "grad_norm": 2.4451217651367188, "learning_rate": 9.970002695543546e-06, "loss": 3.2549, "step": 177150 }, { "epoch": 0.1745702734014801, "grad_norm": 2.4593799114227295, "learning_rate": 9.969985767185672e-06, "loss": 3.2287, "step": 177200 }, { "epoch": 0.1746195313793022, "grad_norm": 
2.2430336475372314, "learning_rate": 9.969968834066943e-06, "loss": 3.2072, "step": 177250 }, { "epoch": 0.1746687893571243, "grad_norm": 2.453826665878296, "learning_rate": 9.969951896187372e-06, "loss": 3.2609, "step": 177300 }, { "epoch": 0.17471804733494636, "grad_norm": 2.6128177642822266, "learning_rate": 9.969934953546977e-06, "loss": 3.3027, "step": 177350 }, { "epoch": 0.17476730531276846, "grad_norm": 2.5761077404022217, "learning_rate": 9.969918006145773e-06, "loss": 3.1796, "step": 177400 }, { "epoch": 0.17481656329059053, "grad_norm": 2.2695202827453613, "learning_rate": 9.969901053983777e-06, "loss": 3.3303, "step": 177450 }, { "epoch": 0.17486582126841263, "grad_norm": 2.2156848907470703, "learning_rate": 9.969884097061004e-06, "loss": 3.1921, "step": 177500 }, { "epoch": 0.17491507924623473, "grad_norm": 2.415022134780884, "learning_rate": 9.969867135377473e-06, "loss": 3.2027, "step": 177550 }, { "epoch": 0.1749643372240568, "grad_norm": 2.372105121612549, "learning_rate": 9.969850168933197e-06, "loss": 3.2477, "step": 177600 }, { "epoch": 0.1750135952018789, "grad_norm": 2.373159646987915, "learning_rate": 9.969833197728194e-06, "loss": 3.3252, "step": 177650 }, { "epoch": 0.175062853179701, "grad_norm": 2.375117778778076, "learning_rate": 9.969816221762481e-06, "loss": 3.1769, "step": 177700 }, { "epoch": 0.17511211115752306, "grad_norm": 2.3999218940734863, "learning_rate": 9.96979924103607e-06, "loss": 3.239, "step": 177750 }, { "epoch": 0.17516136913534516, "grad_norm": 2.683366060256958, "learning_rate": 9.969782255548984e-06, "loss": 3.2696, "step": 177800 }, { "epoch": 0.17521062711316726, "grad_norm": 2.558255195617676, "learning_rate": 9.969765265301234e-06, "loss": 3.2513, "step": 177850 }, { "epoch": 0.17525988509098933, "grad_norm": 2.459028720855713, "learning_rate": 9.96974827029284e-06, "loss": 3.212, "step": 177900 }, { "epoch": 0.17530914306881143, "grad_norm": 2.330308675765991, "learning_rate": 9.969731270523814e-06, "loss": 
3.1923, "step": 177950 }, { "epoch": 0.1753584010466335, "grad_norm": 2.7446229457855225, "learning_rate": 9.969714265994175e-06, "loss": 3.2111, "step": 178000 }, { "epoch": 0.1754076590244556, "grad_norm": 2.47710919380188, "learning_rate": 9.96969725670394e-06, "loss": 3.1435, "step": 178050 }, { "epoch": 0.1754569170022777, "grad_norm": 2.5674662590026855, "learning_rate": 9.969680242653124e-06, "loss": 3.1494, "step": 178100 }, { "epoch": 0.17550617498009977, "grad_norm": 2.2957444190979004, "learning_rate": 9.969663223841744e-06, "loss": 3.2169, "step": 178150 }, { "epoch": 0.17555543295792186, "grad_norm": 2.4439327716827393, "learning_rate": 9.969646200269813e-06, "loss": 3.221, "step": 178200 }, { "epoch": 0.17560469093574396, "grad_norm": 2.365339994430542, "learning_rate": 9.969629171937354e-06, "loss": 3.2101, "step": 178250 }, { "epoch": 0.17565394891356603, "grad_norm": 2.4139089584350586, "learning_rate": 9.969612138844377e-06, "loss": 3.1817, "step": 178300 }, { "epoch": 0.17570320689138813, "grad_norm": 2.3424696922302246, "learning_rate": 9.969595100990901e-06, "loss": 3.167, "step": 178350 }, { "epoch": 0.17575246486921023, "grad_norm": 2.3044238090515137, "learning_rate": 9.969578058376942e-06, "loss": 3.1844, "step": 178400 }, { "epoch": 0.1758017228470323, "grad_norm": 2.4840033054351807, "learning_rate": 9.969561011002516e-06, "loss": 3.1937, "step": 178450 }, { "epoch": 0.1758509808248544, "grad_norm": 2.502654552459717, "learning_rate": 9.96954395886764e-06, "loss": 3.2016, "step": 178500 }, { "epoch": 0.1759002388026765, "grad_norm": 2.57059907913208, "learning_rate": 9.96952690197233e-06, "loss": 3.0883, "step": 178550 }, { "epoch": 0.17594949678049857, "grad_norm": 2.252640724182129, "learning_rate": 9.969509840316604e-06, "loss": 3.1544, "step": 178600 }, { "epoch": 0.17599875475832066, "grad_norm": 2.509791612625122, "learning_rate": 9.969492773900475e-06, "loss": 3.2039, "step": 178650 }, { "epoch": 0.17604801273614273, "grad_norm": 
2.2449185848236084, "learning_rate": 9.969475702723961e-06, "loss": 3.2125, "step": 178700 }, { "epoch": 0.17609727071396483, "grad_norm": 2.456995964050293, "learning_rate": 9.96945862678708e-06, "loss": 3.2734, "step": 178750 }, { "epoch": 0.17614652869178693, "grad_norm": 2.3344812393188477, "learning_rate": 9.969441546089846e-06, "loss": 3.2524, "step": 178800 }, { "epoch": 0.176195786669609, "grad_norm": 2.600053548812866, "learning_rate": 9.969424460632275e-06, "loss": 3.2894, "step": 178850 }, { "epoch": 0.1762450446474311, "grad_norm": 2.3503122329711914, "learning_rate": 9.969407370414386e-06, "loss": 3.1809, "step": 178900 }, { "epoch": 0.1762943026252532, "grad_norm": 2.4465696811676025, "learning_rate": 9.969390275436193e-06, "loss": 3.2239, "step": 178950 }, { "epoch": 0.17634356060307527, "grad_norm": 2.2553763389587402, "learning_rate": 9.969373175697712e-06, "loss": 3.1909, "step": 179000 }, { "epoch": 0.17639281858089736, "grad_norm": 2.6784815788269043, "learning_rate": 9.969356071198963e-06, "loss": 3.215, "step": 179050 }, { "epoch": 0.17644207655871946, "grad_norm": 2.4496383666992188, "learning_rate": 9.969338961939958e-06, "loss": 3.1194, "step": 179100 }, { "epoch": 0.17649133453654153, "grad_norm": 2.2548818588256836, "learning_rate": 9.969321847920718e-06, "loss": 3.2143, "step": 179150 }, { "epoch": 0.17654059251436363, "grad_norm": 2.3630807399749756, "learning_rate": 9.969304729141256e-06, "loss": 3.2393, "step": 179200 }, { "epoch": 0.1765898504921857, "grad_norm": 2.304321765899658, "learning_rate": 9.969287605601587e-06, "loss": 3.2695, "step": 179250 }, { "epoch": 0.1766391084700078, "grad_norm": 2.2405922412872314, "learning_rate": 9.969270477301732e-06, "loss": 3.1952, "step": 179300 }, { "epoch": 0.1766883664478299, "grad_norm": 2.3130648136138916, "learning_rate": 9.969253344241702e-06, "loss": 3.2475, "step": 179350 }, { "epoch": 0.17673762442565197, "grad_norm": 2.4354708194732666, "learning_rate": 9.96923620642152e-06, 
"loss": 3.1491, "step": 179400 }, { "epoch": 0.17678688240347407, "grad_norm": 2.2851667404174805, "learning_rate": 9.969219063841198e-06, "loss": 3.2361, "step": 179450 }, { "epoch": 0.17683614038129616, "grad_norm": 2.1669671535491943, "learning_rate": 9.969201916500752e-06, "loss": 3.2251, "step": 179500 }, { "epoch": 0.17688539835911823, "grad_norm": 2.455143690109253, "learning_rate": 9.9691847644002e-06, "loss": 3.2062, "step": 179550 }, { "epoch": 0.17693465633694033, "grad_norm": 2.3827309608459473, "learning_rate": 9.969167607539558e-06, "loss": 3.2083, "step": 179600 }, { "epoch": 0.17698391431476243, "grad_norm": 2.3525800704956055, "learning_rate": 9.969150445918843e-06, "loss": 3.2226, "step": 179650 }, { "epoch": 0.1770331722925845, "grad_norm": 2.357990026473999, "learning_rate": 9.969133279538072e-06, "loss": 3.1874, "step": 179700 }, { "epoch": 0.1770824302704066, "grad_norm": 2.433995008468628, "learning_rate": 9.969116108397259e-06, "loss": 3.2163, "step": 179750 }, { "epoch": 0.1771316882482287, "grad_norm": 2.3474621772766113, "learning_rate": 9.969098932496422e-06, "loss": 3.166, "step": 179800 }, { "epoch": 0.17718094622605077, "grad_norm": 2.3695600032806396, "learning_rate": 9.969081751835577e-06, "loss": 3.1432, "step": 179850 }, { "epoch": 0.17723020420387287, "grad_norm": 2.4250152111053467, "learning_rate": 9.969064566414742e-06, "loss": 3.2001, "step": 179900 }, { "epoch": 0.17727946218169494, "grad_norm": 2.7788491249084473, "learning_rate": 9.969047376233932e-06, "loss": 3.1954, "step": 179950 }, { "epoch": 0.17732872015951703, "grad_norm": 2.843271493911743, "learning_rate": 9.969030181293163e-06, "loss": 3.2637, "step": 180000 }, { "epoch": 0.17737797813733913, "grad_norm": 2.479048728942871, "learning_rate": 9.969012981592452e-06, "loss": 3.1628, "step": 180050 }, { "epoch": 0.1774272361151612, "grad_norm": 2.4869651794433594, "learning_rate": 9.968995777131816e-06, "loss": 3.2622, "step": 180100 }, { "epoch": 0.1774764940929833, 
"grad_norm": 2.5936601161956787, "learning_rate": 9.968978567911272e-06, "loss": 3.289, "step": 180150 }, { "epoch": 0.1775257520708054, "grad_norm": 2.46224045753479, "learning_rate": 9.968961353930835e-06, "loss": 3.1471, "step": 180200 }, { "epoch": 0.17757501004862747, "grad_norm": 2.42905330657959, "learning_rate": 9.968944135190523e-06, "loss": 3.2288, "step": 180250 }, { "epoch": 0.17762426802644957, "grad_norm": 2.3414146900177, "learning_rate": 9.968926911690352e-06, "loss": 3.2019, "step": 180300 }, { "epoch": 0.17767352600427166, "grad_norm": 2.439530611038208, "learning_rate": 9.968909683430334e-06, "loss": 3.136, "step": 180350 }, { "epoch": 0.17772278398209373, "grad_norm": 2.425395965576172, "learning_rate": 9.968892450410493e-06, "loss": 3.1511, "step": 180400 }, { "epoch": 0.17777204195991583, "grad_norm": 2.3070318698883057, "learning_rate": 9.968875212630842e-06, "loss": 3.172, "step": 180450 }, { "epoch": 0.1778212999377379, "grad_norm": 2.6527514457702637, "learning_rate": 9.968857970091398e-06, "loss": 3.193, "step": 180500 }, { "epoch": 0.17787055791556, "grad_norm": 2.4171979427337646, "learning_rate": 9.968840722792175e-06, "loss": 3.187, "step": 180550 }, { "epoch": 0.1779198158933821, "grad_norm": 2.6169016361236572, "learning_rate": 9.968823470733193e-06, "loss": 3.2338, "step": 180600 }, { "epoch": 0.17796907387120417, "grad_norm": 2.244690418243408, "learning_rate": 9.968806213914468e-06, "loss": 3.2431, "step": 180650 }, { "epoch": 0.17801833184902627, "grad_norm": 2.3776443004608154, "learning_rate": 9.968788952336014e-06, "loss": 3.2088, "step": 180700 }, { "epoch": 0.17806758982684837, "grad_norm": 2.2827038764953613, "learning_rate": 9.968771685997851e-06, "loss": 3.2046, "step": 180750 }, { "epoch": 0.17811684780467044, "grad_norm": 3.0177316665649414, "learning_rate": 9.968754414899993e-06, "loss": 3.2659, "step": 180800 }, { "epoch": 0.17816610578249253, "grad_norm": 2.5790157318115234, "learning_rate": 9.968737139042458e-06, 
"loss": 3.2466, "step": 180850 }, { "epoch": 0.17821536376031463, "grad_norm": 2.415644884109497, "learning_rate": 9.968719858425263e-06, "loss": 3.228, "step": 180900 }, { "epoch": 0.1782646217381367, "grad_norm": 2.376086473464966, "learning_rate": 9.968702573048421e-06, "loss": 3.1735, "step": 180950 }, { "epoch": 0.1783138797159588, "grad_norm": 2.4414310455322266, "learning_rate": 9.968685282911953e-06, "loss": 3.1912, "step": 181000 }, { "epoch": 0.17836313769378087, "grad_norm": 2.4331605434417725, "learning_rate": 9.96866798801587e-06, "loss": 3.2045, "step": 181050 }, { "epoch": 0.17841239567160297, "grad_norm": 2.17183780670166, "learning_rate": 9.968650688360195e-06, "loss": 3.2325, "step": 181100 }, { "epoch": 0.17846165364942507, "grad_norm": 2.289301872253418, "learning_rate": 9.968633383944941e-06, "loss": 3.2067, "step": 181150 }, { "epoch": 0.17851091162724714, "grad_norm": 2.3786370754241943, "learning_rate": 9.968616074770126e-06, "loss": 3.2623, "step": 181200 }, { "epoch": 0.17856016960506924, "grad_norm": 2.4174704551696777, "learning_rate": 9.968598760835765e-06, "loss": 3.1445, "step": 181250 }, { "epoch": 0.17860942758289133, "grad_norm": 2.3299131393432617, "learning_rate": 9.968581442141877e-06, "loss": 3.2231, "step": 181300 }, { "epoch": 0.1786586855607134, "grad_norm": 2.3166391849517822, "learning_rate": 9.968564118688477e-06, "loss": 3.1527, "step": 181350 }, { "epoch": 0.1787079435385355, "grad_norm": 2.3757858276367188, "learning_rate": 9.96854679047558e-06, "loss": 3.2625, "step": 181400 }, { "epoch": 0.1787572015163576, "grad_norm": 2.4631245136260986, "learning_rate": 9.968529457503204e-06, "loss": 3.2558, "step": 181450 }, { "epoch": 0.17880645949417967, "grad_norm": 2.584956645965576, "learning_rate": 9.968512119771367e-06, "loss": 3.2225, "step": 181500 }, { "epoch": 0.17885571747200177, "grad_norm": 2.3392624855041504, "learning_rate": 9.968494777280084e-06, "loss": 3.2828, "step": 181550 }, { "epoch": 0.17890497544982387, 
"grad_norm": 2.4581894874572754, "learning_rate": 9.968477430029373e-06, "loss": 3.2916, "step": 181600 }, { "epoch": 0.17895423342764594, "grad_norm": 2.4843897819519043, "learning_rate": 9.96846007801925e-06, "loss": 3.1885, "step": 181650 }, { "epoch": 0.17900349140546803, "grad_norm": 2.4048242568969727, "learning_rate": 9.96844272124973e-06, "loss": 3.217, "step": 181700 }, { "epoch": 0.1790527493832901, "grad_norm": 2.4753775596618652, "learning_rate": 9.96842535972083e-06, "loss": 3.1802, "step": 181750 }, { "epoch": 0.1791020073611122, "grad_norm": 2.256366729736328, "learning_rate": 9.96840799343257e-06, "loss": 3.2358, "step": 181800 }, { "epoch": 0.1791512653389343, "grad_norm": 2.349601984024048, "learning_rate": 9.968390622384963e-06, "loss": 3.2442, "step": 181850 }, { "epoch": 0.17920052331675637, "grad_norm": 2.2636196613311768, "learning_rate": 9.968373246578025e-06, "loss": 3.1425, "step": 181900 }, { "epoch": 0.17924978129457847, "grad_norm": 2.643313407897949, "learning_rate": 9.968355866011777e-06, "loss": 3.146, "step": 181950 }, { "epoch": 0.17929903927240057, "grad_norm": 2.3444671630859375, "learning_rate": 9.968338480686232e-06, "loss": 3.1962, "step": 182000 }, { "epoch": 0.17934829725022264, "grad_norm": 2.5086519718170166, "learning_rate": 9.968321090601409e-06, "loss": 3.1536, "step": 182050 }, { "epoch": 0.17939755522804474, "grad_norm": 2.286423444747925, "learning_rate": 9.968303695757322e-06, "loss": 3.2251, "step": 182100 }, { "epoch": 0.17944681320586683, "grad_norm": 2.624016284942627, "learning_rate": 9.96828629615399e-06, "loss": 3.2075, "step": 182150 }, { "epoch": 0.1794960711836889, "grad_norm": 3.246455192565918, "learning_rate": 9.968268891791428e-06, "loss": 3.2645, "step": 182200 }, { "epoch": 0.179545329161511, "grad_norm": 2.625476598739624, "learning_rate": 9.968251482669655e-06, "loss": 3.1777, "step": 182250 }, { "epoch": 0.17959458713933307, "grad_norm": 2.3698625564575195, "learning_rate": 9.968234068788684e-06, 
"loss": 3.1453, "step": 182300 }, { "epoch": 0.17964384511715517, "grad_norm": 2.3047168254852295, "learning_rate": 9.968216650148537e-06, "loss": 3.2371, "step": 182350 }, { "epoch": 0.17969310309497727, "grad_norm": 2.2632524967193604, "learning_rate": 9.968199226749225e-06, "loss": 3.2292, "step": 182400 }, { "epoch": 0.17974236107279934, "grad_norm": 2.403829574584961, "learning_rate": 9.968181798590767e-06, "loss": 3.2815, "step": 182450 }, { "epoch": 0.17979161905062144, "grad_norm": 2.308981418609619, "learning_rate": 9.96816436567318e-06, "loss": 3.2758, "step": 182500 }, { "epoch": 0.17984087702844354, "grad_norm": 2.242492198944092, "learning_rate": 9.968146927996481e-06, "loss": 3.2051, "step": 182550 }, { "epoch": 0.1798901350062656, "grad_norm": 2.318098783493042, "learning_rate": 9.968129485560686e-06, "loss": 3.221, "step": 182600 }, { "epoch": 0.1799393929840877, "grad_norm": 2.26322078704834, "learning_rate": 9.968112038365813e-06, "loss": 3.1868, "step": 182650 }, { "epoch": 0.1799886509619098, "grad_norm": 2.604257822036743, "learning_rate": 9.968094586411877e-06, "loss": 3.2324, "step": 182700 }, { "epoch": 0.18003790893973187, "grad_norm": 2.2347121238708496, "learning_rate": 9.968077129698897e-06, "loss": 3.234, "step": 182750 }, { "epoch": 0.18008716691755397, "grad_norm": 2.424950122833252, "learning_rate": 9.968059668226886e-06, "loss": 3.3001, "step": 182800 }, { "epoch": 0.18013642489537607, "grad_norm": 2.4661848545074463, "learning_rate": 9.968042201995862e-06, "loss": 3.2215, "step": 182850 }, { "epoch": 0.18018568287319814, "grad_norm": 3.518190622329712, "learning_rate": 9.968024731005845e-06, "loss": 3.1993, "step": 182900 }, { "epoch": 0.18023494085102024, "grad_norm": 2.3831968307495117, "learning_rate": 9.968007255256849e-06, "loss": 3.1478, "step": 182950 }, { "epoch": 0.1802841988288423, "grad_norm": 2.472097873687744, "learning_rate": 9.96798977474889e-06, "loss": 3.2736, "step": 183000 }, { "epoch": 0.1803334568066644, 
"grad_norm": 2.2536277770996094, "learning_rate": 9.967972289481987e-06, "loss": 3.1764, "step": 183050 }, { "epoch": 0.1803827147844865, "grad_norm": 2.588365077972412, "learning_rate": 9.967954799456154e-06, "loss": 3.211, "step": 183100 }, { "epoch": 0.18043197276230857, "grad_norm": 2.477708578109741, "learning_rate": 9.96793730467141e-06, "loss": 3.1815, "step": 183150 }, { "epoch": 0.18048123074013067, "grad_norm": 2.4470369815826416, "learning_rate": 9.967919805127771e-06, "loss": 3.2072, "step": 183200 }, { "epoch": 0.18053048871795277, "grad_norm": 2.3543145656585693, "learning_rate": 9.967902300825254e-06, "loss": 3.2764, "step": 183250 }, { "epoch": 0.18057974669577484, "grad_norm": 2.187976837158203, "learning_rate": 9.967884791763876e-06, "loss": 3.2268, "step": 183300 }, { "epoch": 0.18062900467359694, "grad_norm": 2.2675974369049072, "learning_rate": 9.967867277943653e-06, "loss": 3.1894, "step": 183350 }, { "epoch": 0.18067826265141904, "grad_norm": 2.4269113540649414, "learning_rate": 9.967849759364602e-06, "loss": 3.1766, "step": 183400 }, { "epoch": 0.1807275206292411, "grad_norm": 2.1761116981506348, "learning_rate": 9.967832236026741e-06, "loss": 3.2333, "step": 183450 }, { "epoch": 0.1807767786070632, "grad_norm": 2.30753755569458, "learning_rate": 9.967814707930086e-06, "loss": 3.2127, "step": 183500 }, { "epoch": 0.18082603658488527, "grad_norm": 2.636326789855957, "learning_rate": 9.96779717507465e-06, "loss": 3.2021, "step": 183550 }, { "epoch": 0.18087529456270737, "grad_norm": 2.421142339706421, "learning_rate": 9.967779637460458e-06, "loss": 3.2146, "step": 183600 }, { "epoch": 0.18092455254052947, "grad_norm": 2.531524658203125, "learning_rate": 9.96776209508752e-06, "loss": 3.212, "step": 183650 }, { "epoch": 0.18097381051835154, "grad_norm": 2.468977689743042, "learning_rate": 9.967744547955854e-06, "loss": 3.2537, "step": 183700 }, { "epoch": 0.18102306849617364, "grad_norm": 2.3957114219665527, "learning_rate": 
9.96772699606548e-06, "loss": 3.197, "step": 183750 }, { "epoch": 0.18107232647399574, "grad_norm": 2.337158203125, "learning_rate": 9.967709439416411e-06, "loss": 3.1421, "step": 183800 }, { "epoch": 0.1811215844518178, "grad_norm": 4.121609687805176, "learning_rate": 9.967691878008666e-06, "loss": 3.2154, "step": 183850 }, { "epoch": 0.1811708424296399, "grad_norm": 2.4127163887023926, "learning_rate": 9.967674311842259e-06, "loss": 3.202, "step": 183900 }, { "epoch": 0.181220100407462, "grad_norm": 2.3678977489471436, "learning_rate": 9.967656740917211e-06, "loss": 3.2303, "step": 183950 }, { "epoch": 0.18126935838528407, "grad_norm": 2.325740337371826, "learning_rate": 9.967639165233535e-06, "loss": 3.2165, "step": 184000 }, { "epoch": 0.18131861636310617, "grad_norm": 2.4106318950653076, "learning_rate": 9.967621584791252e-06, "loss": 3.116, "step": 184050 }, { "epoch": 0.18136787434092827, "grad_norm": 2.3828835487365723, "learning_rate": 9.967603999590377e-06, "loss": 3.2087, "step": 184100 }, { "epoch": 0.18141713231875034, "grad_norm": 2.370549201965332, "learning_rate": 9.967586409630925e-06, "loss": 3.2244, "step": 184150 }, { "epoch": 0.18146639029657244, "grad_norm": 2.5792245864868164, "learning_rate": 9.967568814912914e-06, "loss": 3.1708, "step": 184200 }, { "epoch": 0.1815156482743945, "grad_norm": 2.433742046356201, "learning_rate": 9.967551215436362e-06, "loss": 3.2196, "step": 184250 }, { "epoch": 0.1815649062522166, "grad_norm": 2.4566173553466797, "learning_rate": 9.967533611201283e-06, "loss": 3.2788, "step": 184300 }, { "epoch": 0.1816141642300387, "grad_norm": 2.915496826171875, "learning_rate": 9.967516002207698e-06, "loss": 3.2244, "step": 184350 }, { "epoch": 0.18166342220786078, "grad_norm": 2.6744544506073, "learning_rate": 9.96749838845562e-06, "loss": 3.2732, "step": 184400 }, { "epoch": 0.18171268018568287, "grad_norm": 2.5147130489349365, "learning_rate": 9.967480769945068e-06, "loss": 3.2233, "step": 184450 }, { "epoch": 
0.18176193816350497, "grad_norm": 2.3343405723571777, "learning_rate": 9.96746314667606e-06, "loss": 3.2019, "step": 184500 }, { "epoch": 0.18181119614132704, "grad_norm": 2.734468460083008, "learning_rate": 9.96744551864861e-06, "loss": 3.2, "step": 184550 }, { "epoch": 0.18186045411914914, "grad_norm": 2.3590869903564453, "learning_rate": 9.967427885862736e-06, "loss": 3.2323, "step": 184600 }, { "epoch": 0.18190971209697124, "grad_norm": 2.3478872776031494, "learning_rate": 9.967410248318453e-06, "loss": 3.2005, "step": 184650 }, { "epoch": 0.1819589700747933, "grad_norm": 2.3960773944854736, "learning_rate": 9.967392606015784e-06, "loss": 3.1868, "step": 184700 }, { "epoch": 0.1820082280526154, "grad_norm": 2.218552827835083, "learning_rate": 9.96737495895474e-06, "loss": 3.2673, "step": 184750 }, { "epoch": 0.18205748603043748, "grad_norm": 2.4703712463378906, "learning_rate": 9.96735730713534e-06, "loss": 3.1731, "step": 184800 }, { "epoch": 0.18210674400825957, "grad_norm": 2.463500499725342, "learning_rate": 9.9673396505576e-06, "loss": 3.2466, "step": 184850 }, { "epoch": 0.18215600198608167, "grad_norm": 2.333935499191284, "learning_rate": 9.967321989221538e-06, "loss": 3.2262, "step": 184900 }, { "epoch": 0.18220525996390374, "grad_norm": 2.3653900623321533, "learning_rate": 9.967304323127172e-06, "loss": 3.247, "step": 184950 }, { "epoch": 0.18225451794172584, "grad_norm": 2.3154373168945312, "learning_rate": 9.967286652274515e-06, "loss": 3.1941, "step": 185000 }, { "epoch": 0.18230377591954794, "grad_norm": 2.414435863494873, "learning_rate": 9.967268976663587e-06, "loss": 3.1564, "step": 185050 }, { "epoch": 0.18235303389737, "grad_norm": 2.322814702987671, "learning_rate": 9.967251296294406e-06, "loss": 3.2223, "step": 185100 }, { "epoch": 0.1824022918751921, "grad_norm": 2.5436880588531494, "learning_rate": 9.967233611166986e-06, "loss": 3.1892, "step": 185150 }, { "epoch": 0.1824515498530142, "grad_norm": 2.2896182537078857, "learning_rate": 
9.967215921281344e-06, "loss": 3.136, "step": 185200 }, { "epoch": 0.18250080783083628, "grad_norm": 2.4659423828125, "learning_rate": 9.9671982266375e-06, "loss": 3.2351, "step": 185250 }, { "epoch": 0.18255006580865837, "grad_norm": 2.7327942848205566, "learning_rate": 9.967180527235467e-06, "loss": 3.1901, "step": 185300 }, { "epoch": 0.18259932378648047, "grad_norm": 2.3278703689575195, "learning_rate": 9.967162823075265e-06, "loss": 3.2248, "step": 185350 }, { "epoch": 0.18264858176430254, "grad_norm": 2.4119722843170166, "learning_rate": 9.96714511415691e-06, "loss": 3.2229, "step": 185400 }, { "epoch": 0.18269783974212464, "grad_norm": 2.525313138961792, "learning_rate": 9.96712740048042e-06, "loss": 3.2026, "step": 185450 }, { "epoch": 0.1827470977199467, "grad_norm": 2.8143012523651123, "learning_rate": 9.967109682045811e-06, "loss": 3.2227, "step": 185500 }, { "epoch": 0.1827963556977688, "grad_norm": 2.178868055343628, "learning_rate": 9.967091958853099e-06, "loss": 3.2126, "step": 185550 }, { "epoch": 0.1828456136755909, "grad_norm": 2.2754158973693848, "learning_rate": 9.967074230902302e-06, "loss": 3.1576, "step": 185600 }, { "epoch": 0.18289487165341298, "grad_norm": 2.245694875717163, "learning_rate": 9.967056498193437e-06, "loss": 3.2209, "step": 185650 }, { "epoch": 0.18294412963123508, "grad_norm": 2.593104839324951, "learning_rate": 9.96703876072652e-06, "loss": 3.157, "step": 185700 }, { "epoch": 0.18299338760905717, "grad_norm": 2.3891561031341553, "learning_rate": 9.96702101850157e-06, "loss": 3.2507, "step": 185750 }, { "epoch": 0.18304264558687924, "grad_norm": 2.28312087059021, "learning_rate": 9.967003271518601e-06, "loss": 3.2289, "step": 185800 }, { "epoch": 0.18309190356470134, "grad_norm": 2.6639416217803955, "learning_rate": 9.966985519777633e-06, "loss": 3.2002, "step": 185850 }, { "epoch": 0.18314116154252344, "grad_norm": 2.5683982372283936, "learning_rate": 9.966967763278683e-06, "loss": 3.1509, "step": 185900 }, { "epoch": 
0.1831904195203455, "grad_norm": 2.3612489700317383, "learning_rate": 9.966950002021766e-06, "loss": 3.1809, "step": 185950 }, { "epoch": 0.1832396774981676, "grad_norm": 2.1621787548065186, "learning_rate": 9.966932236006898e-06, "loss": 3.2045, "step": 186000 }, { "epoch": 0.18328893547598968, "grad_norm": 2.1562697887420654, "learning_rate": 9.9669144652341e-06, "loss": 3.1925, "step": 186050 }, { "epoch": 0.18333819345381178, "grad_norm": 2.486769676208496, "learning_rate": 9.966896689703386e-06, "loss": 3.178, "step": 186100 }, { "epoch": 0.18338745143163387, "grad_norm": 2.630861282348633, "learning_rate": 9.966878909414775e-06, "loss": 3.1672, "step": 186150 }, { "epoch": 0.18343670940945594, "grad_norm": 2.4195058345794678, "learning_rate": 9.966861124368282e-06, "loss": 3.1956, "step": 186200 }, { "epoch": 0.18348596738727804, "grad_norm": 2.488677740097046, "learning_rate": 9.966843334563923e-06, "loss": 3.2247, "step": 186250 }, { "epoch": 0.18353522536510014, "grad_norm": 2.3713600635528564, "learning_rate": 9.96682554000172e-06, "loss": 3.1821, "step": 186300 }, { "epoch": 0.1835844833429222, "grad_norm": 2.3867037296295166, "learning_rate": 9.966807740681686e-06, "loss": 3.1999, "step": 186350 }, { "epoch": 0.1836337413207443, "grad_norm": 2.70941162109375, "learning_rate": 9.96678993660384e-06, "loss": 3.2407, "step": 186400 }, { "epoch": 0.1836829992985664, "grad_norm": 2.4856951236724854, "learning_rate": 9.966772127768196e-06, "loss": 3.1999, "step": 186450 }, { "epoch": 0.18373225727638848, "grad_norm": 3.1221067905426025, "learning_rate": 9.966754314174776e-06, "loss": 3.1413, "step": 186500 }, { "epoch": 0.18378151525421058, "grad_norm": 2.358966827392578, "learning_rate": 9.966736495823593e-06, "loss": 3.2191, "step": 186550 }, { "epoch": 0.18383077323203267, "grad_norm": 2.2385661602020264, "learning_rate": 9.966718672714664e-06, "loss": 3.1604, "step": 186600 }, { "epoch": 0.18388003120985474, "grad_norm": 2.799252510070801, "learning_rate": 
9.96670084484801e-06, "loss": 3.2351, "step": 186650 }, { "epoch": 0.18392928918767684, "grad_norm": 2.5816032886505127, "learning_rate": 9.966683012223643e-06, "loss": 3.1631, "step": 186700 }, { "epoch": 0.1839785471654989, "grad_norm": 2.298856735229492, "learning_rate": 9.966665174841585e-06, "loss": 3.2295, "step": 186750 }, { "epoch": 0.184027805143321, "grad_norm": 2.563558578491211, "learning_rate": 9.966647332701849e-06, "loss": 3.2209, "step": 186800 }, { "epoch": 0.1840770631211431, "grad_norm": 2.4518649578094482, "learning_rate": 9.966629485804454e-06, "loss": 3.2239, "step": 186850 }, { "epoch": 0.18412632109896518, "grad_norm": 2.378243923187256, "learning_rate": 9.966611634149418e-06, "loss": 3.2289, "step": 186900 }, { "epoch": 0.18417557907678728, "grad_norm": 2.168172836303711, "learning_rate": 9.966593777736755e-06, "loss": 3.2019, "step": 186950 }, { "epoch": 0.18422483705460937, "grad_norm": 2.4742865562438965, "learning_rate": 9.966575916566485e-06, "loss": 3.146, "step": 187000 }, { "epoch": 0.18427409503243145, "grad_norm": 2.487452268600464, "learning_rate": 9.966558050638627e-06, "loss": 3.1706, "step": 187050 }, { "epoch": 0.18432335301025354, "grad_norm": 2.2697088718414307, "learning_rate": 9.966540179953191e-06, "loss": 3.1917, "step": 187100 }, { "epoch": 0.18437261098807564, "grad_norm": 2.2541401386260986, "learning_rate": 9.9665223045102e-06, "loss": 3.2077, "step": 187150 }, { "epoch": 0.1844218689658977, "grad_norm": 2.3923799991607666, "learning_rate": 9.96650442430967e-06, "loss": 3.1846, "step": 187200 }, { "epoch": 0.1844711269437198, "grad_norm": 2.406975507736206, "learning_rate": 9.966486539351618e-06, "loss": 3.1576, "step": 187250 }, { "epoch": 0.18452038492154188, "grad_norm": 2.3095078468322754, "learning_rate": 9.96646864963606e-06, "loss": 3.2195, "step": 187300 }, { "epoch": 0.18456964289936398, "grad_norm": 2.487431049346924, "learning_rate": 9.966450755163014e-06, "loss": 3.2506, "step": 187350 }, { "epoch": 
0.18461890087718608, "grad_norm": 2.5561208724975586, "learning_rate": 9.966432855932498e-06, "loss": 3.2144, "step": 187400 }, { "epoch": 0.18466815885500815, "grad_norm": 2.329491138458252, "learning_rate": 9.966414951944527e-06, "loss": 3.156, "step": 187450 }, { "epoch": 0.18471741683283024, "grad_norm": 2.2726519107818604, "learning_rate": 9.96639704319912e-06, "loss": 3.1701, "step": 187500 }, { "epoch": 0.18476667481065234, "grad_norm": 2.3236093521118164, "learning_rate": 9.966379129696293e-06, "loss": 3.1886, "step": 187550 }, { "epoch": 0.1848159327884744, "grad_norm": 2.2540907859802246, "learning_rate": 9.966361211436064e-06, "loss": 3.1292, "step": 187600 }, { "epoch": 0.1848651907662965, "grad_norm": 2.315483570098877, "learning_rate": 9.96634328841845e-06, "loss": 3.2044, "step": 187650 }, { "epoch": 0.1849144487441186, "grad_norm": 2.3866240978240967, "learning_rate": 9.966325360643467e-06, "loss": 3.1911, "step": 187700 }, { "epoch": 0.18496370672194068, "grad_norm": 2.2230470180511475, "learning_rate": 9.966307428111135e-06, "loss": 3.239, "step": 187750 }, { "epoch": 0.18501296469976278, "grad_norm": 2.3880510330200195, "learning_rate": 9.966289490821467e-06, "loss": 3.2173, "step": 187800 }, { "epoch": 0.18506222267758485, "grad_norm": 2.368213415145874, "learning_rate": 9.966271548774485e-06, "loss": 3.2433, "step": 187850 }, { "epoch": 0.18511148065540695, "grad_norm": 2.3240973949432373, "learning_rate": 9.966253601970203e-06, "loss": 3.2442, "step": 187900 }, { "epoch": 0.18516073863322904, "grad_norm": 2.543519973754883, "learning_rate": 9.966235650408638e-06, "loss": 3.1855, "step": 187950 }, { "epoch": 0.18520999661105111, "grad_norm": 2.2920472621917725, "learning_rate": 9.96621769408981e-06, "loss": 3.1459, "step": 188000 }, { "epoch": 0.1852592545888732, "grad_norm": 2.3326361179351807, "learning_rate": 9.966199733013732e-06, "loss": 3.273, "step": 188050 }, { "epoch": 0.1853085125666953, "grad_norm": 2.4187192916870117, 
"learning_rate": 9.966181767180425e-06, "loss": 3.1906, "step": 188100 }, { "epoch": 0.18535777054451738, "grad_norm": 2.8203608989715576, "learning_rate": 9.966163796589904e-06, "loss": 3.2051, "step": 188150 }, { "epoch": 0.18540702852233948, "grad_norm": 2.429426670074463, "learning_rate": 9.966145821242187e-06, "loss": 3.1885, "step": 188200 }, { "epoch": 0.18545628650016158, "grad_norm": 2.077145576477051, "learning_rate": 9.966127841137293e-06, "loss": 3.1737, "step": 188250 }, { "epoch": 0.18550554447798365, "grad_norm": 2.3485257625579834, "learning_rate": 9.966109856275236e-06, "loss": 3.192, "step": 188300 }, { "epoch": 0.18555480245580575, "grad_norm": 2.307839870452881, "learning_rate": 9.966091866656034e-06, "loss": 3.1629, "step": 188350 }, { "epoch": 0.18560406043362784, "grad_norm": 2.415091037750244, "learning_rate": 9.966073872279707e-06, "loss": 3.2074, "step": 188400 }, { "epoch": 0.1856533184114499, "grad_norm": 2.17326283454895, "learning_rate": 9.966055873146268e-06, "loss": 3.1449, "step": 188450 }, { "epoch": 0.185702576389272, "grad_norm": 2.466304063796997, "learning_rate": 9.966037869255736e-06, "loss": 3.2399, "step": 188500 }, { "epoch": 0.18575183436709408, "grad_norm": 2.375298023223877, "learning_rate": 9.966019860608132e-06, "loss": 3.2807, "step": 188550 }, { "epoch": 0.18580109234491618, "grad_norm": 2.267178773880005, "learning_rate": 9.966001847203465e-06, "loss": 3.1547, "step": 188600 }, { "epoch": 0.18585035032273828, "grad_norm": 2.364042043685913, "learning_rate": 9.965983829041762e-06, "loss": 3.1687, "step": 188650 }, { "epoch": 0.18589960830056035, "grad_norm": 2.257005214691162, "learning_rate": 9.965965806123033e-06, "loss": 3.2198, "step": 188700 }, { "epoch": 0.18594886627838245, "grad_norm": 2.5347790718078613, "learning_rate": 9.965947778447296e-06, "loss": 3.199, "step": 188750 }, { "epoch": 0.18599812425620454, "grad_norm": 2.306288480758667, "learning_rate": 9.965929746014571e-06, "loss": 3.2225, "step": 188800 
}, { "epoch": 0.18604738223402661, "grad_norm": 2.2567124366760254, "learning_rate": 9.965911708824876e-06, "loss": 3.1695, "step": 188850 }, { "epoch": 0.1860966402118487, "grad_norm": 2.20658016204834, "learning_rate": 9.965893666878224e-06, "loss": 3.179, "step": 188900 }, { "epoch": 0.1861458981896708, "grad_norm": 2.458904266357422, "learning_rate": 9.965875620174636e-06, "loss": 3.2378, "step": 188950 }, { "epoch": 0.18619515616749288, "grad_norm": 2.3616299629211426, "learning_rate": 9.965857568714128e-06, "loss": 3.2432, "step": 189000 }, { "epoch": 0.18624441414531498, "grad_norm": 2.572533130645752, "learning_rate": 9.965839512496718e-06, "loss": 3.1845, "step": 189050 }, { "epoch": 0.18629367212313705, "grad_norm": 2.3329010009765625, "learning_rate": 9.965821451522421e-06, "loss": 3.2116, "step": 189100 }, { "epoch": 0.18634293010095915, "grad_norm": 2.4752893447875977, "learning_rate": 9.965803385791258e-06, "loss": 3.2225, "step": 189150 }, { "epoch": 0.18639218807878125, "grad_norm": 2.3398358821868896, "learning_rate": 9.965785315303242e-06, "loss": 3.1822, "step": 189200 }, { "epoch": 0.18644144605660332, "grad_norm": 2.3595540523529053, "learning_rate": 9.965767240058392e-06, "loss": 3.1536, "step": 189250 }, { "epoch": 0.18649070403442541, "grad_norm": 2.615919351577759, "learning_rate": 9.965749160056727e-06, "loss": 3.1804, "step": 189300 }, { "epoch": 0.1865399620122475, "grad_norm": 2.7051188945770264, "learning_rate": 9.965731075298264e-06, "loss": 3.22, "step": 189350 }, { "epoch": 0.18658921999006958, "grad_norm": 2.365379571914673, "learning_rate": 9.965712985783019e-06, "loss": 3.2254, "step": 189400 }, { "epoch": 0.18663847796789168, "grad_norm": 2.526567220687866, "learning_rate": 9.96569489151101e-06, "loss": 3.2379, "step": 189450 }, { "epoch": 0.18668773594571378, "grad_norm": 2.423424243927002, "learning_rate": 9.965676792482255e-06, "loss": 3.2253, "step": 189500 }, { "epoch": 0.18673699392353585, "grad_norm": 2.3560242652893066, 
"learning_rate": 9.965658688696769e-06, "loss": 3.1556, "step": 189550 }, { "epoch": 0.18678625190135795, "grad_norm": 2.2108547687530518, "learning_rate": 9.96564058015457e-06, "loss": 3.2213, "step": 189600 }, { "epoch": 0.18683550987918005, "grad_norm": 2.2721002101898193, "learning_rate": 9.965622466855679e-06, "loss": 3.1578, "step": 189650 }, { "epoch": 0.18688476785700212, "grad_norm": 2.401252508163452, "learning_rate": 9.965604348800107e-06, "loss": 3.1111, "step": 189700 }, { "epoch": 0.1869340258348242, "grad_norm": 2.2952773571014404, "learning_rate": 9.965586225987877e-06, "loss": 3.2204, "step": 189750 }, { "epoch": 0.18698328381264628, "grad_norm": 2.318816661834717, "learning_rate": 9.965568098419004e-06, "loss": 3.2496, "step": 189800 }, { "epoch": 0.18703254179046838, "grad_norm": 2.376627206802368, "learning_rate": 9.965549966093505e-06, "loss": 3.1094, "step": 189850 }, { "epoch": 0.18708179976829048, "grad_norm": 2.3444344997406006, "learning_rate": 9.9655318290114e-06, "loss": 3.2271, "step": 189900 }, { "epoch": 0.18713105774611255, "grad_norm": 2.2698018550872803, "learning_rate": 9.9655136871727e-06, "loss": 3.1162, "step": 189950 }, { "epoch": 0.18718031572393465, "grad_norm": 2.229180335998535, "learning_rate": 9.965495540577431e-06, "loss": 3.205, "step": 190000 }, { "epoch": 0.18722957370175675, "grad_norm": 2.3924190998077393, "learning_rate": 9.965477389225604e-06, "loss": 3.1147, "step": 190050 }, { "epoch": 0.18727883167957882, "grad_norm": 2.9361519813537598, "learning_rate": 9.965459233117239e-06, "loss": 3.2207, "step": 190100 }, { "epoch": 0.18732808965740091, "grad_norm": 2.4314749240875244, "learning_rate": 9.965441072252353e-06, "loss": 3.1731, "step": 190150 }, { "epoch": 0.187377347635223, "grad_norm": 2.196521282196045, "learning_rate": 9.965422906630964e-06, "loss": 3.1942, "step": 190200 }, { "epoch": 0.18742660561304508, "grad_norm": 2.4408762454986572, "learning_rate": 9.965404736253088e-06, "loss": 3.1712, "step": 
190250 }, { "epoch": 0.18747586359086718, "grad_norm": 2.3225667476654053, "learning_rate": 9.965386561118742e-06, "loss": 3.1283, "step": 190300 }, { "epoch": 0.18752512156868925, "grad_norm": 2.425530433654785, "learning_rate": 9.965368381227947e-06, "loss": 3.2405, "step": 190350 }, { "epoch": 0.18757437954651135, "grad_norm": 2.404256820678711, "learning_rate": 9.965350196580717e-06, "loss": 3.1879, "step": 190400 }, { "epoch": 0.18762363752433345, "grad_norm": 2.251185655593872, "learning_rate": 9.96533200717707e-06, "loss": 3.2216, "step": 190450 }, { "epoch": 0.18767289550215552, "grad_norm": 2.3930776119232178, "learning_rate": 9.965313813017022e-06, "loss": 3.1793, "step": 190500 }, { "epoch": 0.18772215347997762, "grad_norm": 2.379326820373535, "learning_rate": 9.965295614100593e-06, "loss": 3.2168, "step": 190550 }, { "epoch": 0.18777141145779971, "grad_norm": 2.487004280090332, "learning_rate": 9.9652774104278e-06, "loss": 3.1719, "step": 190600 }, { "epoch": 0.18782066943562178, "grad_norm": 2.306688070297241, "learning_rate": 9.965259201998661e-06, "loss": 3.1912, "step": 190650 }, { "epoch": 0.18786992741344388, "grad_norm": 2.4894628524780273, "learning_rate": 9.965240988813191e-06, "loss": 3.2002, "step": 190700 }, { "epoch": 0.18791918539126598, "grad_norm": 2.3999686241149902, "learning_rate": 9.965222770871412e-06, "loss": 3.1331, "step": 190750 }, { "epoch": 0.18796844336908805, "grad_norm": 2.290255069732666, "learning_rate": 9.965204548173335e-06, "loss": 3.1921, "step": 190800 }, { "epoch": 0.18801770134691015, "grad_norm": 2.900500774383545, "learning_rate": 9.965186320718982e-06, "loss": 3.178, "step": 190850 }, { "epoch": 0.18806695932473225, "grad_norm": 2.4093830585479736, "learning_rate": 9.96516808850837e-06, "loss": 3.2521, "step": 190900 }, { "epoch": 0.18811621730255432, "grad_norm": 2.7893965244293213, "learning_rate": 9.965149851541515e-06, "loss": 3.203, "step": 190950 }, { "epoch": 0.18816547528037642, "grad_norm": 
2.3622825145721436, "learning_rate": 9.965131609818435e-06, "loss": 3.255, "step": 191000 }, { "epoch": 0.18821473325819849, "grad_norm": 2.3034400939941406, "learning_rate": 9.965113363339148e-06, "loss": 3.1967, "step": 191050 }, { "epoch": 0.18826399123602058, "grad_norm": 2.238585948944092, "learning_rate": 9.96509511210367e-06, "loss": 3.2214, "step": 191100 }, { "epoch": 0.18831324921384268, "grad_norm": 2.2969276905059814, "learning_rate": 9.965076856112022e-06, "loss": 3.2484, "step": 191150 }, { "epoch": 0.18836250719166475, "grad_norm": 2.5052690505981445, "learning_rate": 9.96505859536422e-06, "loss": 3.1633, "step": 191200 }, { "epoch": 0.18841176516948685, "grad_norm": 2.4098780155181885, "learning_rate": 9.965040329860278e-06, "loss": 3.1874, "step": 191250 }, { "epoch": 0.18846102314730895, "grad_norm": 2.2313461303710938, "learning_rate": 9.965022059600217e-06, "loss": 3.2293, "step": 191300 }, { "epoch": 0.18851028112513102, "grad_norm": 2.6183159351348877, "learning_rate": 9.965003784584053e-06, "loss": 3.1966, "step": 191350 }, { "epoch": 0.18855953910295312, "grad_norm": 2.1891672611236572, "learning_rate": 9.964985504811807e-06, "loss": 3.2499, "step": 191400 }, { "epoch": 0.18860879708077521, "grad_norm": 2.337702751159668, "learning_rate": 9.96496722028349e-06, "loss": 3.1831, "step": 191450 }, { "epoch": 0.18865805505859728, "grad_norm": 2.3634822368621826, "learning_rate": 9.964948930999125e-06, "loss": 3.2413, "step": 191500 }, { "epoch": 0.18870731303641938, "grad_norm": 2.456507921218872, "learning_rate": 9.964930636958728e-06, "loss": 3.1044, "step": 191550 }, { "epoch": 0.18875657101424145, "grad_norm": 2.2775163650512695, "learning_rate": 9.964912338162315e-06, "loss": 3.1492, "step": 191600 }, { "epoch": 0.18880582899206355, "grad_norm": 2.2085235118865967, "learning_rate": 9.964894034609904e-06, "loss": 3.1629, "step": 191650 }, { "epoch": 0.18885508696988565, "grad_norm": 2.4059901237487793, "learning_rate": 9.964875726301514e-06, 
"loss": 3.2244, "step": 191700 }, { "epoch": 0.18890434494770772, "grad_norm": 2.2727034091949463, "learning_rate": 9.964857413237164e-06, "loss": 3.1553, "step": 191750 }, { "epoch": 0.18895360292552982, "grad_norm": 2.2478978633880615, "learning_rate": 9.964839095416869e-06, "loss": 3.1959, "step": 191800 }, { "epoch": 0.18900286090335192, "grad_norm": 2.448654890060425, "learning_rate": 9.964820772840645e-06, "loss": 3.1776, "step": 191850 }, { "epoch": 0.18905211888117399, "grad_norm": 2.516153335571289, "learning_rate": 9.964802445508512e-06, "loss": 3.2046, "step": 191900 }, { "epoch": 0.18910137685899608, "grad_norm": 2.467266082763672, "learning_rate": 9.964784113420488e-06, "loss": 3.1473, "step": 191950 }, { "epoch": 0.18915063483681818, "grad_norm": 2.2957606315612793, "learning_rate": 9.964765776576588e-06, "loss": 3.165, "step": 192000 }, { "epoch": 0.18919989281464025, "grad_norm": 2.3886754512786865, "learning_rate": 9.964747434976833e-06, "loss": 3.1599, "step": 192050 }, { "epoch": 0.18924915079246235, "grad_norm": 2.3692195415496826, "learning_rate": 9.964729088621238e-06, "loss": 3.1689, "step": 192100 }, { "epoch": 0.18929840877028445, "grad_norm": 2.3151657581329346, "learning_rate": 9.96471073750982e-06, "loss": 3.153, "step": 192150 }, { "epoch": 0.18934766674810652, "grad_norm": 2.4680593013763428, "learning_rate": 9.9646923816426e-06, "loss": 3.1025, "step": 192200 }, { "epoch": 0.18939692472592862, "grad_norm": 2.4993388652801514, "learning_rate": 9.964674021019592e-06, "loss": 3.1516, "step": 192250 }, { "epoch": 0.1894461827037507, "grad_norm": 2.2265734672546387, "learning_rate": 9.964655655640816e-06, "loss": 3.2583, "step": 192300 }, { "epoch": 0.18949544068157279, "grad_norm": 2.234433889389038, "learning_rate": 9.964637285506288e-06, "loss": 3.1798, "step": 192350 }, { "epoch": 0.18954469865939488, "grad_norm": 2.4456393718719482, "learning_rate": 9.964618910616026e-06, "loss": 3.1588, "step": 192400 }, { "epoch": 
0.18959395663721695, "grad_norm": 2.2508795261383057, "learning_rate": 9.96460053097005e-06, "loss": 3.1929, "step": 192450 }, { "epoch": 0.18964321461503905, "grad_norm": 2.156420946121216, "learning_rate": 9.964582146568372e-06, "loss": 3.2179, "step": 192500 }, { "epoch": 0.18969247259286115, "grad_norm": 2.350403070449829, "learning_rate": 9.964563757411015e-06, "loss": 3.2007, "step": 192550 }, { "epoch": 0.18974173057068322, "grad_norm": 2.8115296363830566, "learning_rate": 9.964545363497995e-06, "loss": 3.2324, "step": 192600 }, { "epoch": 0.18979098854850532, "grad_norm": 2.27908992767334, "learning_rate": 9.964526964829328e-06, "loss": 3.1823, "step": 192650 }, { "epoch": 0.18984024652632742, "grad_norm": 2.3734939098358154, "learning_rate": 9.964508561405034e-06, "loss": 3.2604, "step": 192700 }, { "epoch": 0.1898895045041495, "grad_norm": 2.5990357398986816, "learning_rate": 9.96449015322513e-06, "loss": 3.1182, "step": 192750 }, { "epoch": 0.18993876248197158, "grad_norm": 2.4322078227996826, "learning_rate": 9.964471740289632e-06, "loss": 3.1366, "step": 192800 }, { "epoch": 0.18998802045979366, "grad_norm": 2.651874303817749, "learning_rate": 9.96445332259856e-06, "loss": 3.2434, "step": 192850 }, { "epoch": 0.19003727843761575, "grad_norm": 2.3832454681396484, "learning_rate": 9.964434900151929e-06, "loss": 3.1514, "step": 192900 }, { "epoch": 0.19008653641543785, "grad_norm": 2.2981324195861816, "learning_rate": 9.964416472949758e-06, "loss": 3.1424, "step": 192950 }, { "epoch": 0.19013579439325992, "grad_norm": 2.4571800231933594, "learning_rate": 9.964398040992067e-06, "loss": 3.1966, "step": 193000 }, { "epoch": 0.19018505237108202, "grad_norm": 2.3524985313415527, "learning_rate": 9.96437960427887e-06, "loss": 3.2319, "step": 193050 }, { "epoch": 0.19023431034890412, "grad_norm": 2.2744481563568115, "learning_rate": 9.964361162810186e-06, "loss": 3.1967, "step": 193100 }, { "epoch": 0.1902835683267262, "grad_norm": 2.3375165462493896, 
"learning_rate": 9.964342716586033e-06, "loss": 3.1686, "step": 193150 }, { "epoch": 0.19033282630454829, "grad_norm": 2.194582462310791, "learning_rate": 9.964324265606428e-06, "loss": 3.2426, "step": 193200 }, { "epoch": 0.19038208428237038, "grad_norm": 2.7396366596221924, "learning_rate": 9.964305809871388e-06, "loss": 3.1897, "step": 193250 }, { "epoch": 0.19043134226019245, "grad_norm": 2.732534885406494, "learning_rate": 9.964287349380935e-06, "loss": 3.173, "step": 193300 }, { "epoch": 0.19048060023801455, "grad_norm": 2.403519868850708, "learning_rate": 9.96426888413508e-06, "loss": 3.1772, "step": 193350 }, { "epoch": 0.19052985821583665, "grad_norm": 2.4294614791870117, "learning_rate": 9.964250414133848e-06, "loss": 3.2305, "step": 193400 }, { "epoch": 0.19057911619365872, "grad_norm": 2.2275023460388184, "learning_rate": 9.964231939377248e-06, "loss": 3.2027, "step": 193450 }, { "epoch": 0.19062837417148082, "grad_norm": 2.46925950050354, "learning_rate": 9.964213459865306e-06, "loss": 3.1574, "step": 193500 }, { "epoch": 0.1906776321493029, "grad_norm": 2.3353970050811768, "learning_rate": 9.964194975598036e-06, "loss": 3.2669, "step": 193550 }, { "epoch": 0.190726890127125, "grad_norm": 2.2026402950286865, "learning_rate": 9.964176486575454e-06, "loss": 3.1596, "step": 193600 }, { "epoch": 0.19077614810494709, "grad_norm": 2.497058868408203, "learning_rate": 9.964157992797581e-06, "loss": 3.2443, "step": 193650 }, { "epoch": 0.19082540608276916, "grad_norm": 2.2916629314422607, "learning_rate": 9.964139494264435e-06, "loss": 3.2155, "step": 193700 }, { "epoch": 0.19087466406059125, "grad_norm": 2.5768330097198486, "learning_rate": 9.96412099097603e-06, "loss": 3.1433, "step": 193750 }, { "epoch": 0.19092392203841335, "grad_norm": 2.4778990745544434, "learning_rate": 9.964102482932385e-06, "loss": 3.1687, "step": 193800 }, { "epoch": 0.19097318001623542, "grad_norm": 2.578822374343872, "learning_rate": 9.96408397013352e-06, "loss": 3.2199, "step": 
193850 }, { "epoch": 0.19102243799405752, "grad_norm": 2.301558256149292, "learning_rate": 9.964065452579452e-06, "loss": 3.177, "step": 193900 }, { "epoch": 0.19107169597187962, "grad_norm": 2.457897663116455, "learning_rate": 9.964046930270196e-06, "loss": 3.2086, "step": 193950 }, { "epoch": 0.1911209539497017, "grad_norm": 2.283489942550659, "learning_rate": 9.964028403205774e-06, "loss": 3.2617, "step": 194000 }, { "epoch": 0.1911702119275238, "grad_norm": 2.175835609436035, "learning_rate": 9.9640098713862e-06, "loss": 3.1478, "step": 194050 }, { "epoch": 0.19121946990534586, "grad_norm": 2.514758348464966, "learning_rate": 9.963991334811494e-06, "loss": 3.2253, "step": 194100 }, { "epoch": 0.19126872788316795, "grad_norm": 2.4461724758148193, "learning_rate": 9.963972793481671e-06, "loss": 3.1813, "step": 194150 }, { "epoch": 0.19131798586099005, "grad_norm": 2.3089981079101562, "learning_rate": 9.963954247396754e-06, "loss": 3.2122, "step": 194200 }, { "epoch": 0.19136724383881212, "grad_norm": 2.3478996753692627, "learning_rate": 9.963935696556755e-06, "loss": 3.1946, "step": 194250 }, { "epoch": 0.19141650181663422, "grad_norm": 2.3709185123443604, "learning_rate": 9.963917140961694e-06, "loss": 3.1467, "step": 194300 }, { "epoch": 0.19146575979445632, "grad_norm": 2.412412405014038, "learning_rate": 9.963898580611592e-06, "loss": 3.1587, "step": 194350 }, { "epoch": 0.1915150177722784, "grad_norm": 2.4064080715179443, "learning_rate": 9.963880015506463e-06, "loss": 3.228, "step": 194400 }, { "epoch": 0.1915642757501005, "grad_norm": 2.3433806896209717, "learning_rate": 9.963861445646324e-06, "loss": 3.1675, "step": 194450 }, { "epoch": 0.19161353372792259, "grad_norm": 2.7242088317871094, "learning_rate": 9.963842871031196e-06, "loss": 3.1884, "step": 194500 }, { "epoch": 0.19166279170574466, "grad_norm": 2.316920757293701, "learning_rate": 9.963824291661093e-06, "loss": 3.1764, "step": 194550 }, { "epoch": 0.19171204968356675, "grad_norm": 
2.3244409561157227, "learning_rate": 9.963805707536036e-06, "loss": 3.2085, "step": 194600 }, { "epoch": 0.19176130766138885, "grad_norm": 2.331098794937134, "learning_rate": 9.963787118656043e-06, "loss": 3.1673, "step": 194650 }, { "epoch": 0.19181056563921092, "grad_norm": 2.3229801654815674, "learning_rate": 9.96376852502113e-06, "loss": 3.1482, "step": 194700 }, { "epoch": 0.19185982361703302, "grad_norm": 2.285534381866455, "learning_rate": 9.963749926631315e-06, "loss": 3.1665, "step": 194750 }, { "epoch": 0.1919090815948551, "grad_norm": 2.4967353343963623, "learning_rate": 9.963731323486618e-06, "loss": 3.2219, "step": 194800 }, { "epoch": 0.1919583395726772, "grad_norm": 2.5189368724823, "learning_rate": 9.963712715587053e-06, "loss": 3.1594, "step": 194850 }, { "epoch": 0.1920075975504993, "grad_norm": 2.631378173828125, "learning_rate": 9.963694102932641e-06, "loss": 3.2156, "step": 194900 }, { "epoch": 0.19205685552832136, "grad_norm": 2.2416679859161377, "learning_rate": 9.9636754855234e-06, "loss": 3.1704, "step": 194950 }, { "epoch": 0.19210611350614346, "grad_norm": 2.335855484008789, "learning_rate": 9.963656863359343e-06, "loss": 3.1867, "step": 195000 }, { "epoch": 0.19215537148396555, "grad_norm": 2.530660629272461, "learning_rate": 9.963638236440494e-06, "loss": 3.1466, "step": 195050 }, { "epoch": 0.19220462946178762, "grad_norm": 2.464677333831787, "learning_rate": 9.963619604766868e-06, "loss": 3.2254, "step": 195100 }, { "epoch": 0.19225388743960972, "grad_norm": 2.373227119445801, "learning_rate": 9.963600968338483e-06, "loss": 3.192, "step": 195150 }, { "epoch": 0.19230314541743182, "grad_norm": 2.301301956176758, "learning_rate": 9.963582327155356e-06, "loss": 3.1967, "step": 195200 }, { "epoch": 0.1923524033952539, "grad_norm": 2.333597183227539, "learning_rate": 9.963563681217506e-06, "loss": 3.1943, "step": 195250 }, { "epoch": 0.192401661373076, "grad_norm": 2.222482919692993, "learning_rate": 9.963545030524952e-06, "loss": 3.2003, 
"step": 195300 }, { "epoch": 0.19245091935089806, "grad_norm": 2.518244743347168, "learning_rate": 9.96352637507771e-06, "loss": 3.1716, "step": 195350 }, { "epoch": 0.19250017732872016, "grad_norm": 2.437635898590088, "learning_rate": 9.963507714875797e-06, "loss": 3.2102, "step": 195400 }, { "epoch": 0.19254943530654225, "grad_norm": 2.578450918197632, "learning_rate": 9.963489049919233e-06, "loss": 3.0361, "step": 195450 }, { "epoch": 0.19259869328436433, "grad_norm": 2.291053533554077, "learning_rate": 9.963470380208036e-06, "loss": 3.1768, "step": 195500 }, { "epoch": 0.19264795126218642, "grad_norm": 2.284888505935669, "learning_rate": 9.963451705742224e-06, "loss": 3.1621, "step": 195550 }, { "epoch": 0.19269720924000852, "grad_norm": 2.4152355194091797, "learning_rate": 9.963433026521812e-06, "loss": 3.1914, "step": 195600 }, { "epoch": 0.1927464672178306, "grad_norm": 2.288513660430908, "learning_rate": 9.96341434254682e-06, "loss": 3.1319, "step": 195650 }, { "epoch": 0.1927957251956527, "grad_norm": 2.371650457382202, "learning_rate": 9.963395653817268e-06, "loss": 3.1691, "step": 195700 }, { "epoch": 0.1928449831734748, "grad_norm": 2.560469388961792, "learning_rate": 9.963376960333168e-06, "loss": 3.2114, "step": 195750 }, { "epoch": 0.19289424115129686, "grad_norm": 2.23199200630188, "learning_rate": 9.963358262094544e-06, "loss": 3.1571, "step": 195800 }, { "epoch": 0.19294349912911896, "grad_norm": 2.3587799072265625, "learning_rate": 9.96333955910141e-06, "loss": 3.1663, "step": 195850 }, { "epoch": 0.19299275710694103, "grad_norm": 2.535485029220581, "learning_rate": 9.963320851353786e-06, "loss": 3.1396, "step": 195900 }, { "epoch": 0.19304201508476312, "grad_norm": 2.4983019828796387, "learning_rate": 9.963302138851689e-06, "loss": 3.2006, "step": 195950 }, { "epoch": 0.19309127306258522, "grad_norm": 2.294947862625122, "learning_rate": 9.963283421595139e-06, "loss": 3.2082, "step": 196000 }, { "epoch": 0.1931405310404073, "grad_norm": 
2.242189884185791, "learning_rate": 9.96326469958415e-06, "loss": 3.155, "step": 196050 }, { "epoch": 0.1931897890182294, "grad_norm": 2.2374038696289062, "learning_rate": 9.963245972818742e-06, "loss": 3.1288, "step": 196100 }, { "epoch": 0.1932390469960515, "grad_norm": 2.4462459087371826, "learning_rate": 9.963227241298935e-06, "loss": 3.1916, "step": 196150 }, { "epoch": 0.19328830497387356, "grad_norm": 2.417543411254883, "learning_rate": 9.963208505024745e-06, "loss": 3.1696, "step": 196200 }, { "epoch": 0.19333756295169566, "grad_norm": 2.306180238723755, "learning_rate": 9.963189763996188e-06, "loss": 3.1968, "step": 196250 }, { "epoch": 0.19338682092951776, "grad_norm": 2.5113680362701416, "learning_rate": 9.963171018213284e-06, "loss": 3.2388, "step": 196300 }, { "epoch": 0.19343607890733983, "grad_norm": 2.5705857276916504, "learning_rate": 9.963152267676052e-06, "loss": 3.1763, "step": 196350 }, { "epoch": 0.19348533688516192, "grad_norm": 2.7328579425811768, "learning_rate": 9.963133512384507e-06, "loss": 3.1389, "step": 196400 }, { "epoch": 0.19353459486298402, "grad_norm": 2.161534309387207, "learning_rate": 9.963114752338671e-06, "loss": 3.19, "step": 196450 }, { "epoch": 0.1935838528408061, "grad_norm": 2.388536214828491, "learning_rate": 9.963095987538558e-06, "loss": 3.1997, "step": 196500 }, { "epoch": 0.1936331108186282, "grad_norm": 2.399711847305298, "learning_rate": 9.963077217984188e-06, "loss": 3.1649, "step": 196550 }, { "epoch": 0.19368236879645026, "grad_norm": 2.1534523963928223, "learning_rate": 9.96305844367558e-06, "loss": 3.2177, "step": 196600 }, { "epoch": 0.19373162677427236, "grad_norm": 2.15371036529541, "learning_rate": 9.96303966461275e-06, "loss": 3.2178, "step": 196650 }, { "epoch": 0.19378088475209446, "grad_norm": 2.2764859199523926, "learning_rate": 9.963020880795715e-06, "loss": 3.1579, "step": 196700 }, { "epoch": 0.19383014272991653, "grad_norm": 2.4023356437683105, "learning_rate": 9.963002092224496e-06, "loss": 
3.1625, "step": 196750 }, { "epoch": 0.19387940070773862, "grad_norm": 2.2914934158325195, "learning_rate": 9.962983298899109e-06, "loss": 3.1439, "step": 196800 }, { "epoch": 0.19392865868556072, "grad_norm": 2.465825319290161, "learning_rate": 9.962964500819574e-06, "loss": 3.2253, "step": 196850 }, { "epoch": 0.1939779166633828, "grad_norm": 2.3274152278900146, "learning_rate": 9.962945697985907e-06, "loss": 3.2058, "step": 196900 }, { "epoch": 0.1940271746412049, "grad_norm": 2.3699562549591064, "learning_rate": 9.962926890398126e-06, "loss": 3.1411, "step": 196950 }, { "epoch": 0.194076432619027, "grad_norm": 2.378976821899414, "learning_rate": 9.96290807805625e-06, "loss": 3.1929, "step": 197000 }, { "epoch": 0.19412569059684906, "grad_norm": 2.5655336380004883, "learning_rate": 9.962889260960298e-06, "loss": 3.2295, "step": 197050 }, { "epoch": 0.19417494857467116, "grad_norm": 2.3621158599853516, "learning_rate": 9.962870439110284e-06, "loss": 3.0955, "step": 197100 }, { "epoch": 0.19422420655249323, "grad_norm": 2.3521695137023926, "learning_rate": 9.96285161250623e-06, "loss": 3.1352, "step": 197150 }, { "epoch": 0.19427346453031533, "grad_norm": 2.62026047706604, "learning_rate": 9.962832781148154e-06, "loss": 3.1936, "step": 197200 }, { "epoch": 0.19432272250813742, "grad_norm": 2.4014358520507812, "learning_rate": 9.962813945036072e-06, "loss": 3.19, "step": 197250 }, { "epoch": 0.1943719804859595, "grad_norm": 2.4127185344696045, "learning_rate": 9.962795104170003e-06, "loss": 3.2728, "step": 197300 }, { "epoch": 0.1944212384637816, "grad_norm": 2.575002431869507, "learning_rate": 9.962776258549965e-06, "loss": 3.1712, "step": 197350 }, { "epoch": 0.1944704964416037, "grad_norm": 2.2262978553771973, "learning_rate": 9.962757408175975e-06, "loss": 3.2059, "step": 197400 }, { "epoch": 0.19451975441942576, "grad_norm": 2.4179749488830566, "learning_rate": 9.962738553048053e-06, "loss": 3.1328, "step": 197450 }, { "epoch": 0.19456901239724786, 
"grad_norm": 2.385349750518799, "learning_rate": 9.962719693166215e-06, "loss": 3.1755, "step": 197500 }, { "epoch": 0.19461827037506996, "grad_norm": 2.4968371391296387, "learning_rate": 9.96270082853048e-06, "loss": 3.1833, "step": 197550 }, { "epoch": 0.19466752835289203, "grad_norm": 2.2525041103363037, "learning_rate": 9.962681959140868e-06, "loss": 3.1664, "step": 197600 }, { "epoch": 0.19471678633071413, "grad_norm": 2.8358733654022217, "learning_rate": 9.962663084997396e-06, "loss": 3.2166, "step": 197650 }, { "epoch": 0.19476604430853622, "grad_norm": 2.465688467025757, "learning_rate": 9.962644206100078e-06, "loss": 3.2715, "step": 197700 }, { "epoch": 0.1948153022863583, "grad_norm": 2.3951239585876465, "learning_rate": 9.962625322448938e-06, "loss": 3.1547, "step": 197750 }, { "epoch": 0.1948645602641804, "grad_norm": 2.2760398387908936, "learning_rate": 9.96260643404399e-06, "loss": 3.1814, "step": 197800 }, { "epoch": 0.19491381824200246, "grad_norm": 2.5958476066589355, "learning_rate": 9.962587540885255e-06, "loss": 3.1697, "step": 197850 }, { "epoch": 0.19496307621982456, "grad_norm": 2.7002389430999756, "learning_rate": 9.962568642972748e-06, "loss": 3.1944, "step": 197900 }, { "epoch": 0.19501233419764666, "grad_norm": 2.3151636123657227, "learning_rate": 9.962549740306492e-06, "loss": 3.1758, "step": 197950 }, { "epoch": 0.19506159217546873, "grad_norm": 2.4793667793273926, "learning_rate": 9.9625308328865e-06, "loss": 3.1716, "step": 198000 }, { "epoch": 0.19511085015329083, "grad_norm": 2.3488872051239014, "learning_rate": 9.96251192071279e-06, "loss": 3.2181, "step": 198050 }, { "epoch": 0.19516010813111292, "grad_norm": 2.5180134773254395, "learning_rate": 9.962493003785386e-06, "loss": 3.1714, "step": 198100 }, { "epoch": 0.195209366108935, "grad_norm": 2.4682064056396484, "learning_rate": 9.962474082104298e-06, "loss": 3.1384, "step": 198150 }, { "epoch": 0.1952586240867571, "grad_norm": 2.598789691925049, "learning_rate": 
9.962455155669553e-06, "loss": 3.1643, "step": 198200 }, { "epoch": 0.1953078820645792, "grad_norm": 2.2681994438171387, "learning_rate": 9.962436224481161e-06, "loss": 3.1153, "step": 198250 }, { "epoch": 0.19535714004240126, "grad_norm": 2.3721683025360107, "learning_rate": 9.962417288539146e-06, "loss": 3.1922, "step": 198300 }, { "epoch": 0.19540639802022336, "grad_norm": 2.248358964920044, "learning_rate": 9.962398347843523e-06, "loss": 3.1414, "step": 198350 }, { "epoch": 0.19545565599804543, "grad_norm": 2.37998628616333, "learning_rate": 9.962379402394312e-06, "loss": 3.216, "step": 198400 }, { "epoch": 0.19550491397586753, "grad_norm": 2.3856618404388428, "learning_rate": 9.962360452191528e-06, "loss": 3.1578, "step": 198450 }, { "epoch": 0.19555417195368963, "grad_norm": 2.5816261768341064, "learning_rate": 9.962341497235191e-06, "loss": 3.1771, "step": 198500 }, { "epoch": 0.1956034299315117, "grad_norm": 2.229011297225952, "learning_rate": 9.962322537525322e-06, "loss": 3.19, "step": 198550 }, { "epoch": 0.1956526879093338, "grad_norm": 2.4047539234161377, "learning_rate": 9.962303573061934e-06, "loss": 3.1503, "step": 198600 }, { "epoch": 0.1957019458871559, "grad_norm": 2.409688711166382, "learning_rate": 9.962284603845049e-06, "loss": 3.2172, "step": 198650 }, { "epoch": 0.19575120386497796, "grad_norm": 2.312586784362793, "learning_rate": 9.962265629874684e-06, "loss": 3.2165, "step": 198700 }, { "epoch": 0.19580046184280006, "grad_norm": 2.4302942752838135, "learning_rate": 9.962246651150857e-06, "loss": 3.2028, "step": 198750 }, { "epoch": 0.19584971982062216, "grad_norm": 2.301647186279297, "learning_rate": 9.962227667673587e-06, "loss": 3.1915, "step": 198800 }, { "epoch": 0.19589897779844423, "grad_norm": 2.356356382369995, "learning_rate": 9.96220867944289e-06, "loss": 3.1627, "step": 198850 }, { "epoch": 0.19594823577626633, "grad_norm": 2.259392499923706, "learning_rate": 9.962189686458788e-06, "loss": 3.1367, "step": 198900 }, { "epoch": 
0.19599749375408843, "grad_norm": 2.43422532081604, "learning_rate": 9.962170688721296e-06, "loss": 3.1738, "step": 198950 }, { "epoch": 0.1960467517319105, "grad_norm": 2.4379403591156006, "learning_rate": 9.96215168623043e-06, "loss": 3.1603, "step": 199000 }, { "epoch": 0.1960960097097326, "grad_norm": 2.1605894565582275, "learning_rate": 9.962132678986215e-06, "loss": 3.1717, "step": 199050 }, { "epoch": 0.19614526768755466, "grad_norm": 2.6138803958892822, "learning_rate": 9.962113666988664e-06, "loss": 3.1361, "step": 199100 }, { "epoch": 0.19619452566537676, "grad_norm": 2.4692981243133545, "learning_rate": 9.962094650237797e-06, "loss": 3.1057, "step": 199150 }, { "epoch": 0.19624378364319886, "grad_norm": 2.5575692653656006, "learning_rate": 9.962075628733631e-06, "loss": 3.1856, "step": 199200 }, { "epoch": 0.19629304162102093, "grad_norm": 2.375133514404297, "learning_rate": 9.962056602476187e-06, "loss": 3.1883, "step": 199250 }, { "epoch": 0.19634229959884303, "grad_norm": 2.3268232345581055, "learning_rate": 9.962037571465479e-06, "loss": 3.1597, "step": 199300 }, { "epoch": 0.19639155757666513, "grad_norm": 2.4489972591400146, "learning_rate": 9.96201853570153e-06, "loss": 3.2092, "step": 199350 }, { "epoch": 0.1964408155544872, "grad_norm": 2.386378526687622, "learning_rate": 9.961999495184354e-06, "loss": 3.1872, "step": 199400 }, { "epoch": 0.1964900735323093, "grad_norm": 2.471327543258667, "learning_rate": 9.961980449913972e-06, "loss": 3.2072, "step": 199450 }, { "epoch": 0.1965393315101314, "grad_norm": 2.1944525241851807, "learning_rate": 9.961961399890401e-06, "loss": 3.1711, "step": 199500 }, { "epoch": 0.19658858948795346, "grad_norm": 2.346181631088257, "learning_rate": 9.96194234511366e-06, "loss": 3.2251, "step": 199550 }, { "epoch": 0.19663784746577556, "grad_norm": 2.348905563354492, "learning_rate": 9.961923285583765e-06, "loss": 3.2123, "step": 199600 }, { "epoch": 0.19668710544359763, "grad_norm": 2.3777413368225098, 
"learning_rate": 9.961904221300738e-06, "loss": 3.167, "step": 199650 }, { "epoch": 0.19673636342141973, "grad_norm": 2.30802321434021, "learning_rate": 9.961885152264595e-06, "loss": 3.1885, "step": 199700 }, { "epoch": 0.19678562139924183, "grad_norm": 2.48321270942688, "learning_rate": 9.961866078475353e-06, "loss": 3.1363, "step": 199750 }, { "epoch": 0.1968348793770639, "grad_norm": 2.4374709129333496, "learning_rate": 9.961846999933032e-06, "loss": 3.2174, "step": 199800 }, { "epoch": 0.196884137354886, "grad_norm": 2.846295118331909, "learning_rate": 9.961827916637652e-06, "loss": 3.1916, "step": 199850 }, { "epoch": 0.1969333953327081, "grad_norm": 2.375593423843384, "learning_rate": 9.961808828589229e-06, "loss": 3.1647, "step": 199900 }, { "epoch": 0.19698265331053016, "grad_norm": 2.3127782344818115, "learning_rate": 9.96178973578778e-06, "loss": 3.1941, "step": 199950 }, { "epoch": 0.19703191128835226, "grad_norm": 2.329901695251465, "learning_rate": 9.961770638233325e-06, "loss": 3.1717, "step": 200000 }, { "epoch": 0.19708116926617436, "grad_norm": 2.3490004539489746, "learning_rate": 9.961751535925885e-06, "loss": 3.1307, "step": 200050 }, { "epoch": 0.19713042724399643, "grad_norm": 2.347968578338623, "learning_rate": 9.961732428865474e-06, "loss": 3.2119, "step": 200100 }, { "epoch": 0.19717968522181853, "grad_norm": 2.2133829593658447, "learning_rate": 9.961713317052112e-06, "loss": 3.1709, "step": 200150 }, { "epoch": 0.19722894319964063, "grad_norm": 2.3734090328216553, "learning_rate": 9.961694200485816e-06, "loss": 3.1486, "step": 200200 }, { "epoch": 0.1972782011774627, "grad_norm": 2.529090166091919, "learning_rate": 9.961675079166608e-06, "loss": 3.2172, "step": 200250 }, { "epoch": 0.1973274591552848, "grad_norm": 2.2635481357574463, "learning_rate": 9.961655953094501e-06, "loss": 3.187, "step": 200300 }, { "epoch": 0.19737671713310687, "grad_norm": 2.3745622634887695, "learning_rate": 9.961636822269519e-06, "loss": 3.1741, "step": 200350 
}, { "epoch": 0.19742597511092896, "grad_norm": 2.3730173110961914, "learning_rate": 9.961617686691675e-06, "loss": 3.2237, "step": 200400 }, { "epoch": 0.19747523308875106, "grad_norm": 2.2681336402893066, "learning_rate": 9.96159854636099e-06, "loss": 3.136, "step": 200450 }, { "epoch": 0.19752449106657313, "grad_norm": 2.6144564151763916, "learning_rate": 9.961579401277484e-06, "loss": 3.1863, "step": 200500 }, { "epoch": 0.19757374904439523, "grad_norm": 2.2651357650756836, "learning_rate": 9.961560251441172e-06, "loss": 3.1832, "step": 200550 }, { "epoch": 0.19762300702221733, "grad_norm": 2.178575038909912, "learning_rate": 9.961541096852073e-06, "loss": 3.172, "step": 200600 }, { "epoch": 0.1976722650000394, "grad_norm": 2.2683420181274414, "learning_rate": 9.961521937510208e-06, "loss": 3.1787, "step": 200650 }, { "epoch": 0.1977215229778615, "grad_norm": 2.2300939559936523, "learning_rate": 9.961502773415593e-06, "loss": 3.165, "step": 200700 }, { "epoch": 0.1977707809556836, "grad_norm": 2.563718318939209, "learning_rate": 9.961483604568244e-06, "loss": 3.1805, "step": 200750 }, { "epoch": 0.19782003893350567, "grad_norm": 2.2559280395507812, "learning_rate": 9.961464430968186e-06, "loss": 3.1962, "step": 200800 }, { "epoch": 0.19786929691132776, "grad_norm": 2.5315680503845215, "learning_rate": 9.961445252615434e-06, "loss": 3.1266, "step": 200850 }, { "epoch": 0.19791855488914983, "grad_norm": 2.3059022426605225, "learning_rate": 9.961426069510002e-06, "loss": 3.1499, "step": 200900 }, { "epoch": 0.19796781286697193, "grad_norm": 2.3210031986236572, "learning_rate": 9.961406881651916e-06, "loss": 3.1193, "step": 200950 }, { "epoch": 0.19801707084479403, "grad_norm": 2.5008418560028076, "learning_rate": 9.96138768904119e-06, "loss": 3.1331, "step": 201000 }, { "epoch": 0.1980663288226161, "grad_norm": 2.245042085647583, "learning_rate": 9.961368491677842e-06, "loss": 3.1097, "step": 201050 }, { "epoch": 0.1981155868004382, "grad_norm": 2.249685049057007, 
"learning_rate": 9.961349289561892e-06, "loss": 3.1945, "step": 201100 }, { "epoch": 0.1981648447782603, "grad_norm": 2.6442301273345947, "learning_rate": 9.961330082693356e-06, "loss": 3.154, "step": 201150 }, { "epoch": 0.19821410275608237, "grad_norm": 2.5741302967071533, "learning_rate": 9.961310871072259e-06, "loss": 3.1217, "step": 201200 }, { "epoch": 0.19826336073390446, "grad_norm": 2.356677770614624, "learning_rate": 9.96129165469861e-06, "loss": 3.2068, "step": 201250 }, { "epoch": 0.19831261871172656, "grad_norm": 2.3624699115753174, "learning_rate": 9.961272433572434e-06, "loss": 3.1714, "step": 201300 }, { "epoch": 0.19836187668954863, "grad_norm": 2.552502393722534, "learning_rate": 9.961253207693748e-06, "loss": 3.1822, "step": 201350 }, { "epoch": 0.19841113466737073, "grad_norm": 2.6163618564605713, "learning_rate": 9.96123397706257e-06, "loss": 3.2711, "step": 201400 }, { "epoch": 0.19846039264519283, "grad_norm": 2.3790595531463623, "learning_rate": 9.961214741678917e-06, "loss": 3.1501, "step": 201450 }, { "epoch": 0.1985096506230149, "grad_norm": 2.5507352352142334, "learning_rate": 9.96119550154281e-06, "loss": 3.1609, "step": 201500 }, { "epoch": 0.198558908600837, "grad_norm": 2.3799893856048584, "learning_rate": 9.961176256654266e-06, "loss": 3.2122, "step": 201550 }, { "epoch": 0.19860816657865907, "grad_norm": 2.3326478004455566, "learning_rate": 9.961157007013302e-06, "loss": 3.1995, "step": 201600 }, { "epoch": 0.19865742455648117, "grad_norm": 2.434476375579834, "learning_rate": 9.96113775261994e-06, "loss": 3.1316, "step": 201650 }, { "epoch": 0.19870668253430326, "grad_norm": 2.277432680130005, "learning_rate": 9.961118493474195e-06, "loss": 3.1644, "step": 201700 }, { "epoch": 0.19875594051212533, "grad_norm": 2.6430203914642334, "learning_rate": 9.961099229576089e-06, "loss": 3.1755, "step": 201750 }, { "epoch": 0.19880519848994743, "grad_norm": 2.235264301300049, "learning_rate": 9.961079960925637e-06, "loss": 3.1889, "step": 
201800 }, { "epoch": 0.19885445646776953, "grad_norm": 2.3129382133483887, "learning_rate": 9.96106068752286e-06, "loss": 3.187, "step": 201850 }, { "epoch": 0.1989037144455916, "grad_norm": 2.230006456375122, "learning_rate": 9.961041409367773e-06, "loss": 3.0785, "step": 201900 }, { "epoch": 0.1989529724234137, "grad_norm": 2.4203357696533203, "learning_rate": 9.9610221264604e-06, "loss": 3.1672, "step": 201950 }, { "epoch": 0.1990022304012358, "grad_norm": 2.3013617992401123, "learning_rate": 9.961002838800755e-06, "loss": 3.1561, "step": 202000 }, { "epoch": 0.19905148837905787, "grad_norm": 2.3852150440216064, "learning_rate": 9.960983546388855e-06, "loss": 3.1547, "step": 202050 }, { "epoch": 0.19910074635687997, "grad_norm": 2.3615288734436035, "learning_rate": 9.960964249224725e-06, "loss": 3.1832, "step": 202100 }, { "epoch": 0.19915000433470204, "grad_norm": 2.3716626167297363, "learning_rate": 9.960944947308377e-06, "loss": 3.1837, "step": 202150 }, { "epoch": 0.19919926231252413, "grad_norm": 2.329118490219116, "learning_rate": 9.960925640639834e-06, "loss": 3.1237, "step": 202200 }, { "epoch": 0.19924852029034623, "grad_norm": 3.411515474319458, "learning_rate": 9.960906329219114e-06, "loss": 3.1649, "step": 202250 }, { "epoch": 0.1992977782681683, "grad_norm": 2.3965423107147217, "learning_rate": 9.960887013046233e-06, "loss": 3.0808, "step": 202300 }, { "epoch": 0.1993470362459904, "grad_norm": 2.792933225631714, "learning_rate": 9.960867692121209e-06, "loss": 3.1782, "step": 202350 }, { "epoch": 0.1993962942238125, "grad_norm": 2.4136593341827393, "learning_rate": 9.960848366444064e-06, "loss": 3.1281, "step": 202400 }, { "epoch": 0.19944555220163457, "grad_norm": 2.5002527236938477, "learning_rate": 9.960829036014815e-06, "loss": 3.1564, "step": 202450 }, { "epoch": 0.19949481017945667, "grad_norm": 2.3341305255889893, "learning_rate": 9.960809700833477e-06, "loss": 3.1716, "step": 202500 }, { "epoch": 0.19954406815727876, "grad_norm": 
2.292515516281128, "learning_rate": 9.960790360900075e-06, "loss": 3.1689, "step": 202550 }, { "epoch": 0.19959332613510083, "grad_norm": 2.278526782989502, "learning_rate": 9.960771016214623e-06, "loss": 3.1087, "step": 202600 }, { "epoch": 0.19964258411292293, "grad_norm": 2.190265655517578, "learning_rate": 9.960751666777142e-06, "loss": 3.153, "step": 202650 }, { "epoch": 0.19969184209074503, "grad_norm": 2.379063844680786, "learning_rate": 9.960732312587648e-06, "loss": 3.1745, "step": 202700 }, { "epoch": 0.1997411000685671, "grad_norm": 2.3188283443450928, "learning_rate": 9.960712953646163e-06, "loss": 3.1328, "step": 202750 }, { "epoch": 0.1997903580463892, "grad_norm": 2.407381296157837, "learning_rate": 9.960693589952702e-06, "loss": 3.2262, "step": 202800 }, { "epoch": 0.19983961602421127, "grad_norm": 2.347661018371582, "learning_rate": 9.960674221507284e-06, "loss": 3.196, "step": 202850 }, { "epoch": 0.19988887400203337, "grad_norm": 2.281912088394165, "learning_rate": 9.96065484830993e-06, "loss": 3.1594, "step": 202900 }, { "epoch": 0.19993813197985547, "grad_norm": 2.600001573562622, "learning_rate": 9.960635470360657e-06, "loss": 3.1384, "step": 202950 }, { "epoch": 0.19998738995767754, "grad_norm": 2.632993698120117, "learning_rate": 9.960616087659483e-06, "loss": 3.0998, "step": 203000 }, { "epoch": 0.20003664793549963, "grad_norm": 2.7869346141815186, "learning_rate": 9.960596700206428e-06, "loss": 3.1866, "step": 203050 }, { "epoch": 0.20008590591332173, "grad_norm": 2.527700185775757, "learning_rate": 9.96057730800151e-06, "loss": 3.1591, "step": 203100 }, { "epoch": 0.2001351638911438, "grad_norm": 2.3003110885620117, "learning_rate": 9.960557911044747e-06, "loss": 3.1136, "step": 203150 }, { "epoch": 0.2001844218689659, "grad_norm": 2.369945764541626, "learning_rate": 9.960538509336156e-06, "loss": 3.2229, "step": 203200 }, { "epoch": 0.200233679846788, "grad_norm": 2.3069117069244385, "learning_rate": 9.960519102875759e-06, "loss": 
3.1869, "step": 203250 }, { "epoch": 0.20028293782461007, "grad_norm": 2.6560218334198, "learning_rate": 9.960499691663574e-06, "loss": 3.1957, "step": 203300 }, { "epoch": 0.20033219580243217, "grad_norm": 2.306450366973877, "learning_rate": 9.960480275699618e-06, "loss": 3.1803, "step": 203350 }, { "epoch": 0.20038145378025424, "grad_norm": 2.3503198623657227, "learning_rate": 9.960460854983911e-06, "loss": 3.1251, "step": 203400 }, { "epoch": 0.20043071175807634, "grad_norm": 2.3734047412872314, "learning_rate": 9.96044142951647e-06, "loss": 3.1241, "step": 203450 }, { "epoch": 0.20047996973589843, "grad_norm": 2.2274351119995117, "learning_rate": 9.960421999297316e-06, "loss": 3.1808, "step": 203500 }, { "epoch": 0.2005292277137205, "grad_norm": 2.2439446449279785, "learning_rate": 9.960402564326464e-06, "loss": 3.1348, "step": 203550 }, { "epoch": 0.2005784856915426, "grad_norm": 2.335048198699951, "learning_rate": 9.960383124603936e-06, "loss": 3.1355, "step": 203600 }, { "epoch": 0.2006277436693647, "grad_norm": 2.2605020999908447, "learning_rate": 9.96036368012975e-06, "loss": 3.1636, "step": 203650 }, { "epoch": 0.20067700164718677, "grad_norm": 2.4001994132995605, "learning_rate": 9.960344230903923e-06, "loss": 3.1298, "step": 203700 }, { "epoch": 0.20072625962500887, "grad_norm": 2.2080001831054688, "learning_rate": 9.960324776926476e-06, "loss": 3.1332, "step": 203750 }, { "epoch": 0.20077551760283097, "grad_norm": 2.4068219661712646, "learning_rate": 9.960305318197424e-06, "loss": 3.1914, "step": 203800 }, { "epoch": 0.20082477558065304, "grad_norm": 2.207080602645874, "learning_rate": 9.960285854716789e-06, "loss": 3.1311, "step": 203850 }, { "epoch": 0.20087403355847513, "grad_norm": 2.424910068511963, "learning_rate": 9.96026638648459e-06, "loss": 3.1171, "step": 203900 }, { "epoch": 0.2009232915362972, "grad_norm": 2.374262809753418, "learning_rate": 9.960246913500841e-06, "loss": 3.1307, "step": 203950 }, { "epoch": 0.2009725495141193, 
"grad_norm": 2.262049436569214, "learning_rate": 9.960227435765567e-06, "loss": 3.1365, "step": 204000 }, { "epoch": 0.2010218074919414, "grad_norm": 2.3736376762390137, "learning_rate": 9.960207953278783e-06, "loss": 3.1488, "step": 204050 }, { "epoch": 0.20107106546976347, "grad_norm": 2.353731870651245, "learning_rate": 9.960188466040508e-06, "loss": 3.1999, "step": 204100 }, { "epoch": 0.20112032344758557, "grad_norm": 2.6353390216827393, "learning_rate": 9.96016897405076e-06, "loss": 3.1685, "step": 204150 }, { "epoch": 0.20116958142540767, "grad_norm": 2.5656416416168213, "learning_rate": 9.960149477309558e-06, "loss": 3.1138, "step": 204200 }, { "epoch": 0.20121883940322974, "grad_norm": 2.6922476291656494, "learning_rate": 9.960129975816923e-06, "loss": 3.1546, "step": 204250 }, { "epoch": 0.20126809738105184, "grad_norm": 2.360347032546997, "learning_rate": 9.96011046957287e-06, "loss": 3.214, "step": 204300 }, { "epoch": 0.20131735535887393, "grad_norm": 2.2466230392456055, "learning_rate": 9.960090958577423e-06, "loss": 3.1493, "step": 204350 }, { "epoch": 0.201366613336696, "grad_norm": 2.248671531677246, "learning_rate": 9.960071442830594e-06, "loss": 3.2063, "step": 204400 }, { "epoch": 0.2014158713145181, "grad_norm": 2.4156911373138428, "learning_rate": 9.960051922332405e-06, "loss": 3.2141, "step": 204450 }, { "epoch": 0.2014651292923402, "grad_norm": 2.4040310382843018, "learning_rate": 9.960032397082876e-06, "loss": 3.2051, "step": 204500 }, { "epoch": 0.20151438727016227, "grad_norm": 2.28920841217041, "learning_rate": 9.960012867082025e-06, "loss": 3.1104, "step": 204550 }, { "epoch": 0.20156364524798437, "grad_norm": 2.74505877494812, "learning_rate": 9.959993332329869e-06, "loss": 3.1529, "step": 204600 }, { "epoch": 0.20161290322580644, "grad_norm": 2.2143354415893555, "learning_rate": 9.959973792826427e-06, "loss": 3.1713, "step": 204650 }, { "epoch": 0.20166216120362854, "grad_norm": 2.4952709674835205, "learning_rate": 
9.959954248571719e-06, "loss": 3.1975, "step": 204700 }, { "epoch": 0.20171141918145064, "grad_norm": 2.220656633377075, "learning_rate": 9.959934699565764e-06, "loss": 3.1876, "step": 204750 }, { "epoch": 0.2017606771592727, "grad_norm": 2.295731544494629, "learning_rate": 9.959915145808579e-06, "loss": 3.138, "step": 204800 }, { "epoch": 0.2018099351370948, "grad_norm": 2.709906816482544, "learning_rate": 9.959895587300185e-06, "loss": 3.1562, "step": 204850 }, { "epoch": 0.2018591931149169, "grad_norm": 2.338934898376465, "learning_rate": 9.959876024040598e-06, "loss": 3.1895, "step": 204900 }, { "epoch": 0.20190845109273897, "grad_norm": 2.4069290161132812, "learning_rate": 9.959856456029839e-06, "loss": 3.233, "step": 204950 }, { "epoch": 0.20195770907056107, "grad_norm": 3.108976364135742, "learning_rate": 9.959836883267927e-06, "loss": 3.1659, "step": 205000 }, { "epoch": 0.20200696704838317, "grad_norm": 2.4821436405181885, "learning_rate": 9.959817305754878e-06, "loss": 3.2422, "step": 205050 }, { "epoch": 0.20205622502620524, "grad_norm": 2.3242716789245605, "learning_rate": 9.959797723490711e-06, "loss": 3.1409, "step": 205100 }, { "epoch": 0.20210548300402734, "grad_norm": 2.4353396892547607, "learning_rate": 9.959778136475448e-06, "loss": 3.1642, "step": 205150 }, { "epoch": 0.2021547409818494, "grad_norm": 2.2855384349823, "learning_rate": 9.959758544709107e-06, "loss": 3.2181, "step": 205200 }, { "epoch": 0.2022039989596715, "grad_norm": 2.340528726577759, "learning_rate": 9.959738948191705e-06, "loss": 3.1685, "step": 205250 }, { "epoch": 0.2022532569374936, "grad_norm": 2.4178950786590576, "learning_rate": 9.95971934692326e-06, "loss": 3.1245, "step": 205300 }, { "epoch": 0.20230251491531567, "grad_norm": 2.2373764514923096, "learning_rate": 9.959699740903794e-06, "loss": 3.204, "step": 205350 }, { "epoch": 0.20235177289313777, "grad_norm": 2.7442729473114014, "learning_rate": 9.959680130133322e-06, "loss": 3.1916, "step": 205400 }, { "epoch": 
0.20240103087095987, "grad_norm": 2.2657668590545654, "learning_rate": 9.959660514611866e-06, "loss": 3.1174, "step": 205450 }, { "epoch": 0.20245028884878194, "grad_norm": 2.1790337562561035, "learning_rate": 9.959640894339444e-06, "loss": 3.1234, "step": 205500 }, { "epoch": 0.20249954682660404, "grad_norm": 2.4008233547210693, "learning_rate": 9.959621269316075e-06, "loss": 3.171, "step": 205550 }, { "epoch": 0.20254880480442614, "grad_norm": 2.408280372619629, "learning_rate": 9.959601639541776e-06, "loss": 3.2001, "step": 205600 }, { "epoch": 0.2025980627822482, "grad_norm": 2.35282564163208, "learning_rate": 9.959582005016567e-06, "loss": 3.1774, "step": 205650 }, { "epoch": 0.2026473207600703, "grad_norm": 2.2874209880828857, "learning_rate": 9.959562365740468e-06, "loss": 3.0817, "step": 205700 }, { "epoch": 0.2026965787378924, "grad_norm": 4.549248695373535, "learning_rate": 9.959542721713496e-06, "loss": 3.237, "step": 205750 }, { "epoch": 0.20274583671571447, "grad_norm": 2.7091376781463623, "learning_rate": 9.959523072935669e-06, "loss": 3.1151, "step": 205800 }, { "epoch": 0.20279509469353657, "grad_norm": 2.4826924800872803, "learning_rate": 9.959503419407009e-06, "loss": 3.1787, "step": 205850 }, { "epoch": 0.20284435267135864, "grad_norm": 2.314362049102783, "learning_rate": 9.959483761127532e-06, "loss": 3.1894, "step": 205900 }, { "epoch": 0.20289361064918074, "grad_norm": 2.2838730812072754, "learning_rate": 9.959464098097257e-06, "loss": 3.1106, "step": 205950 }, { "epoch": 0.20294286862700284, "grad_norm": 2.597059965133667, "learning_rate": 9.959444430316207e-06, "loss": 3.203, "step": 206000 }, { "epoch": 0.2029921266048249, "grad_norm": 2.4173622131347656, "learning_rate": 9.959424757784395e-06, "loss": 3.158, "step": 206050 }, { "epoch": 0.203041384582647, "grad_norm": 2.3075854778289795, "learning_rate": 9.959405080501842e-06, "loss": 3.1513, "step": 206100 }, { "epoch": 0.2030906425604691, "grad_norm": 2.429185390472412, "learning_rate": 
9.959385398468571e-06, "loss": 3.1259, "step": 206150 }, { "epoch": 0.20313990053829117, "grad_norm": 2.219280958175659, "learning_rate": 9.959365711684595e-06, "loss": 3.1441, "step": 206200 }, { "epoch": 0.20318915851611327, "grad_norm": 3.4745967388153076, "learning_rate": 9.959346020149933e-06, "loss": 3.1287, "step": 206250 }, { "epoch": 0.20323841649393537, "grad_norm": 2.303740978240967, "learning_rate": 9.959326323864608e-06, "loss": 3.1943, "step": 206300 }, { "epoch": 0.20328767447175744, "grad_norm": 2.2190935611724854, "learning_rate": 9.959306622828636e-06, "loss": 3.163, "step": 206350 }, { "epoch": 0.20333693244957954, "grad_norm": 2.2731540203094482, "learning_rate": 9.959286917042038e-06, "loss": 3.2071, "step": 206400 }, { "epoch": 0.2033861904274016, "grad_norm": 2.3589987754821777, "learning_rate": 9.95926720650483e-06, "loss": 3.1965, "step": 206450 }, { "epoch": 0.2034354484052237, "grad_norm": 2.3886425495147705, "learning_rate": 9.959247491217032e-06, "loss": 3.1303, "step": 206500 }, { "epoch": 0.2034847063830458, "grad_norm": 2.4407129287719727, "learning_rate": 9.959227771178663e-06, "loss": 3.214, "step": 206550 }, { "epoch": 0.20353396436086788, "grad_norm": 2.655928373336792, "learning_rate": 9.959208046389743e-06, "loss": 3.132, "step": 206600 }, { "epoch": 0.20358322233868997, "grad_norm": 2.5741915702819824, "learning_rate": 9.95918831685029e-06, "loss": 3.1545, "step": 206650 }, { "epoch": 0.20363248031651207, "grad_norm": 2.2618424892425537, "learning_rate": 9.959168582560323e-06, "loss": 3.1544, "step": 206700 }, { "epoch": 0.20368173829433414, "grad_norm": 2.370654821395874, "learning_rate": 9.959148843519862e-06, "loss": 3.1963, "step": 206750 }, { "epoch": 0.20373099627215624, "grad_norm": 2.3089559078216553, "learning_rate": 9.959129099728924e-06, "loss": 3.14, "step": 206800 }, { "epoch": 0.20378025424997834, "grad_norm": 2.3521053791046143, "learning_rate": 9.959109351187528e-06, "loss": 3.1428, "step": 206850 }, { "epoch": 
0.2038295122278004, "grad_norm": 2.2573249340057373, "learning_rate": 9.959089597895693e-06, "loss": 3.0746, "step": 206900 }, { "epoch": 0.2038787702056225, "grad_norm": 2.378023147583008, "learning_rate": 9.95906983985344e-06, "loss": 3.1807, "step": 206950 }, { "epoch": 0.2039280281834446, "grad_norm": 2.686462879180908, "learning_rate": 9.959050077060785e-06, "loss": 3.1223, "step": 207000 }, { "epoch": 0.20397728616126667, "grad_norm": 2.35837984085083, "learning_rate": 9.95903030951775e-06, "loss": 3.218, "step": 207050 }, { "epoch": 0.20402654413908877, "grad_norm": 2.211061954498291, "learning_rate": 9.959010537224351e-06, "loss": 3.1777, "step": 207100 }, { "epoch": 0.20407580211691084, "grad_norm": 2.2865025997161865, "learning_rate": 9.958990760180608e-06, "loss": 3.1164, "step": 207150 }, { "epoch": 0.20412506009473294, "grad_norm": 2.413261890411377, "learning_rate": 9.958970978386541e-06, "loss": 3.1057, "step": 207200 }, { "epoch": 0.20417431807255504, "grad_norm": 2.3077125549316406, "learning_rate": 9.958951191842168e-06, "loss": 3.1341, "step": 207250 }, { "epoch": 0.2042235760503771, "grad_norm": 2.389791965484619, "learning_rate": 9.958931400547507e-06, "loss": 3.1285, "step": 207300 }, { "epoch": 0.2042728340281992, "grad_norm": 2.2830917835235596, "learning_rate": 9.958911604502578e-06, "loss": 3.1478, "step": 207350 }, { "epoch": 0.2043220920060213, "grad_norm": 2.4379444122314453, "learning_rate": 9.958891803707403e-06, "loss": 3.1836, "step": 207400 }, { "epoch": 0.20437134998384338, "grad_norm": 2.4105894565582275, "learning_rate": 9.958871998161995e-06, "loss": 3.1238, "step": 207450 }, { "epoch": 0.20442060796166547, "grad_norm": 2.8004002571105957, "learning_rate": 9.958852187866377e-06, "loss": 3.1343, "step": 207500 }, { "epoch": 0.20446986593948757, "grad_norm": 2.2657265663146973, "learning_rate": 9.958832372820567e-06, "loss": 3.1498, "step": 207550 }, { "epoch": 0.20451912391730964, "grad_norm": 2.269650936126709, "learning_rate": 
9.958812553024584e-06, "loss": 3.1549, "step": 207600 }, { "epoch": 0.20456838189513174, "grad_norm": 2.3662571907043457, "learning_rate": 9.958792728478445e-06, "loss": 3.0788, "step": 207650 }, { "epoch": 0.2046176398729538, "grad_norm": 2.6087539196014404, "learning_rate": 9.958772899182174e-06, "loss": 3.1854, "step": 207700 }, { "epoch": 0.2046668978507759, "grad_norm": 2.4934215545654297, "learning_rate": 9.958753065135785e-06, "loss": 3.1649, "step": 207750 }, { "epoch": 0.204716155828598, "grad_norm": 2.3540399074554443, "learning_rate": 9.9587332263393e-06, "loss": 3.1151, "step": 207800 }, { "epoch": 0.20476541380642008, "grad_norm": 2.2416234016418457, "learning_rate": 9.958713382792734e-06, "loss": 3.1338, "step": 207850 }, { "epoch": 0.20481467178424217, "grad_norm": 2.452441692352295, "learning_rate": 9.958693534496112e-06, "loss": 3.0862, "step": 207900 }, { "epoch": 0.20486392976206427, "grad_norm": 2.31312894821167, "learning_rate": 9.958673681449449e-06, "loss": 3.169, "step": 207950 }, { "epoch": 0.20491318773988634, "grad_norm": 2.4456894397735596, "learning_rate": 9.958653823652766e-06, "loss": 3.1959, "step": 208000 }, { "epoch": 0.20496244571770844, "grad_norm": 2.508714437484741, "learning_rate": 9.95863396110608e-06, "loss": 3.1874, "step": 208050 }, { "epoch": 0.20501170369553054, "grad_norm": 2.3797824382781982, "learning_rate": 9.958614093809409e-06, "loss": 3.162, "step": 208100 }, { "epoch": 0.2050609616733526, "grad_norm": 2.405336618423462, "learning_rate": 9.958594221762777e-06, "loss": 3.1123, "step": 208150 }, { "epoch": 0.2051102196511747, "grad_norm": 2.263030767440796, "learning_rate": 9.958574344966198e-06, "loss": 3.1963, "step": 208200 }, { "epoch": 0.2051594776289968, "grad_norm": 2.330038547515869, "learning_rate": 9.958554463419693e-06, "loss": 3.1642, "step": 208250 }, { "epoch": 0.20520873560681888, "grad_norm": 2.2958781719207764, "learning_rate": 9.958534577123282e-06, "loss": 3.1471, "step": 208300 }, { "epoch": 
0.20525799358464097, "grad_norm": 2.2572388648986816, "learning_rate": 9.958514686076983e-06, "loss": 3.152, "step": 208350 }, { "epoch": 0.20530725156246304, "grad_norm": 2.3831839561462402, "learning_rate": 9.958494790280815e-06, "loss": 3.149, "step": 208400 }, { "epoch": 0.20535650954028514, "grad_norm": 2.436060667037964, "learning_rate": 9.958474889734797e-06, "loss": 3.1494, "step": 208450 }, { "epoch": 0.20540576751810724, "grad_norm": 2.349781036376953, "learning_rate": 9.95845498443895e-06, "loss": 3.0828, "step": 208500 }, { "epoch": 0.2054550254959293, "grad_norm": 2.2536070346832275, "learning_rate": 9.95843507439329e-06, "loss": 3.1674, "step": 208550 }, { "epoch": 0.2055042834737514, "grad_norm": 2.520420551300049, "learning_rate": 9.958415159597837e-06, "loss": 3.1144, "step": 208600 }, { "epoch": 0.2055535414515735, "grad_norm": 2.385207176208496, "learning_rate": 9.958395240052613e-06, "loss": 3.1462, "step": 208650 }, { "epoch": 0.20560279942939558, "grad_norm": 2.2822673320770264, "learning_rate": 9.958375315757631e-06, "loss": 3.1023, "step": 208700 }, { "epoch": 0.20565205740721768, "grad_norm": 2.1631195545196533, "learning_rate": 9.958355386712917e-06, "loss": 3.1703, "step": 208750 }, { "epoch": 0.20570131538503977, "grad_norm": 2.463300943374634, "learning_rate": 9.958335452918485e-06, "loss": 3.1238, "step": 208800 }, { "epoch": 0.20575057336286184, "grad_norm": 2.2513768672943115, "learning_rate": 9.958315514374356e-06, "loss": 3.1655, "step": 208850 }, { "epoch": 0.20579983134068394, "grad_norm": 2.274393320083618, "learning_rate": 9.958295571080549e-06, "loss": 3.1471, "step": 208900 }, { "epoch": 0.205849089318506, "grad_norm": 3.0230369567871094, "learning_rate": 9.958275623037084e-06, "loss": 3.0998, "step": 208950 }, { "epoch": 0.2058983472963281, "grad_norm": 2.436112642288208, "learning_rate": 9.958255670243979e-06, "loss": 3.0369, "step": 209000 }, { "epoch": 0.2059476052741502, "grad_norm": 2.2775697708129883, "learning_rate": 
9.958235712701252e-06, "loss": 3.0889, "step": 209050 }, { "epoch": 0.20599686325197228, "grad_norm": 2.29316782951355, "learning_rate": 9.958215750408924e-06, "loss": 3.1651, "step": 209100 }, { "epoch": 0.20604612122979438, "grad_norm": 2.417280435562134, "learning_rate": 9.958195783367015e-06, "loss": 3.1964, "step": 209150 }, { "epoch": 0.20609537920761647, "grad_norm": 2.307659149169922, "learning_rate": 9.95817581157554e-06, "loss": 3.1335, "step": 209200 }, { "epoch": 0.20614463718543855, "grad_norm": 2.6780643463134766, "learning_rate": 9.958155835034525e-06, "loss": 3.1626, "step": 209250 }, { "epoch": 0.20619389516326064, "grad_norm": 2.239384889602661, "learning_rate": 9.958135853743982e-06, "loss": 3.1304, "step": 209300 }, { "epoch": 0.20624315314108274, "grad_norm": 2.462625503540039, "learning_rate": 9.958115867703932e-06, "loss": 3.1578, "step": 209350 }, { "epoch": 0.2062924111189048, "grad_norm": 2.3173625469207764, "learning_rate": 9.958095876914398e-06, "loss": 3.1559, "step": 209400 }, { "epoch": 0.2063416690967269, "grad_norm": 2.2476139068603516, "learning_rate": 9.958075881375395e-06, "loss": 3.1745, "step": 209450 }, { "epoch": 0.206390927074549, "grad_norm": 2.606207847595215, "learning_rate": 9.958055881086944e-06, "loss": 3.1699, "step": 209500 }, { "epoch": 0.20644018505237108, "grad_norm": 2.232581615447998, "learning_rate": 9.958035876049063e-06, "loss": 3.1725, "step": 209550 }, { "epoch": 0.20648944303019318, "grad_norm": 2.16389536857605, "learning_rate": 9.958015866261772e-06, "loss": 3.0922, "step": 209600 }, { "epoch": 0.20653870100801525, "grad_norm": 3.8807289600372314, "learning_rate": 9.957995851725091e-06, "loss": 3.1558, "step": 209650 }, { "epoch": 0.20658795898583734, "grad_norm": 2.299062967300415, "learning_rate": 9.957975832439037e-06, "loss": 3.1826, "step": 209700 }, { "epoch": 0.20663721696365944, "grad_norm": 2.3832263946533203, "learning_rate": 9.957955808403632e-06, "loss": 3.1828, "step": 209750 }, { "epoch": 
0.2066864749414815, "grad_norm": 2.3467345237731934, "learning_rate": 9.957935779618892e-06, "loss": 3.1411, "step": 209800 }, { "epoch": 0.2067357329193036, "grad_norm": 4.1672682762146, "learning_rate": 9.957915746084839e-06, "loss": 3.1023, "step": 209850 }, { "epoch": 0.2067849908971257, "grad_norm": 2.2828493118286133, "learning_rate": 9.957895707801491e-06, "loss": 3.1391, "step": 209900 }, { "epoch": 0.20683424887494778, "grad_norm": 2.3025710582733154, "learning_rate": 9.957875664768865e-06, "loss": 3.1494, "step": 209950 }, { "epoch": 0.20688350685276988, "grad_norm": 2.445500373840332, "learning_rate": 9.957855616986985e-06, "loss": 3.1046, "step": 210000 }, { "epoch": 0.20693276483059198, "grad_norm": 2.5231399536132812, "learning_rate": 9.957835564455867e-06, "loss": 3.1231, "step": 210050 }, { "epoch": 0.20698202280841405, "grad_norm": 2.439889669418335, "learning_rate": 9.95781550717553e-06, "loss": 3.1485, "step": 210100 }, { "epoch": 0.20703128078623614, "grad_norm": 2.306947708129883, "learning_rate": 9.957795445145993e-06, "loss": 3.21, "step": 210150 }, { "epoch": 0.20708053876405821, "grad_norm": 2.2984092235565186, "learning_rate": 9.957775378367278e-06, "loss": 3.122, "step": 210200 }, { "epoch": 0.2071297967418803, "grad_norm": 3.2085721492767334, "learning_rate": 9.957755306839401e-06, "loss": 3.1883, "step": 210250 }, { "epoch": 0.2071790547197024, "grad_norm": 2.2393877506256104, "learning_rate": 9.957735230562384e-06, "loss": 3.1944, "step": 210300 }, { "epoch": 0.20722831269752448, "grad_norm": 2.356269359588623, "learning_rate": 9.957715149536244e-06, "loss": 3.1975, "step": 210350 }, { "epoch": 0.20727757067534658, "grad_norm": 2.43263840675354, "learning_rate": 9.957695063761002e-06, "loss": 3.215, "step": 210400 }, { "epoch": 0.20732682865316868, "grad_norm": 2.500180959701538, "learning_rate": 9.957674973236676e-06, "loss": 3.1726, "step": 210450 }, { "epoch": 0.20737608663099075, "grad_norm": 2.353097438812256, "learning_rate": 
9.957654877963286e-06, "loss": 3.1555, "step": 210500 }, { "epoch": 0.20742534460881284, "grad_norm": 2.604830026626587, "learning_rate": 9.957634777940848e-06, "loss": 3.192, "step": 210550 }, { "epoch": 0.20747460258663494, "grad_norm": 2.407069683074951, "learning_rate": 9.957614673169387e-06, "loss": 3.1395, "step": 210600 }, { "epoch": 0.207523860564457, "grad_norm": 2.3856699466705322, "learning_rate": 9.957594563648918e-06, "loss": 3.1816, "step": 210650 }, { "epoch": 0.2075731185422791, "grad_norm": 2.3091938495635986, "learning_rate": 9.957574449379464e-06, "loss": 3.1091, "step": 210700 }, { "epoch": 0.20762237652010118, "grad_norm": 2.2650439739227295, "learning_rate": 9.957554330361039e-06, "loss": 3.1794, "step": 210750 }, { "epoch": 0.20767163449792328, "grad_norm": 2.3910062313079834, "learning_rate": 9.957534206593666e-06, "loss": 3.167, "step": 210800 }, { "epoch": 0.20772089247574538, "grad_norm": 2.3552258014678955, "learning_rate": 9.957514078077363e-06, "loss": 3.1997, "step": 210850 }, { "epoch": 0.20777015045356745, "grad_norm": 2.6385338306427, "learning_rate": 9.95749394481215e-06, "loss": 3.111, "step": 210900 }, { "epoch": 0.20781940843138955, "grad_norm": 2.424082040786743, "learning_rate": 9.957473806798047e-06, "loss": 3.1163, "step": 210950 }, { "epoch": 0.20786866640921164, "grad_norm": 2.508225679397583, "learning_rate": 9.957453664035073e-06, "loss": 3.0891, "step": 211000 }, { "epoch": 0.20791792438703371, "grad_norm": 2.3564515113830566, "learning_rate": 9.957433516523245e-06, "loss": 3.1648, "step": 211050 }, { "epoch": 0.2079671823648558, "grad_norm": 2.7111833095550537, "learning_rate": 9.957413364262585e-06, "loss": 3.1986, "step": 211100 }, { "epoch": 0.2080164403426779, "grad_norm": 2.1761960983276367, "learning_rate": 9.95739320725311e-06, "loss": 3.1546, "step": 211150 }, { "epoch": 0.20806569832049998, "grad_norm": 2.746267318725586, "learning_rate": 9.957373045494841e-06, "loss": 3.1315, "step": 211200 }, { "epoch": 
0.20811495629832208, "grad_norm": 2.45660138130188, "learning_rate": 9.957352878987796e-06, "loss": 3.1415, "step": 211250 }, { "epoch": 0.20816421427614418, "grad_norm": 2.3345420360565186, "learning_rate": 9.957332707731995e-06, "loss": 3.1407, "step": 211300 }, { "epoch": 0.20821347225396625, "grad_norm": 2.318936824798584, "learning_rate": 9.957312531727458e-06, "loss": 3.1814, "step": 211350 }, { "epoch": 0.20826273023178835, "grad_norm": 2.367290496826172, "learning_rate": 9.957292350974206e-06, "loss": 3.1919, "step": 211400 }, { "epoch": 0.20831198820961042, "grad_norm": 2.4204750061035156, "learning_rate": 9.957272165472254e-06, "loss": 3.2131, "step": 211450 }, { "epoch": 0.20836124618743251, "grad_norm": 2.4520339965820312, "learning_rate": 9.957251975221624e-06, "loss": 3.1692, "step": 211500 }, { "epoch": 0.2084105041652546, "grad_norm": 2.4482266902923584, "learning_rate": 9.957231780222334e-06, "loss": 3.1847, "step": 211550 }, { "epoch": 0.20845976214307668, "grad_norm": 2.4644150733947754, "learning_rate": 9.957211580474405e-06, "loss": 3.1375, "step": 211600 }, { "epoch": 0.20850902012089878, "grad_norm": 2.382768392562866, "learning_rate": 9.957191375977855e-06, "loss": 3.2548, "step": 211650 }, { "epoch": 0.20855827809872088, "grad_norm": 2.2712690830230713, "learning_rate": 9.957171166732704e-06, "loss": 3.1579, "step": 211700 }, { "epoch": 0.20860753607654295, "grad_norm": 2.4542791843414307, "learning_rate": 9.95715095273897e-06, "loss": 3.2126, "step": 211750 }, { "epoch": 0.20865679405436505, "grad_norm": 2.5927412509918213, "learning_rate": 9.957130733996677e-06, "loss": 3.1121, "step": 211800 }, { "epoch": 0.20870605203218714, "grad_norm": 2.4330270290374756, "learning_rate": 9.957110510505836e-06, "loss": 3.0841, "step": 211850 }, { "epoch": 0.20875531001000922, "grad_norm": 2.5410919189453125, "learning_rate": 9.957090282266475e-06, "loss": 3.2166, "step": 211900 }, { "epoch": 0.2088045679878313, "grad_norm": 2.316696882247925, 
"learning_rate": 9.95707004927861e-06, "loss": 3.2068, "step": 211950 }, { "epoch": 0.20885382596565338, "grad_norm": 2.508714199066162, "learning_rate": 9.957049811542257e-06, "loss": 3.217, "step": 212000 }, { "epoch": 0.20890308394347548, "grad_norm": 2.615920066833496, "learning_rate": 9.95702956905744e-06, "loss": 3.1073, "step": 212050 }, { "epoch": 0.20895234192129758, "grad_norm": 2.485020875930786, "learning_rate": 9.957009321824177e-06, "loss": 3.1421, "step": 212100 }, { "epoch": 0.20900159989911965, "grad_norm": 2.317331314086914, "learning_rate": 9.956989069842488e-06, "loss": 3.1377, "step": 212150 }, { "epoch": 0.20905085787694175, "grad_norm": 2.4104042053222656, "learning_rate": 9.95696881311239e-06, "loss": 3.1834, "step": 212200 }, { "epoch": 0.20910011585476385, "grad_norm": 2.2977495193481445, "learning_rate": 9.956948551633904e-06, "loss": 3.1784, "step": 212250 }, { "epoch": 0.20914937383258592, "grad_norm": 2.410491943359375, "learning_rate": 9.956928285407051e-06, "loss": 3.1973, "step": 212300 }, { "epoch": 0.20919863181040801, "grad_norm": 2.5964248180389404, "learning_rate": 9.956908014431849e-06, "loss": 3.0851, "step": 212350 }, { "epoch": 0.2092478897882301, "grad_norm": 2.3136489391326904, "learning_rate": 9.956887738708317e-06, "loss": 3.1582, "step": 212400 }, { "epoch": 0.20929714776605218, "grad_norm": 2.3279008865356445, "learning_rate": 9.956867458236474e-06, "loss": 3.1703, "step": 212450 }, { "epoch": 0.20934640574387428, "grad_norm": 2.316629409790039, "learning_rate": 9.95684717301634e-06, "loss": 3.1768, "step": 212500 }, { "epoch": 0.20939566372169638, "grad_norm": 2.3444039821624756, "learning_rate": 9.956826883047935e-06, "loss": 3.1223, "step": 212550 }, { "epoch": 0.20944492169951845, "grad_norm": 2.3323521614074707, "learning_rate": 9.956806588331278e-06, "loss": 3.1383, "step": 212600 }, { "epoch": 0.20949417967734055, "grad_norm": 2.2402830123901367, "learning_rate": 9.956786288866388e-06, "loss": 3.1904, "step": 
212650 }, { "epoch": 0.20954343765516262, "grad_norm": 2.4704461097717285, "learning_rate": 9.956765984653286e-06, "loss": 3.0773, "step": 212700 }, { "epoch": 0.20959269563298472, "grad_norm": 2.4164011478424072, "learning_rate": 9.956745675691987e-06, "loss": 3.1093, "step": 212750 }, { "epoch": 0.2096419536108068, "grad_norm": 2.4975688457489014, "learning_rate": 9.956725361982517e-06, "loss": 3.1407, "step": 212800 }, { "epoch": 0.20969121158862888, "grad_norm": 2.3983428478240967, "learning_rate": 9.956705043524892e-06, "loss": 3.1702, "step": 212850 }, { "epoch": 0.20974046956645098, "grad_norm": 2.6149449348449707, "learning_rate": 9.956684720319133e-06, "loss": 3.0528, "step": 212900 }, { "epoch": 0.20978972754427308, "grad_norm": 2.452202081680298, "learning_rate": 9.956664392365255e-06, "loss": 3.1428, "step": 212950 }, { "epoch": 0.20983898552209515, "grad_norm": 2.463357448577881, "learning_rate": 9.956644059663282e-06, "loss": 3.1138, "step": 213000 }, { "epoch": 0.20988824349991725, "grad_norm": 2.39921498298645, "learning_rate": 9.956623722213232e-06, "loss": 3.1898, "step": 213050 }, { "epoch": 0.20993750147773935, "grad_norm": 2.4862189292907715, "learning_rate": 9.956603380015127e-06, "loss": 3.1352, "step": 213100 }, { "epoch": 0.20998675945556142, "grad_norm": 2.2902393341064453, "learning_rate": 9.95658303306898e-06, "loss": 3.1974, "step": 213150 }, { "epoch": 0.21003601743338352, "grad_norm": 2.321004629135132, "learning_rate": 9.956562681374817e-06, "loss": 3.1051, "step": 213200 }, { "epoch": 0.21008527541120559, "grad_norm": 2.657411813735962, "learning_rate": 9.956542324932654e-06, "loss": 3.0804, "step": 213250 }, { "epoch": 0.21013453338902768, "grad_norm": 2.3638172149658203, "learning_rate": 9.956521963742513e-06, "loss": 3.2135, "step": 213300 }, { "epoch": 0.21018379136684978, "grad_norm": 2.3436832427978516, "learning_rate": 9.95650159780441e-06, "loss": 3.1552, "step": 213350 }, { "epoch": 0.21023304934467185, "grad_norm": 
2.4611406326293945, "learning_rate": 9.956481227118368e-06, "loss": 3.1274, "step": 213400 }, { "epoch": 0.21028230732249395, "grad_norm": 2.5019917488098145, "learning_rate": 9.956460851684406e-06, "loss": 3.1272, "step": 213450 }, { "epoch": 0.21033156530031605, "grad_norm": 2.1739938259124756, "learning_rate": 9.956440471502541e-06, "loss": 3.1333, "step": 213500 }, { "epoch": 0.21038082327813812, "grad_norm": 2.354860544204712, "learning_rate": 9.956420086572796e-06, "loss": 3.1623, "step": 213550 }, { "epoch": 0.21043008125596022, "grad_norm": 2.492450475692749, "learning_rate": 9.956399696895187e-06, "loss": 3.2089, "step": 213600 }, { "epoch": 0.21047933923378231, "grad_norm": 2.3187363147735596, "learning_rate": 9.956379302469735e-06, "loss": 3.1257, "step": 213650 }, { "epoch": 0.21052859721160438, "grad_norm": 2.3188271522521973, "learning_rate": 9.956358903296462e-06, "loss": 3.1792, "step": 213700 }, { "epoch": 0.21057785518942648, "grad_norm": 2.148580551147461, "learning_rate": 9.956338499375381e-06, "loss": 3.0928, "step": 213750 }, { "epoch": 0.21062711316724858, "grad_norm": 2.216648578643799, "learning_rate": 9.956318090706521e-06, "loss": 3.1286, "step": 213800 }, { "epoch": 0.21067637114507065, "grad_norm": 2.379702568054199, "learning_rate": 9.956297677289893e-06, "loss": 3.1446, "step": 213850 }, { "epoch": 0.21072562912289275, "grad_norm": 2.4406378269195557, "learning_rate": 9.95627725912552e-06, "loss": 3.1297, "step": 213900 }, { "epoch": 0.21077488710071482, "grad_norm": 2.1440553665161133, "learning_rate": 9.956256836213422e-06, "loss": 3.1833, "step": 213950 }, { "epoch": 0.21082414507853692, "grad_norm": 2.341656446456909, "learning_rate": 9.95623640855362e-06, "loss": 3.1513, "step": 214000 }, { "epoch": 0.21087340305635902, "grad_norm": 2.363743782043457, "learning_rate": 9.95621597614613e-06, "loss": 3.1907, "step": 214050 }, { "epoch": 0.21092266103418109, "grad_norm": 2.3875539302825928, "learning_rate": 9.956195538990972e-06, 
"loss": 3.1761, "step": 214100 }, { "epoch": 0.21097191901200318, "grad_norm": 2.2219841480255127, "learning_rate": 9.956175097088168e-06, "loss": 3.1153, "step": 214150 }, { "epoch": 0.21102117698982528, "grad_norm": 2.3501782417297363, "learning_rate": 9.956154650437737e-06, "loss": 3.179, "step": 214200 }, { "epoch": 0.21107043496764735, "grad_norm": 2.7596614360809326, "learning_rate": 9.956134199039697e-06, "loss": 3.1574, "step": 214250 }, { "epoch": 0.21111969294546945, "grad_norm": 2.302449941635132, "learning_rate": 9.95611374289407e-06, "loss": 3.1358, "step": 214300 }, { "epoch": 0.21116895092329155, "grad_norm": 2.3484699726104736, "learning_rate": 9.956093282000872e-06, "loss": 3.1819, "step": 214350 }, { "epoch": 0.21121820890111362, "grad_norm": 2.340998649597168, "learning_rate": 9.956072816360125e-06, "loss": 3.0957, "step": 214400 }, { "epoch": 0.21126746687893572, "grad_norm": 2.5022034645080566, "learning_rate": 9.956052345971849e-06, "loss": 3.0428, "step": 214450 }, { "epoch": 0.2113167248567578, "grad_norm": 2.374725103378296, "learning_rate": 9.956031870836065e-06, "loss": 3.1453, "step": 214500 }, { "epoch": 0.21136598283457989, "grad_norm": 2.2930655479431152, "learning_rate": 9.956011390952786e-06, "loss": 3.1399, "step": 214550 }, { "epoch": 0.21141524081240198, "grad_norm": 2.9443299770355225, "learning_rate": 9.95599090632204e-06, "loss": 3.1262, "step": 214600 }, { "epoch": 0.21146449879022405, "grad_norm": 2.274686813354492, "learning_rate": 9.95597041694384e-06, "loss": 3.1728, "step": 214650 }, { "epoch": 0.21151375676804615, "grad_norm": 2.355478048324585, "learning_rate": 9.955949922818211e-06, "loss": 3.1904, "step": 214700 }, { "epoch": 0.21156301474586825, "grad_norm": 2.6392462253570557, "learning_rate": 9.95592942394517e-06, "loss": 3.1428, "step": 214750 }, { "epoch": 0.21161227272369032, "grad_norm": 2.5454142093658447, "learning_rate": 9.955908920324735e-06, "loss": 3.0802, "step": 214800 }, { "epoch": 
0.21166153070151242, "grad_norm": 2.505828857421875, "learning_rate": 9.955888411956928e-06, "loss": 3.1451, "step": 214850 }, { "epoch": 0.21171078867933452, "grad_norm": 2.3444368839263916, "learning_rate": 9.955867898841769e-06, "loss": 3.1234, "step": 214900 }, { "epoch": 0.2117600466571566, "grad_norm": 2.2742414474487305, "learning_rate": 9.955847380979276e-06, "loss": 3.1028, "step": 214950 }, { "epoch": 0.21180930463497868, "grad_norm": 2.391993761062622, "learning_rate": 9.955826858369468e-06, "loss": 3.1144, "step": 215000 }, { "epoch": 0.21185856261280078, "grad_norm": 2.29498553276062, "learning_rate": 9.955806331012366e-06, "loss": 3.1345, "step": 215050 }, { "epoch": 0.21190782059062285, "grad_norm": 2.5411782264709473, "learning_rate": 9.95578579890799e-06, "loss": 3.1415, "step": 215100 }, { "epoch": 0.21195707856844495, "grad_norm": 2.423549175262451, "learning_rate": 9.95576526205636e-06, "loss": 3.1123, "step": 215150 }, { "epoch": 0.21200633654626702, "grad_norm": 2.340252161026001, "learning_rate": 9.955744720457496e-06, "loss": 3.1728, "step": 215200 }, { "epoch": 0.21205559452408912, "grad_norm": 2.3448429107666016, "learning_rate": 9.955724174111415e-06, "loss": 3.1435, "step": 215250 }, { "epoch": 0.21210485250191122, "grad_norm": 2.2902934551239014, "learning_rate": 9.955703623018139e-06, "loss": 3.1518, "step": 215300 }, { "epoch": 0.2121541104797333, "grad_norm": 2.551823377609253, "learning_rate": 9.955683067177685e-06, "loss": 3.1701, "step": 215350 }, { "epoch": 0.21220336845755539, "grad_norm": 2.425600051879883, "learning_rate": 9.955662506590078e-06, "loss": 3.1578, "step": 215400 }, { "epoch": 0.21225262643537748, "grad_norm": 2.277099132537842, "learning_rate": 9.955641941255333e-06, "loss": 3.148, "step": 215450 }, { "epoch": 0.21230188441319955, "grad_norm": 2.4369056224823, "learning_rate": 9.955621371173469e-06, "loss": 3.1499, "step": 215500 }, { "epoch": 0.21235114239102165, "grad_norm": 2.231456756591797, "learning_rate": 
9.95560079634451e-06, "loss": 3.2129, "step": 215550 }, { "epoch": 0.21240040036884375, "grad_norm": 2.5540106296539307, "learning_rate": 9.955580216768473e-06, "loss": 3.1958, "step": 215600 }, { "epoch": 0.21244965834666582, "grad_norm": 2.1821067333221436, "learning_rate": 9.955559632445378e-06, "loss": 3.1299, "step": 215650 }, { "epoch": 0.21249891632448792, "grad_norm": 2.3998944759368896, "learning_rate": 9.955539043375246e-06, "loss": 3.1231, "step": 215700 }, { "epoch": 0.21254817430231, "grad_norm": 2.3447794914245605, "learning_rate": 9.955518449558094e-06, "loss": 3.1306, "step": 215750 }, { "epoch": 0.2125974322801321, "grad_norm": 2.2837648391723633, "learning_rate": 9.955497850993944e-06, "loss": 3.1577, "step": 215800 }, { "epoch": 0.21264669025795419, "grad_norm": 2.363300323486328, "learning_rate": 9.955477247682817e-06, "loss": 3.1911, "step": 215850 }, { "epoch": 0.21269594823577626, "grad_norm": 2.305295944213867, "learning_rate": 9.955456639624727e-06, "loss": 3.1229, "step": 215900 }, { "epoch": 0.21274520621359835, "grad_norm": 2.3401949405670166, "learning_rate": 9.9554360268197e-06, "loss": 3.1725, "step": 215950 }, { "epoch": 0.21279446419142045, "grad_norm": 2.3277621269226074, "learning_rate": 9.955415409267754e-06, "loss": 3.1695, "step": 216000 }, { "epoch": 0.21284372216924252, "grad_norm": 2.3886282444000244, "learning_rate": 9.955394786968907e-06, "loss": 3.0797, "step": 216050 }, { "epoch": 0.21289298014706462, "grad_norm": 2.3659415245056152, "learning_rate": 9.95537415992318e-06, "loss": 3.1734, "step": 216100 }, { "epoch": 0.21294223812488672, "grad_norm": 2.3833136558532715, "learning_rate": 9.955353528130592e-06, "loss": 3.124, "step": 216150 }, { "epoch": 0.2129914961027088, "grad_norm": 2.4051971435546875, "learning_rate": 9.955332891591165e-06, "loss": 3.0935, "step": 216200 }, { "epoch": 0.2130407540805309, "grad_norm": 2.4027061462402344, "learning_rate": 9.955312250304916e-06, "loss": 3.1842, "step": 216250 }, { 
"epoch": 0.21309001205835298, "grad_norm": 2.247666835784912, "learning_rate": 9.955291604271866e-06, "loss": 3.0883, "step": 216300 }, { "epoch": 0.21313927003617505, "grad_norm": 2.373809814453125, "learning_rate": 9.955270953492033e-06, "loss": 3.124, "step": 216350 }, { "epoch": 0.21318852801399715, "grad_norm": 2.354877471923828, "learning_rate": 9.955250297965441e-06, "loss": 3.1447, "step": 216400 }, { "epoch": 0.21323778599181922, "grad_norm": 2.1966781616210938, "learning_rate": 9.955229637692107e-06, "loss": 3.139, "step": 216450 }, { "epoch": 0.21328704396964132, "grad_norm": 2.311089515686035, "learning_rate": 9.95520897267205e-06, "loss": 3.1044, "step": 216500 }, { "epoch": 0.21333630194746342, "grad_norm": 2.391268253326416, "learning_rate": 9.95518830290529e-06, "loss": 3.1744, "step": 216550 }, { "epoch": 0.2133855599252855, "grad_norm": 2.3440258502960205, "learning_rate": 9.95516762839185e-06, "loss": 3.1646, "step": 216600 }, { "epoch": 0.2134348179031076, "grad_norm": 2.2114803791046143, "learning_rate": 9.955146949131747e-06, "loss": 3.1433, "step": 216650 }, { "epoch": 0.21348407588092969, "grad_norm": 2.4254446029663086, "learning_rate": 9.955126265124999e-06, "loss": 3.1888, "step": 216700 }, { "epoch": 0.21353333385875176, "grad_norm": 2.3717868328094482, "learning_rate": 9.95510557637163e-06, "loss": 3.1973, "step": 216750 }, { "epoch": 0.21358259183657385, "grad_norm": 2.3486387729644775, "learning_rate": 9.955084882871657e-06, "loss": 3.1151, "step": 216800 }, { "epoch": 0.21363184981439595, "grad_norm": 2.321690559387207, "learning_rate": 9.955064184625102e-06, "loss": 3.1479, "step": 216850 }, { "epoch": 0.21368110779221802, "grad_norm": 2.254225015640259, "learning_rate": 9.955043481631981e-06, "loss": 3.1283, "step": 216900 }, { "epoch": 0.21373036577004012, "grad_norm": 2.2769339084625244, "learning_rate": 9.955022773892319e-06, "loss": 3.1623, "step": 216950 }, { "epoch": 0.2137796237478622, "grad_norm": 2.4090428352355957, 
"learning_rate": 9.955002061406132e-06, "loss": 3.1394, "step": 217000 }, { "epoch": 0.2138288817256843, "grad_norm": 2.3981375694274902, "learning_rate": 9.954981344173442e-06, "loss": 3.1286, "step": 217050 }, { "epoch": 0.2138781397035064, "grad_norm": 2.3273844718933105, "learning_rate": 9.954960622194268e-06, "loss": 3.1975, "step": 217100 }, { "epoch": 0.21392739768132846, "grad_norm": 2.683159351348877, "learning_rate": 9.954939895468627e-06, "loss": 3.1231, "step": 217150 }, { "epoch": 0.21397665565915056, "grad_norm": 2.527832508087158, "learning_rate": 9.954919163996543e-06, "loss": 3.1402, "step": 217200 }, { "epoch": 0.21402591363697265, "grad_norm": 2.373608112335205, "learning_rate": 9.954898427778035e-06, "loss": 3.1545, "step": 217250 }, { "epoch": 0.21407517161479472, "grad_norm": 2.315764904022217, "learning_rate": 9.954877686813122e-06, "loss": 3.1775, "step": 217300 }, { "epoch": 0.21412442959261682, "grad_norm": 2.359273910522461, "learning_rate": 9.954856941101824e-06, "loss": 3.1475, "step": 217350 }, { "epoch": 0.21417368757043892, "grad_norm": 2.2855985164642334, "learning_rate": 9.954836190644162e-06, "loss": 3.2016, "step": 217400 }, { "epoch": 0.214222945548261, "grad_norm": 2.2812869548797607, "learning_rate": 9.954815435440156e-06, "loss": 3.1394, "step": 217450 }, { "epoch": 0.2142722035260831, "grad_norm": 2.519676685333252, "learning_rate": 9.954794675489822e-06, "loss": 3.1994, "step": 217500 }, { "epoch": 0.2143214615039052, "grad_norm": 2.229118585586548, "learning_rate": 9.954773910793184e-06, "loss": 3.1741, "step": 217550 }, { "epoch": 0.21437071948172726, "grad_norm": 2.3279454708099365, "learning_rate": 9.95475314135026e-06, "loss": 3.1672, "step": 217600 }, { "epoch": 0.21441997745954935, "grad_norm": 2.404555559158325, "learning_rate": 9.95473236716107e-06, "loss": 3.0889, "step": 217650 }, { "epoch": 0.21446923543737142, "grad_norm": 2.3408584594726562, "learning_rate": 9.954711588225637e-06, "loss": 3.1585, "step": 
217700 }, { "epoch": 0.21451849341519352, "grad_norm": 2.3164286613464355, "learning_rate": 9.954690804543976e-06, "loss": 3.1577, "step": 217750 }, { "epoch": 0.21456775139301562, "grad_norm": 2.459487199783325, "learning_rate": 9.95467001611611e-06, "loss": 3.1648, "step": 217800 }, { "epoch": 0.2146170093708377, "grad_norm": 2.415505886077881, "learning_rate": 9.954649222942057e-06, "loss": 3.0993, "step": 217850 }, { "epoch": 0.2146662673486598, "grad_norm": 2.3815271854400635, "learning_rate": 9.954628425021839e-06, "loss": 3.1695, "step": 217900 }, { "epoch": 0.2147155253264819, "grad_norm": 2.5812854766845703, "learning_rate": 9.954607622355476e-06, "loss": 3.1023, "step": 217950 }, { "epoch": 0.21476478330430396, "grad_norm": 2.1780426502227783, "learning_rate": 9.954586814942985e-06, "loss": 3.0459, "step": 218000 }, { "epoch": 0.21481404128212606, "grad_norm": 2.362535238265991, "learning_rate": 9.954566002784389e-06, "loss": 3.1574, "step": 218050 }, { "epoch": 0.21486329925994815, "grad_norm": 2.682753562927246, "learning_rate": 9.954545185879706e-06, "loss": 3.1285, "step": 218100 }, { "epoch": 0.21491255723777022, "grad_norm": 2.333970069885254, "learning_rate": 9.954524364228956e-06, "loss": 3.2008, "step": 218150 }, { "epoch": 0.21496181521559232, "grad_norm": 2.6193277835845947, "learning_rate": 9.954503537832161e-06, "loss": 3.1268, "step": 218200 }, { "epoch": 0.2150110731934144, "grad_norm": 2.4867894649505615, "learning_rate": 9.954482706689339e-06, "loss": 3.1436, "step": 218250 }, { "epoch": 0.2150603311712365, "grad_norm": 2.3212709426879883, "learning_rate": 9.954461870800511e-06, "loss": 3.1234, "step": 218300 }, { "epoch": 0.2151095891490586, "grad_norm": 2.4020416736602783, "learning_rate": 9.954441030165696e-06, "loss": 3.1108, "step": 218350 }, { "epoch": 0.21515884712688066, "grad_norm": 2.3771960735321045, "learning_rate": 9.954420184784914e-06, "loss": 3.1683, "step": 218400 }, { "epoch": 0.21520810510470276, "grad_norm": 
2.5260322093963623, "learning_rate": 9.954399334658187e-06, "loss": 3.1934, "step": 218450 }, { "epoch": 0.21525736308252486, "grad_norm": 2.575911045074463, "learning_rate": 9.954378479785532e-06, "loss": 3.0993, "step": 218500 }, { "epoch": 0.21530662106034693, "grad_norm": 2.280452013015747, "learning_rate": 9.954357620166971e-06, "loss": 3.1692, "step": 218550 }, { "epoch": 0.21535587903816902, "grad_norm": 2.222432851791382, "learning_rate": 9.954336755802522e-06, "loss": 3.1447, "step": 218600 }, { "epoch": 0.21540513701599112, "grad_norm": 2.273552656173706, "learning_rate": 9.954315886692209e-06, "loss": 3.104, "step": 218650 }, { "epoch": 0.2154543949938132, "grad_norm": 2.3567557334899902, "learning_rate": 9.954295012836048e-06, "loss": 3.1258, "step": 218700 }, { "epoch": 0.2155036529716353, "grad_norm": 2.269564151763916, "learning_rate": 9.954274134234059e-06, "loss": 3.1448, "step": 218750 }, { "epoch": 0.21555291094945736, "grad_norm": 2.487272262573242, "learning_rate": 9.954253250886265e-06, "loss": 3.1848, "step": 218800 }, { "epoch": 0.21560216892727946, "grad_norm": 2.23667049407959, "learning_rate": 9.954232362792685e-06, "loss": 3.1018, "step": 218850 }, { "epoch": 0.21565142690510156, "grad_norm": 2.143541097640991, "learning_rate": 9.954211469953336e-06, "loss": 3.1883, "step": 218900 }, { "epoch": 0.21570068488292363, "grad_norm": 2.367692232131958, "learning_rate": 9.954190572368243e-06, "loss": 3.1031, "step": 218950 }, { "epoch": 0.21574994286074572, "grad_norm": 2.2872705459594727, "learning_rate": 9.954169670037421e-06, "loss": 3.1586, "step": 219000 }, { "epoch": 0.21579920083856782, "grad_norm": 2.273003101348877, "learning_rate": 9.954148762960893e-06, "loss": 3.1623, "step": 219050 }, { "epoch": 0.2158484588163899, "grad_norm": 2.415879249572754, "learning_rate": 9.95412785113868e-06, "loss": 3.1327, "step": 219100 }, { "epoch": 0.215897716794212, "grad_norm": 2.309433698654175, "learning_rate": 9.954106934570799e-06, "loss": 
3.1841, "step": 219150 }, { "epoch": 0.2159469747720341, "grad_norm": 2.239039421081543, "learning_rate": 9.954086013257272e-06, "loss": 3.182, "step": 219200 }, { "epoch": 0.21599623274985616, "grad_norm": 2.336247444152832, "learning_rate": 9.954065087198118e-06, "loss": 3.0992, "step": 219250 }, { "epoch": 0.21604549072767826, "grad_norm": 2.232131242752075, "learning_rate": 9.954044156393358e-06, "loss": 3.1573, "step": 219300 }, { "epoch": 0.21609474870550036, "grad_norm": 2.3979811668395996, "learning_rate": 9.95402322084301e-06, "loss": 3.1947, "step": 219350 }, { "epoch": 0.21614400668332243, "grad_norm": 2.395092248916626, "learning_rate": 9.954002280547097e-06, "loss": 3.2343, "step": 219400 }, { "epoch": 0.21619326466114452, "grad_norm": 2.3550307750701904, "learning_rate": 9.953981335505638e-06, "loss": 3.1316, "step": 219450 }, { "epoch": 0.2162425226389666, "grad_norm": 2.328139305114746, "learning_rate": 9.953960385718654e-06, "loss": 3.1179, "step": 219500 }, { "epoch": 0.2162917806167887, "grad_norm": 2.4443392753601074, "learning_rate": 9.953939431186162e-06, "loss": 3.1583, "step": 219550 }, { "epoch": 0.2163410385946108, "grad_norm": 2.2851271629333496, "learning_rate": 9.953918471908185e-06, "loss": 3.1012, "step": 219600 }, { "epoch": 0.21639029657243286, "grad_norm": 2.4817416667938232, "learning_rate": 9.953897507884743e-06, "loss": 3.113, "step": 219650 }, { "epoch": 0.21643955455025496, "grad_norm": 2.2895359992980957, "learning_rate": 9.953876539115851e-06, "loss": 3.1865, "step": 219700 }, { "epoch": 0.21648881252807706, "grad_norm": 2.3296163082122803, "learning_rate": 9.953855565601538e-06, "loss": 3.0697, "step": 219750 }, { "epoch": 0.21653807050589913, "grad_norm": 2.4292235374450684, "learning_rate": 9.953834587341816e-06, "loss": 3.0956, "step": 219800 }, { "epoch": 0.21658732848372123, "grad_norm": 2.1216650009155273, "learning_rate": 9.95381360433671e-06, "loss": 3.1872, "step": 219850 }, { "epoch": 0.21663658646154332, 
"grad_norm": 2.415778160095215, "learning_rate": 9.953792616586237e-06, "loss": 3.1604, "step": 219900 }, { "epoch": 0.2166858444393654, "grad_norm": 2.371203899383545, "learning_rate": 9.95377162409042e-06, "loss": 3.15, "step": 219950 }, { "epoch": 0.2167351024171875, "grad_norm": 2.323850631713867, "learning_rate": 9.953750626849278e-06, "loss": 3.1505, "step": 220000 }, { "epoch": 0.21678436039500956, "grad_norm": 2.3198366165161133, "learning_rate": 9.953729624862831e-06, "loss": 3.0807, "step": 220050 }, { "epoch": 0.21683361837283166, "grad_norm": 2.310755729675293, "learning_rate": 9.953708618131097e-06, "loss": 3.1506, "step": 220100 }, { "epoch": 0.21688287635065376, "grad_norm": 2.394498825073242, "learning_rate": 9.9536876066541e-06, "loss": 3.1061, "step": 220150 }, { "epoch": 0.21693213432847583, "grad_norm": 2.35528302192688, "learning_rate": 9.953666590431856e-06, "loss": 3.1864, "step": 220200 }, { "epoch": 0.21698139230629793, "grad_norm": 2.4034018516540527, "learning_rate": 9.95364556946439e-06, "loss": 3.1117, "step": 220250 }, { "epoch": 0.21703065028412002, "grad_norm": 2.4296813011169434, "learning_rate": 9.953624543751718e-06, "loss": 3.1317, "step": 220300 }, { "epoch": 0.2170799082619421, "grad_norm": 2.4014899730682373, "learning_rate": 9.95360351329386e-06, "loss": 3.113, "step": 220350 }, { "epoch": 0.2171291662397642, "grad_norm": 2.2216415405273438, "learning_rate": 9.95358247809084e-06, "loss": 3.0968, "step": 220400 }, { "epoch": 0.2171784242175863, "grad_norm": 2.3313658237457275, "learning_rate": 9.953561438142677e-06, "loss": 3.2323, "step": 220450 }, { "epoch": 0.21722768219540836, "grad_norm": 2.508573055267334, "learning_rate": 9.953540393449387e-06, "loss": 3.1242, "step": 220500 }, { "epoch": 0.21727694017323046, "grad_norm": 3.3420238494873047, "learning_rate": 9.953519344010994e-06, "loss": 3.1206, "step": 220550 }, { "epoch": 0.21732619815105256, "grad_norm": 2.354785680770874, "learning_rate": 9.953498289827519e-06, 
"loss": 3.165, "step": 220600 }, { "epoch": 0.21737545612887463, "grad_norm": 2.5638415813446045, "learning_rate": 9.953477230898978e-06, "loss": 3.1726, "step": 220650 }, { "epoch": 0.21742471410669673, "grad_norm": 2.3629157543182373, "learning_rate": 9.953456167225397e-06, "loss": 3.1529, "step": 220700 }, { "epoch": 0.2174739720845188, "grad_norm": 2.4882609844207764, "learning_rate": 9.953435098806789e-06, "loss": 3.1201, "step": 220750 }, { "epoch": 0.2175232300623409, "grad_norm": 2.2484898567199707, "learning_rate": 9.95341402564318e-06, "loss": 3.0933, "step": 220800 }, { "epoch": 0.217572488040163, "grad_norm": 2.5583443641662598, "learning_rate": 9.953392947734589e-06, "loss": 3.1514, "step": 220850 }, { "epoch": 0.21762174601798506, "grad_norm": 2.3598177433013916, "learning_rate": 9.953371865081035e-06, "loss": 3.1507, "step": 220900 }, { "epoch": 0.21767100399580716, "grad_norm": 2.334414005279541, "learning_rate": 9.953350777682538e-06, "loss": 3.1289, "step": 220950 }, { "epoch": 0.21772026197362926, "grad_norm": 2.203545093536377, "learning_rate": 9.953329685539119e-06, "loss": 3.1154, "step": 221000 }, { "epoch": 0.21776951995145133, "grad_norm": 2.596688747406006, "learning_rate": 9.953308588650799e-06, "loss": 3.1456, "step": 221050 }, { "epoch": 0.21781877792927343, "grad_norm": 2.4641153812408447, "learning_rate": 9.953287487017596e-06, "loss": 3.1684, "step": 221100 }, { "epoch": 0.21786803590709553, "grad_norm": 2.335426092147827, "learning_rate": 9.953266380639532e-06, "loss": 3.1684, "step": 221150 }, { "epoch": 0.2179172938849176, "grad_norm": 2.3259992599487305, "learning_rate": 9.953245269516627e-06, "loss": 3.2126, "step": 221200 }, { "epoch": 0.2179665518627397, "grad_norm": 2.4024343490600586, "learning_rate": 9.9532241536489e-06, "loss": 3.1573, "step": 221250 }, { "epoch": 0.21801580984056176, "grad_norm": 2.225503921508789, "learning_rate": 9.953203033036373e-06, "loss": 3.1949, "step": 221300 }, { "epoch": 0.21806506781838386, 
"grad_norm": 2.3264174461364746, "learning_rate": 9.953181907679066e-06, "loss": 3.1425, "step": 221350 }, { "epoch": 0.21811432579620596, "grad_norm": 2.407752752304077, "learning_rate": 9.953160777576996e-06, "loss": 3.0791, "step": 221400 }, { "epoch": 0.21816358377402803, "grad_norm": 2.1915769577026367, "learning_rate": 9.953139642730189e-06, "loss": 3.0761, "step": 221450 }, { "epoch": 0.21821284175185013, "grad_norm": 2.535818338394165, "learning_rate": 9.95311850313866e-06, "loss": 3.2509, "step": 221500 }, { "epoch": 0.21826209972967223, "grad_norm": 2.2756717205047607, "learning_rate": 9.953097358802433e-06, "loss": 3.106, "step": 221550 }, { "epoch": 0.2183113577074943, "grad_norm": 2.342862606048584, "learning_rate": 9.953076209721525e-06, "loss": 3.1443, "step": 221600 }, { "epoch": 0.2183606156853164, "grad_norm": 2.4161016941070557, "learning_rate": 9.953055055895956e-06, "loss": 3.2093, "step": 221650 }, { "epoch": 0.2184098736631385, "grad_norm": 2.646806478500366, "learning_rate": 9.953033897325752e-06, "loss": 3.163, "step": 221700 }, { "epoch": 0.21845913164096056, "grad_norm": 2.5922157764434814, "learning_rate": 9.953012734010928e-06, "loss": 3.1306, "step": 221750 }, { "epoch": 0.21850838961878266, "grad_norm": 2.2392547130584717, "learning_rate": 9.952991565951506e-06, "loss": 3.1159, "step": 221800 }, { "epoch": 0.21855764759660476, "grad_norm": 2.319308280944824, "learning_rate": 9.952970393147506e-06, "loss": 3.1296, "step": 221850 }, { "epoch": 0.21860690557442683, "grad_norm": 2.368163585662842, "learning_rate": 9.952949215598947e-06, "loss": 3.1177, "step": 221900 }, { "epoch": 0.21865616355224893, "grad_norm": 2.1927247047424316, "learning_rate": 9.952928033305851e-06, "loss": 3.2009, "step": 221950 }, { "epoch": 0.218705421530071, "grad_norm": 2.3409321308135986, "learning_rate": 9.952906846268238e-06, "loss": 3.0918, "step": 222000 }, { "epoch": 0.2187546795078931, "grad_norm": 2.5363574028015137, "learning_rate": 
9.952885654486127e-06, "loss": 3.1061, "step": 222050 }, { "epoch": 0.2188039374857152, "grad_norm": 2.292142152786255, "learning_rate": 9.952864457959541e-06, "loss": 3.0607, "step": 222100 }, { "epoch": 0.21885319546353726, "grad_norm": Infinity, "learning_rate": 9.952843256688498e-06, "loss": 3.1004, "step": 222150 }, { "epoch": 0.21890245344135936, "grad_norm": 2.2631335258483887, "learning_rate": 9.952822050673018e-06, "loss": 3.2112, "step": 222200 }, { "epoch": 0.21895171141918146, "grad_norm": 2.38161301612854, "learning_rate": 9.952800839913124e-06, "loss": 3.1215, "step": 222250 }, { "epoch": 0.21900096939700353, "grad_norm": 2.3659276962280273, "learning_rate": 9.952779624408834e-06, "loss": 3.1205, "step": 222300 }, { "epoch": 0.21905022737482563, "grad_norm": 2.298375129699707, "learning_rate": 9.95275840416017e-06, "loss": 3.1116, "step": 222350 }, { "epoch": 0.21909948535264773, "grad_norm": 2.338192939758301, "learning_rate": 9.952737179167148e-06, "loss": 3.1552, "step": 222400 }, { "epoch": 0.2191487433304698, "grad_norm": 2.4458112716674805, "learning_rate": 9.952715949429796e-06, "loss": 3.0976, "step": 222450 }, { "epoch": 0.2191980013082919, "grad_norm": 2.276179075241089, "learning_rate": 9.952694714948128e-06, "loss": 3.1573, "step": 222500 }, { "epoch": 0.21924725928611397, "grad_norm": 2.357313871383667, "learning_rate": 9.952673475722165e-06, "loss": 3.1271, "step": 222550 }, { "epoch": 0.21929651726393606, "grad_norm": 2.2970969676971436, "learning_rate": 9.95265223175193e-06, "loss": 3.1294, "step": 222600 }, { "epoch": 0.21934577524175816, "grad_norm": 2.5114800930023193, "learning_rate": 9.952630983037442e-06, "loss": 3.1834, "step": 222650 }, { "epoch": 0.21939503321958023, "grad_norm": 2.2983362674713135, "learning_rate": 9.952609729578722e-06, "loss": 3.1314, "step": 222700 }, { "epoch": 0.21944429119740233, "grad_norm": 2.9122931957244873, "learning_rate": 9.952588471375787e-06, "loss": 3.1006, "step": 222750 }, { "epoch": 
0.21949354917522443, "grad_norm": 2.6576688289642334, "learning_rate": 9.952567208428662e-06, "loss": 3.1942, "step": 222800 }, { "epoch": 0.2195428071530465, "grad_norm": 2.4808766841888428, "learning_rate": 9.952545940737366e-06, "loss": 3.083, "step": 222850 }, { "epoch": 0.2195920651308686, "grad_norm": 2.299445867538452, "learning_rate": 9.952524668301918e-06, "loss": 3.0873, "step": 222900 }, { "epoch": 0.2196413231086907, "grad_norm": 2.335651397705078, "learning_rate": 9.952503391122339e-06, "loss": 3.1505, "step": 222950 }, { "epoch": 0.21969058108651277, "grad_norm": 2.324120044708252, "learning_rate": 9.952482109198651e-06, "loss": 3.1599, "step": 223000 }, { "epoch": 0.21973983906433486, "grad_norm": 2.2573792934417725, "learning_rate": 9.952460822530872e-06, "loss": 3.0968, "step": 223050 }, { "epoch": 0.21978909704215696, "grad_norm": 2.2530925273895264, "learning_rate": 9.952439531119023e-06, "loss": 3.0953, "step": 223100 }, { "epoch": 0.21983835501997903, "grad_norm": 2.6181163787841797, "learning_rate": 9.952418234963124e-06, "loss": 3.0904, "step": 223150 }, { "epoch": 0.21988761299780113, "grad_norm": 2.2858657836914062, "learning_rate": 9.952396934063198e-06, "loss": 3.0963, "step": 223200 }, { "epoch": 0.2199368709756232, "grad_norm": 2.3564040660858154, "learning_rate": 9.952375628419263e-06, "loss": 3.1173, "step": 223250 }, { "epoch": 0.2199861289534453, "grad_norm": 2.4005532264709473, "learning_rate": 9.952354318031338e-06, "loss": 3.0248, "step": 223300 }, { "epoch": 0.2200353869312674, "grad_norm": 2.2955896854400635, "learning_rate": 9.952333002899446e-06, "loss": 3.0786, "step": 223350 }, { "epoch": 0.22008464490908947, "grad_norm": 2.2909789085388184, "learning_rate": 9.952311683023609e-06, "loss": 3.1705, "step": 223400 }, { "epoch": 0.22013390288691156, "grad_norm": 2.2800896167755127, "learning_rate": 9.952290358403844e-06, "loss": 3.0696, "step": 223450 }, { "epoch": 0.22018316086473366, "grad_norm": 2.145925521850586, 
"learning_rate": 9.952269029040172e-06, "loss": 3.0791, "step": 223500 }, { "epoch": 0.22023241884255573, "grad_norm": 2.243391513824463, "learning_rate": 9.952247694932614e-06, "loss": 3.1067, "step": 223550 }, { "epoch": 0.22028167682037783, "grad_norm": 2.2909889221191406, "learning_rate": 9.952226356081191e-06, "loss": 3.136, "step": 223600 }, { "epoch": 0.22033093479819993, "grad_norm": 2.319020986557007, "learning_rate": 9.952205012485924e-06, "loss": 3.0825, "step": 223650 }, { "epoch": 0.220380192776022, "grad_norm": 2.3813540935516357, "learning_rate": 9.95218366414683e-06, "loss": 3.1717, "step": 223700 }, { "epoch": 0.2204294507538441, "grad_norm": 2.2313458919525146, "learning_rate": 9.952162311063934e-06, "loss": 3.1216, "step": 223750 }, { "epoch": 0.22047870873166617, "grad_norm": 2.2755343914031982, "learning_rate": 9.952140953237254e-06, "loss": 3.1561, "step": 223800 }, { "epoch": 0.22052796670948827, "grad_norm": 2.4507675170898438, "learning_rate": 9.95211959066681e-06, "loss": 3.1193, "step": 223850 }, { "epoch": 0.22057722468731036, "grad_norm": 2.338200330734253, "learning_rate": 9.952098223352624e-06, "loss": 3.119, "step": 223900 }, { "epoch": 0.22062648266513243, "grad_norm": 2.416537284851074, "learning_rate": 9.952076851294714e-06, "loss": 3.1345, "step": 223950 }, { "epoch": 0.22067574064295453, "grad_norm": 2.496049404144287, "learning_rate": 9.952055474493104e-06, "loss": 3.1462, "step": 224000 }, { "epoch": 0.22072499862077663, "grad_norm": 2.4390504360198975, "learning_rate": 9.952034092947812e-06, "loss": 3.1478, "step": 224050 }, { "epoch": 0.2207742565985987, "grad_norm": 2.342205286026001, "learning_rate": 9.95201270665886e-06, "loss": 3.1594, "step": 224100 }, { "epoch": 0.2208235145764208, "grad_norm": 2.2451493740081787, "learning_rate": 9.951991315626266e-06, "loss": 3.1198, "step": 224150 }, { "epoch": 0.2208727725542429, "grad_norm": Infinity, "learning_rate": 9.951969919850053e-06, "loss": 3.1493, "step": 224200 }, { 
"epoch": 0.22092203053206497, "grad_norm": 2.3551137447357178, "learning_rate": 9.95194851933024e-06, "loss": 3.1719, "step": 224250 }, { "epoch": 0.22097128850988706, "grad_norm": 2.408876657485962, "learning_rate": 9.951927114066848e-06, "loss": 3.1749, "step": 224300 }, { "epoch": 0.22102054648770916, "grad_norm": 2.739084482192993, "learning_rate": 9.951905704059899e-06, "loss": 3.1445, "step": 224350 }, { "epoch": 0.22106980446553123, "grad_norm": 2.425157308578491, "learning_rate": 9.95188428930941e-06, "loss": 3.1426, "step": 224400 }, { "epoch": 0.22111906244335333, "grad_norm": 2.4225034713745117, "learning_rate": 9.951862869815406e-06, "loss": 3.19, "step": 224450 }, { "epoch": 0.2211683204211754, "grad_norm": 2.413776397705078, "learning_rate": 9.951841445577903e-06, "loss": 3.0905, "step": 224500 }, { "epoch": 0.2212175783989975, "grad_norm": 2.4735167026519775, "learning_rate": 9.951820016596924e-06, "loss": 3.1404, "step": 224550 }, { "epoch": 0.2212668363768196, "grad_norm": 4.925982475280762, "learning_rate": 9.95179858287249e-06, "loss": 3.1675, "step": 224600 }, { "epoch": 0.22131609435464167, "grad_norm": 2.3830573558807373, "learning_rate": 9.951777144404622e-06, "loss": 3.1558, "step": 224650 }, { "epoch": 0.22136535233246377, "grad_norm": 2.386718988418579, "learning_rate": 9.951755701193337e-06, "loss": 3.1669, "step": 224700 }, { "epoch": 0.22141461031028586, "grad_norm": 2.4135544300079346, "learning_rate": 9.951734253238658e-06, "loss": 3.0971, "step": 224750 }, { "epoch": 0.22146386828810793, "grad_norm": 2.173279047012329, "learning_rate": 9.951712800540606e-06, "loss": 3.1893, "step": 224800 }, { "epoch": 0.22151312626593003, "grad_norm": 2.1849546432495117, "learning_rate": 9.951691343099202e-06, "loss": 3.1764, "step": 224850 }, { "epoch": 0.22156238424375213, "grad_norm": 2.4604570865631104, "learning_rate": 9.951669880914463e-06, "loss": 3.1355, "step": 224900 }, { "epoch": 0.2216116422215742, "grad_norm": 2.727764129638672, 
"learning_rate": 9.951648413986414e-06, "loss": 3.1551, "step": 224950 }, { "epoch": 0.2216609001993963, "grad_norm": 2.3390331268310547, "learning_rate": 9.951626942315071e-06, "loss": 3.0982, "step": 225000 }, { "epoch": 0.22171015817721837, "grad_norm": 2.509833335876465, "learning_rate": 9.95160546590046e-06, "loss": 3.1597, "step": 225050 }, { "epoch": 0.22175941615504047, "grad_norm": 2.329866647720337, "learning_rate": 9.951583984742598e-06, "loss": 3.1881, "step": 225100 }, { "epoch": 0.22180867413286257, "grad_norm": 2.327477216720581, "learning_rate": 9.951562498841505e-06, "loss": 3.1524, "step": 225150 }, { "epoch": 0.22185793211068464, "grad_norm": 2.3664917945861816, "learning_rate": 9.951541008197204e-06, "loss": 3.128, "step": 225200 }, { "epoch": 0.22190719008850673, "grad_norm": 2.2758588790893555, "learning_rate": 9.951519512809714e-06, "loss": 3.0813, "step": 225250 }, { "epoch": 0.22195644806632883, "grad_norm": 2.476743221282959, "learning_rate": 9.951498012679057e-06, "loss": 3.1529, "step": 225300 }, { "epoch": 0.2220057060441509, "grad_norm": 2.308541774749756, "learning_rate": 9.951476507805251e-06, "loss": 3.0256, "step": 225350 }, { "epoch": 0.222054964021973, "grad_norm": 2.2983767986297607, "learning_rate": 9.951454998188318e-06, "loss": 3.1596, "step": 225400 }, { "epoch": 0.2221042219997951, "grad_norm": 2.537550687789917, "learning_rate": 9.951433483828282e-06, "loss": 3.1456, "step": 225450 }, { "epoch": 0.22215347997761717, "grad_norm": 2.4512693881988525, "learning_rate": 9.951411964725158e-06, "loss": 3.0909, "step": 225500 }, { "epoch": 0.22220273795543927, "grad_norm": 2.366459846496582, "learning_rate": 9.95139044087897e-06, "loss": 3.1937, "step": 225550 }, { "epoch": 0.22225199593326136, "grad_norm": 2.3706185817718506, "learning_rate": 9.951368912289736e-06, "loss": 3.1219, "step": 225600 }, { "epoch": 0.22230125391108344, "grad_norm": 2.306251287460327, "learning_rate": 9.95134737895748e-06, "loss": 3.0447, "step": 225650 
}, { "epoch": 0.22235051188890553, "grad_norm": 2.2847890853881836, "learning_rate": 9.95132584088222e-06, "loss": 3.1282, "step": 225700 }, { "epoch": 0.2223997698667276, "grad_norm": 2.4151692390441895, "learning_rate": 9.951304298063979e-06, "loss": 3.1488, "step": 225750 }, { "epoch": 0.2224490278445497, "grad_norm": 2.6397173404693604, "learning_rate": 9.951282750502774e-06, "loss": 3.1177, "step": 225800 }, { "epoch": 0.2224982858223718, "grad_norm": 2.3319664001464844, "learning_rate": 9.95126119819863e-06, "loss": 3.0724, "step": 225850 }, { "epoch": 0.22254754380019387, "grad_norm": 2.1995129585266113, "learning_rate": 9.951239641151564e-06, "loss": 3.0403, "step": 225900 }, { "epoch": 0.22259680177801597, "grad_norm": 2.340365171432495, "learning_rate": 9.951218079361598e-06, "loss": 3.0753, "step": 225950 }, { "epoch": 0.22264605975583807, "grad_norm": 2.412123918533325, "learning_rate": 9.951196512828754e-06, "loss": 3.1151, "step": 226000 }, { "epoch": 0.22269531773366014, "grad_norm": 2.294193744659424, "learning_rate": 9.95117494155305e-06, "loss": 3.1075, "step": 226050 }, { "epoch": 0.22274457571148223, "grad_norm": 2.4633636474609375, "learning_rate": 9.951153365534509e-06, "loss": 3.1404, "step": 226100 }, { "epoch": 0.22279383368930433, "grad_norm": 2.410461902618408, "learning_rate": 9.951131784773151e-06, "loss": 3.1913, "step": 226150 }, { "epoch": 0.2228430916671264, "grad_norm": 2.4326319694519043, "learning_rate": 9.951110199268996e-06, "loss": 3.1108, "step": 226200 }, { "epoch": 0.2228923496449485, "grad_norm": 2.282093048095703, "learning_rate": 9.951088609022065e-06, "loss": 3.2022, "step": 226250 }, { "epoch": 0.22294160762277057, "grad_norm": 2.2904813289642334, "learning_rate": 9.95106701403238e-06, "loss": 3.1781, "step": 226300 }, { "epoch": 0.22299086560059267, "grad_norm": 2.441413164138794, "learning_rate": 9.95104541429996e-06, "loss": 3.0676, "step": 226350 }, { "epoch": 0.22304012357841477, "grad_norm": 2.216420888900757, 
"learning_rate": 9.951023809824826e-06, "loss": 3.1796, "step": 226400 }, { "epoch": 0.22308938155623684, "grad_norm": 2.2211849689483643, "learning_rate": 9.951002200606998e-06, "loss": 3.1065, "step": 226450 }, { "epoch": 0.22313863953405894, "grad_norm": 2.300203323364258, "learning_rate": 9.950980586646499e-06, "loss": 3.1481, "step": 226500 }, { "epoch": 0.22318789751188103, "grad_norm": 2.251762628555298, "learning_rate": 9.950958967943347e-06, "loss": 3.1811, "step": 226550 }, { "epoch": 0.2232371554897031, "grad_norm": 2.272516965866089, "learning_rate": 9.950937344497566e-06, "loss": 3.1904, "step": 226600 }, { "epoch": 0.2232864134675252, "grad_norm": 2.5223546028137207, "learning_rate": 9.950915716309173e-06, "loss": 3.0872, "step": 226650 }, { "epoch": 0.2233356714453473, "grad_norm": 2.9933032989501953, "learning_rate": 9.950894083378191e-06, "loss": 3.1001, "step": 226700 }, { "epoch": 0.22338492942316937, "grad_norm": 2.349722146987915, "learning_rate": 9.95087244570464e-06, "loss": 3.0759, "step": 226750 }, { "epoch": 0.22343418740099147, "grad_norm": 2.527432680130005, "learning_rate": 9.95085080328854e-06, "loss": 3.0944, "step": 226800 }, { "epoch": 0.22348344537881354, "grad_norm": 2.321079969406128, "learning_rate": 9.950829156129913e-06, "loss": 3.1357, "step": 226850 }, { "epoch": 0.22353270335663564, "grad_norm": 2.3322012424468994, "learning_rate": 9.950807504228782e-06, "loss": 3.0767, "step": 226900 }, { "epoch": 0.22358196133445774, "grad_norm": 2.490889549255371, "learning_rate": 9.950785847585162e-06, "loss": 3.1922, "step": 226950 }, { "epoch": 0.2236312193122798, "grad_norm": 2.262770414352417, "learning_rate": 9.950764186199078e-06, "loss": 3.0801, "step": 227000 }, { "epoch": 0.2236804772901019, "grad_norm": 2.309741735458374, "learning_rate": 9.950742520070548e-06, "loss": 3.0282, "step": 227050 }, { "epoch": 0.223729735267924, "grad_norm": 2.244929075241089, "learning_rate": 9.950720849199596e-06, "loss": 3.1467, "step": 227100 
}, { "epoch": 0.22377899324574607, "grad_norm": 2.2359673976898193, "learning_rate": 9.95069917358624e-06, "loss": 3.1852, "step": 227150 }, { "epoch": 0.22382825122356817, "grad_norm": 2.1796724796295166, "learning_rate": 9.950677493230502e-06, "loss": 3.1591, "step": 227200 }, { "epoch": 0.22387750920139027, "grad_norm": 2.3265440464019775, "learning_rate": 9.950655808132405e-06, "loss": 3.1465, "step": 227250 }, { "epoch": 0.22392676717921234, "grad_norm": 2.4119598865509033, "learning_rate": 9.950634118291965e-06, "loss": 3.0864, "step": 227300 }, { "epoch": 0.22397602515703444, "grad_norm": 2.336982011795044, "learning_rate": 9.950612423709206e-06, "loss": 3.1098, "step": 227350 }, { "epoch": 0.22402528313485653, "grad_norm": 2.3077609539031982, "learning_rate": 9.950590724384149e-06, "loss": 3.1326, "step": 227400 }, { "epoch": 0.2240745411126786, "grad_norm": 2.4018473625183105, "learning_rate": 9.950569020316811e-06, "loss": 3.1542, "step": 227450 }, { "epoch": 0.2241237990905007, "grad_norm": 2.499340772628784, "learning_rate": 9.950547311507218e-06, "loss": 3.1258, "step": 227500 }, { "epoch": 0.22417305706832277, "grad_norm": 2.071631908416748, "learning_rate": 9.950525597955387e-06, "loss": 3.1326, "step": 227550 }, { "epoch": 0.22422231504614487, "grad_norm": 2.308685064315796, "learning_rate": 9.950503879661341e-06, "loss": 3.1074, "step": 227600 }, { "epoch": 0.22427157302396697, "grad_norm": 2.398099660873413, "learning_rate": 9.9504821566251e-06, "loss": 3.1095, "step": 227650 }, { "epoch": 0.22432083100178904, "grad_norm": 2.3888399600982666, "learning_rate": 9.950460428846684e-06, "loss": 3.1259, "step": 227700 }, { "epoch": 0.22437008897961114, "grad_norm": 2.3664770126342773, "learning_rate": 9.950438696326114e-06, "loss": 3.1317, "step": 227750 }, { "epoch": 0.22441934695743324, "grad_norm": 2.326826810836792, "learning_rate": 9.950416959063413e-06, "loss": 3.12, "step": 227800 }, { "epoch": 0.2244686049352553, "grad_norm": 2.8650074005126953, 
"learning_rate": 9.9503952170586e-06, "loss": 3.1232, "step": 227850 }, { "epoch": 0.2245178629130774, "grad_norm": 2.3764822483062744, "learning_rate": 9.950373470311693e-06, "loss": 3.0929, "step": 227900 }, { "epoch": 0.2245671208908995, "grad_norm": 2.303454637527466, "learning_rate": 9.95035171882272e-06, "loss": 3.1039, "step": 227950 }, { "epoch": 0.22461637886872157, "grad_norm": 2.5130581855773926, "learning_rate": 9.950329962591697e-06, "loss": 3.1656, "step": 228000 }, { "epoch": 0.22466563684654367, "grad_norm": 2.46669602394104, "learning_rate": 9.950308201618643e-06, "loss": 3.0518, "step": 228050 }, { "epoch": 0.22471489482436574, "grad_norm": 2.4951865673065186, "learning_rate": 9.950286435903582e-06, "loss": 3.1664, "step": 228100 }, { "epoch": 0.22476415280218784, "grad_norm": 2.2968597412109375, "learning_rate": 9.950264665446536e-06, "loss": 3.117, "step": 228150 }, { "epoch": 0.22481341078000994, "grad_norm": 2.4654903411865234, "learning_rate": 9.950242890247523e-06, "loss": 3.1624, "step": 228200 }, { "epoch": 0.224862668757832, "grad_norm": 2.405632734298706, "learning_rate": 9.950221110306566e-06, "loss": 3.0874, "step": 228250 }, { "epoch": 0.2249119267356541, "grad_norm": 2.2929255962371826, "learning_rate": 9.950199325623684e-06, "loss": 3.0653, "step": 228300 }, { "epoch": 0.2249611847134762, "grad_norm": 2.4489662647247314, "learning_rate": 9.950177536198898e-06, "loss": 3.1353, "step": 228350 }, { "epoch": 0.22501044269129827, "grad_norm": 2.3690357208251953, "learning_rate": 9.950155742032231e-06, "loss": 3.1234, "step": 228400 }, { "epoch": 0.22505970066912037, "grad_norm": 2.2238545417785645, "learning_rate": 9.9501339431237e-06, "loss": 3.1678, "step": 228450 }, { "epoch": 0.22510895864694247, "grad_norm": 2.303699493408203, "learning_rate": 9.950112139473332e-06, "loss": 3.1243, "step": 228500 }, { "epoch": 0.22515821662476454, "grad_norm": 2.277594566345215, "learning_rate": 9.950090331081141e-06, "loss": 3.1293, "step": 228550 
}, { "epoch": 0.22520747460258664, "grad_norm": 2.269394874572754, "learning_rate": 9.950068517947153e-06, "loss": 3.1606, "step": 228600 }, { "epoch": 0.22525673258040874, "grad_norm": 2.459094285964966, "learning_rate": 9.950046700071385e-06, "loss": 3.0326, "step": 228650 }, { "epoch": 0.2253059905582308, "grad_norm": 2.3973357677459717, "learning_rate": 9.950024877453861e-06, "loss": 3.0976, "step": 228700 }, { "epoch": 0.2253552485360529, "grad_norm": 2.2996439933776855, "learning_rate": 9.9500030500946e-06, "loss": 3.1141, "step": 228750 }, { "epoch": 0.22540450651387497, "grad_norm": 2.450636625289917, "learning_rate": 9.949981217993627e-06, "loss": 3.1315, "step": 228800 }, { "epoch": 0.22545376449169707, "grad_norm": 2.48783540725708, "learning_rate": 9.949959381150956e-06, "loss": 3.1283, "step": 228850 }, { "epoch": 0.22550302246951917, "grad_norm": 2.297492742538452, "learning_rate": 9.949937539566614e-06, "loss": 3.0837, "step": 228900 }, { "epoch": 0.22555228044734124, "grad_norm": 2.3832290172576904, "learning_rate": 9.949915693240618e-06, "loss": 3.1433, "step": 228950 }, { "epoch": 0.22560153842516334, "grad_norm": 2.2616357803344727, "learning_rate": 9.949893842172991e-06, "loss": 3.1009, "step": 229000 }, { "epoch": 0.22565079640298544, "grad_norm": 2.4085733890533447, "learning_rate": 9.949871986363752e-06, "loss": 3.15, "step": 229050 }, { "epoch": 0.2257000543808075, "grad_norm": 2.7629122734069824, "learning_rate": 9.949850125812923e-06, "loss": 3.1108, "step": 229100 }, { "epoch": 0.2257493123586296, "grad_norm": 2.4449000358581543, "learning_rate": 9.949828260520527e-06, "loss": 3.1386, "step": 229150 }, { "epoch": 0.2257985703364517, "grad_norm": 2.276897430419922, "learning_rate": 9.949806390486582e-06, "loss": 3.1602, "step": 229200 }, { "epoch": 0.22584782831427377, "grad_norm": 2.3056507110595703, "learning_rate": 9.949784515711111e-06, "loss": 3.1337, "step": 229250 }, { "epoch": 0.22589708629209587, "grad_norm": 2.4320430755615234, 
"learning_rate": 9.949762636194133e-06, "loss": 3.1411, "step": 229300 }, { "epoch": 0.22594634426991794, "grad_norm": 2.579031229019165, "learning_rate": 9.94974075193567e-06, "loss": 3.1318, "step": 229350 }, { "epoch": 0.22599560224774004, "grad_norm": 2.3198349475860596, "learning_rate": 9.949718862935745e-06, "loss": 3.1114, "step": 229400 }, { "epoch": 0.22604486022556214, "grad_norm": 2.48252272605896, "learning_rate": 9.949696969194376e-06, "loss": 3.0974, "step": 229450 }, { "epoch": 0.2260941182033842, "grad_norm": 2.432814836502075, "learning_rate": 9.949675070711585e-06, "loss": 3.081, "step": 229500 }, { "epoch": 0.2261433761812063, "grad_norm": 2.2791616916656494, "learning_rate": 9.949653167487391e-06, "loss": 3.1672, "step": 229550 }, { "epoch": 0.2261926341590284, "grad_norm": 2.407297134399414, "learning_rate": 9.949631259521817e-06, "loss": 3.1411, "step": 229600 }, { "epoch": 0.22624189213685048, "grad_norm": 2.2199108600616455, "learning_rate": 9.949609346814886e-06, "loss": 3.0797, "step": 229650 }, { "epoch": 0.22629115011467257, "grad_norm": 2.616917371749878, "learning_rate": 9.949587429366617e-06, "loss": 3.0246, "step": 229700 }, { "epoch": 0.22634040809249467, "grad_norm": 2.2679972648620605, "learning_rate": 9.949565507177028e-06, "loss": 3.1543, "step": 229750 }, { "epoch": 0.22638966607031674, "grad_norm": 2.493642568588257, "learning_rate": 9.949543580246144e-06, "loss": 3.0781, "step": 229800 }, { "epoch": 0.22643892404813884, "grad_norm": 2.6296164989471436, "learning_rate": 9.949521648573986e-06, "loss": 3.1177, "step": 229850 }, { "epoch": 0.22648818202596094, "grad_norm": 2.3538947105407715, "learning_rate": 9.949499712160573e-06, "loss": 3.0368, "step": 229900 }, { "epoch": 0.226537440003783, "grad_norm": 2.2601139545440674, "learning_rate": 9.949477771005927e-06, "loss": 3.0856, "step": 229950 }, { "epoch": 0.2265866979816051, "grad_norm": 2.288181781768799, "learning_rate": 9.949455825110068e-06, "loss": 3.2074, "step": 
230000 }, { "epoch": 0.22663595595942718, "grad_norm": 2.32810115814209, "learning_rate": 9.949433874473019e-06, "loss": 3.1413, "step": 230050 }, { "epoch": 0.22668521393724927, "grad_norm": 2.4031431674957275, "learning_rate": 9.9494119190948e-06, "loss": 3.1555, "step": 230100 }, { "epoch": 0.22673447191507137, "grad_norm": 2.292767286300659, "learning_rate": 9.94938995897543e-06, "loss": 3.1405, "step": 230150 }, { "epoch": 0.22678372989289344, "grad_norm": 2.2212533950805664, "learning_rate": 9.949367994114934e-06, "loss": 3.1567, "step": 230200 }, { "epoch": 0.22683298787071554, "grad_norm": 2.4706010818481445, "learning_rate": 9.94934602451333e-06, "loss": 3.0583, "step": 230250 }, { "epoch": 0.22688224584853764, "grad_norm": 2.3244545459747314, "learning_rate": 9.949324050170641e-06, "loss": 3.1692, "step": 230300 }, { "epoch": 0.2269315038263597, "grad_norm": 2.3320229053497314, "learning_rate": 9.949302071086888e-06, "loss": 3.0792, "step": 230350 }, { "epoch": 0.2269807618041818, "grad_norm": 2.6513266563415527, "learning_rate": 9.94928008726209e-06, "loss": 3.1239, "step": 230400 }, { "epoch": 0.2270300197820039, "grad_norm": 2.298130512237549, "learning_rate": 9.949258098696269e-06, "loss": 3.1854, "step": 230450 }, { "epoch": 0.22707927775982598, "grad_norm": 2.2413125038146973, "learning_rate": 9.949236105389447e-06, "loss": 3.0626, "step": 230500 }, { "epoch": 0.22712853573764807, "grad_norm": 2.2524447441101074, "learning_rate": 9.949214107341642e-06, "loss": 3.0847, "step": 230550 }, { "epoch": 0.22717779371547014, "grad_norm": 2.370786428451538, "learning_rate": 9.949192104552881e-06, "loss": 3.1872, "step": 230600 }, { "epoch": 0.22722705169329224, "grad_norm": 2.4005978107452393, "learning_rate": 9.94917009702318e-06, "loss": 3.0909, "step": 230650 }, { "epoch": 0.22727630967111434, "grad_norm": 2.480656385421753, "learning_rate": 9.94914808475256e-06, "loss": 3.1399, "step": 230700 }, { "epoch": 0.2273255676489364, "grad_norm": 
2.273033380508423, "learning_rate": 9.949126067741045e-06, "loss": 3.088, "step": 230750 }, { "epoch": 0.2273748256267585, "grad_norm": 2.298105001449585, "learning_rate": 9.949104045988656e-06, "loss": 3.125, "step": 230800 }, { "epoch": 0.2274240836045806, "grad_norm": 2.454493999481201, "learning_rate": 9.94908201949541e-06, "loss": 3.1131, "step": 230850 }, { "epoch": 0.22747334158240268, "grad_norm": 2.45310378074646, "learning_rate": 9.949059988261335e-06, "loss": 3.1353, "step": 230900 }, { "epoch": 0.22752259956022478, "grad_norm": 2.277022361755371, "learning_rate": 9.949037952286444e-06, "loss": 3.0638, "step": 230950 }, { "epoch": 0.22757185753804687, "grad_norm": 2.3495540618896484, "learning_rate": 9.949015911570764e-06, "loss": 3.0889, "step": 231000 }, { "epoch": 0.22762111551586894, "grad_norm": 2.4992523193359375, "learning_rate": 9.948993866114314e-06, "loss": 3.0557, "step": 231050 }, { "epoch": 0.22767037349369104, "grad_norm": 2.291456460952759, "learning_rate": 9.948971815917117e-06, "loss": 3.123, "step": 231100 }, { "epoch": 0.22771963147151314, "grad_norm": 2.622642993927002, "learning_rate": 9.94894976097919e-06, "loss": 3.1377, "step": 231150 }, { "epoch": 0.2277688894493352, "grad_norm": 2.287047863006592, "learning_rate": 9.948927701300559e-06, "loss": 3.1052, "step": 231200 }, { "epoch": 0.2278181474271573, "grad_norm": 2.2981679439544678, "learning_rate": 9.948905636881241e-06, "loss": 3.1601, "step": 231250 }, { "epoch": 0.22786740540497938, "grad_norm": 2.1821305751800537, "learning_rate": 9.94888356772126e-06, "loss": 3.14, "step": 231300 }, { "epoch": 0.22791666338280148, "grad_norm": 2.4427993297576904, "learning_rate": 9.948861493820636e-06, "loss": 3.1548, "step": 231350 }, { "epoch": 0.22796592136062357, "grad_norm": 2.4314017295837402, "learning_rate": 9.94883941517939e-06, "loss": 3.1284, "step": 231400 }, { "epoch": 0.22801517933844564, "grad_norm": 2.344003438949585, "learning_rate": 9.948817331797542e-06, "loss": 3.0702, 
"step": 231450 }, { "epoch": 0.22806443731626774, "grad_norm": 2.3072893619537354, "learning_rate": 9.948795243675116e-06, "loss": 3.1284, "step": 231500 }, { "epoch": 0.22811369529408984, "grad_norm": 2.3631937503814697, "learning_rate": 9.948773150812132e-06, "loss": 3.1364, "step": 231550 }, { "epoch": 0.2281629532719119, "grad_norm": 2.160754442214966, "learning_rate": 9.94875105320861e-06, "loss": 3.1334, "step": 231600 }, { "epoch": 0.228212211249734, "grad_norm": 2.5623526573181152, "learning_rate": 9.948728950864572e-06, "loss": 3.1225, "step": 231650 }, { "epoch": 0.2282614692275561, "grad_norm": 2.591986656188965, "learning_rate": 9.94870684378004e-06, "loss": 3.1219, "step": 231700 }, { "epoch": 0.22831072720537818, "grad_norm": 2.2456555366516113, "learning_rate": 9.948684731955034e-06, "loss": 3.118, "step": 231750 }, { "epoch": 0.22835998518320028, "grad_norm": 2.309113025665283, "learning_rate": 9.948662615389575e-06, "loss": 3.1057, "step": 231800 }, { "epoch": 0.22840924316102235, "grad_norm": 2.5398659706115723, "learning_rate": 9.948640494083686e-06, "loss": 3.0865, "step": 231850 }, { "epoch": 0.22845850113884444, "grad_norm": 2.056925058364868, "learning_rate": 9.948618368037387e-06, "loss": 3.1586, "step": 231900 }, { "epoch": 0.22850775911666654, "grad_norm": 2.399930238723755, "learning_rate": 9.948596237250697e-06, "loss": 3.1255, "step": 231950 }, { "epoch": 0.2285570170944886, "grad_norm": 2.401066780090332, "learning_rate": 9.94857410172364e-06, "loss": 3.0886, "step": 232000 }, { "epoch": 0.2286062750723107, "grad_norm": 2.4520864486694336, "learning_rate": 9.948551961456237e-06, "loss": 3.1069, "step": 232050 }, { "epoch": 0.2286555330501328, "grad_norm": 2.2527456283569336, "learning_rate": 9.94852981644851e-06, "loss": 3.1032, "step": 232100 }, { "epoch": 0.22870479102795488, "grad_norm": 2.3150463104248047, "learning_rate": 9.948507666700477e-06, "loss": 3.1328, "step": 232150 }, { "epoch": 0.22875404900577698, "grad_norm": 
2.2695631980895996, "learning_rate": 9.948485512212162e-06, "loss": 3.1318, "step": 232200 }, { "epoch": 0.22880330698359908, "grad_norm": 2.29229736328125, "learning_rate": 9.948463352983585e-06, "loss": 3.131, "step": 232250 }, { "epoch": 0.22885256496142115, "grad_norm": 2.2613794803619385, "learning_rate": 9.948441189014769e-06, "loss": 3.0421, "step": 232300 }, { "epoch": 0.22890182293924324, "grad_norm": 2.2791359424591064, "learning_rate": 9.948419020305731e-06, "loss": 3.1543, "step": 232350 }, { "epoch": 0.22895108091706534, "grad_norm": 2.4636623859405518, "learning_rate": 9.948396846856497e-06, "loss": 3.1361, "step": 232400 }, { "epoch": 0.2290003388948874, "grad_norm": 2.4935758113861084, "learning_rate": 9.948374668667087e-06, "loss": 3.078, "step": 232450 }, { "epoch": 0.2290495968727095, "grad_norm": 2.2243528366088867, "learning_rate": 9.94835248573752e-06, "loss": 3.1416, "step": 232500 }, { "epoch": 0.22909885485053158, "grad_norm": 2.4566490650177, "learning_rate": 9.948330298067819e-06, "loss": 3.0693, "step": 232550 }, { "epoch": 0.22914811282835368, "grad_norm": 2.2690930366516113, "learning_rate": 9.948308105658005e-06, "loss": 3.1009, "step": 232600 }, { "epoch": 0.22919737080617578, "grad_norm": 2.389646291732788, "learning_rate": 9.9482859085081e-06, "loss": 3.1227, "step": 232650 }, { "epoch": 0.22924662878399785, "grad_norm": 2.29900860786438, "learning_rate": 9.948263706618124e-06, "loss": 3.0902, "step": 232700 }, { "epoch": 0.22929588676181994, "grad_norm": 2.3441901206970215, "learning_rate": 9.9482414999881e-06, "loss": 3.1493, "step": 232750 }, { "epoch": 0.22934514473964204, "grad_norm": 2.46977162361145, "learning_rate": 9.948219288618046e-06, "loss": 3.1013, "step": 232800 }, { "epoch": 0.2293944027174641, "grad_norm": 2.378328800201416, "learning_rate": 9.948197072507988e-06, "loss": 3.0513, "step": 232850 }, { "epoch": 0.2294436606952862, "grad_norm": 2.2376158237457275, "learning_rate": 9.948174851657943e-06, "loss": 3.1434, 
"step": 232900 }, { "epoch": 0.2294929186731083, "grad_norm": 2.5076003074645996, "learning_rate": 9.948152626067934e-06, "loss": 3.1143, "step": 232950 }, { "epoch": 0.22954217665093038, "grad_norm": 2.2947511672973633, "learning_rate": 9.948130395737983e-06, "loss": 3.1737, "step": 233000 }, { "epoch": 0.22959143462875248, "grad_norm": 2.2616872787475586, "learning_rate": 9.94810816066811e-06, "loss": 3.1294, "step": 233050 }, { "epoch": 0.22964069260657455, "grad_norm": 2.360733985900879, "learning_rate": 9.948085920858338e-06, "loss": 3.1271, "step": 233100 }, { "epoch": 0.22968995058439665, "grad_norm": 2.363555431365967, "learning_rate": 9.948063676308686e-06, "loss": 3.1363, "step": 233150 }, { "epoch": 0.22973920856221874, "grad_norm": 2.386957883834839, "learning_rate": 9.948041427019176e-06, "loss": 3.0712, "step": 233200 }, { "epoch": 0.22978846654004081, "grad_norm": 2.4780349731445312, "learning_rate": 9.94801917298983e-06, "loss": 3.1134, "step": 233250 }, { "epoch": 0.2298377245178629, "grad_norm": 2.3045809268951416, "learning_rate": 9.94799691422067e-06, "loss": 3.1154, "step": 233300 }, { "epoch": 0.229886982495685, "grad_norm": 2.267698049545288, "learning_rate": 9.947974650711716e-06, "loss": 3.124, "step": 233350 }, { "epoch": 0.22993624047350708, "grad_norm": 2.467005729675293, "learning_rate": 9.94795238246299e-06, "loss": 3.1422, "step": 233400 }, { "epoch": 0.22998549845132918, "grad_norm": 2.485252618789673, "learning_rate": 9.947930109474513e-06, "loss": 3.1126, "step": 233450 }, { "epoch": 0.23003475642915128, "grad_norm": 2.386655807495117, "learning_rate": 9.947907831746306e-06, "loss": 3.0752, "step": 233500 }, { "epoch": 0.23008401440697335, "grad_norm": 2.4380009174346924, "learning_rate": 9.947885549278392e-06, "loss": 3.1303, "step": 233550 }, { "epoch": 0.23013327238479545, "grad_norm": 2.4312243461608887, "learning_rate": 9.94786326207079e-06, "loss": 3.0604, "step": 233600 }, { "epoch": 0.23018253036261754, "grad_norm": 
2.352602005004883, "learning_rate": 9.947840970123522e-06, "loss": 3.1294, "step": 233650 }, { "epoch": 0.2302317883404396, "grad_norm": 2.4404258728027344, "learning_rate": 9.947818673436608e-06, "loss": 3.1051, "step": 233700 }, { "epoch": 0.2302810463182617, "grad_norm": 2.5121829509735107, "learning_rate": 9.947796372010074e-06, "loss": 3.0941, "step": 233750 }, { "epoch": 0.23033030429608378, "grad_norm": 2.3506901264190674, "learning_rate": 9.947774065843939e-06, "loss": 3.1169, "step": 233800 }, { "epoch": 0.23037956227390588, "grad_norm": 2.3131022453308105, "learning_rate": 9.947751754938222e-06, "loss": 3.08, "step": 233850 }, { "epoch": 0.23042882025172798, "grad_norm": 2.4042625427246094, "learning_rate": 9.947729439292947e-06, "loss": 3.1609, "step": 233900 }, { "epoch": 0.23047807822955005, "grad_norm": 2.301863431930542, "learning_rate": 9.947707118908136e-06, "loss": 3.1099, "step": 233950 }, { "epoch": 0.23052733620737215, "grad_norm": 2.40221905708313, "learning_rate": 9.947684793783807e-06, "loss": 3.0671, "step": 234000 }, { "epoch": 0.23057659418519424, "grad_norm": 2.296137571334839, "learning_rate": 9.947662463919985e-06, "loss": 3.0958, "step": 234050 }, { "epoch": 0.23062585216301631, "grad_norm": 2.352381467819214, "learning_rate": 9.947640129316688e-06, "loss": 3.1078, "step": 234100 }, { "epoch": 0.2306751101408384, "grad_norm": 2.1980345249176025, "learning_rate": 9.94761778997394e-06, "loss": 3.0986, "step": 234150 }, { "epoch": 0.2307243681186605, "grad_norm": 2.5247669219970703, "learning_rate": 9.947595445891763e-06, "loss": 3.1209, "step": 234200 }, { "epoch": 0.23077362609648258, "grad_norm": 2.4769108295440674, "learning_rate": 9.947573097070175e-06, "loss": 3.1144, "step": 234250 }, { "epoch": 0.23082288407430468, "grad_norm": 2.9955673217773438, "learning_rate": 9.9475507435092e-06, "loss": 3.1635, "step": 234300 }, { "epoch": 0.23087214205212675, "grad_norm": 2.3403661251068115, "learning_rate": 9.947528385208859e-06, "loss": 
3.1816, "step": 234350 }, { "epoch": 0.23092140002994885, "grad_norm": 2.369025468826294, "learning_rate": 9.947506022169174e-06, "loss": 3.1048, "step": 234400 }, { "epoch": 0.23097065800777095, "grad_norm": 2.6143558025360107, "learning_rate": 9.947483654390165e-06, "loss": 3.0768, "step": 234450 }, { "epoch": 0.23101991598559302, "grad_norm": 2.3820548057556152, "learning_rate": 9.947461281871852e-06, "loss": 3.1532, "step": 234500 }, { "epoch": 0.23106917396341511, "grad_norm": 2.3688600063323975, "learning_rate": 9.947438904614262e-06, "loss": 3.0989, "step": 234550 }, { "epoch": 0.2311184319412372, "grad_norm": 2.504629373550415, "learning_rate": 9.94741652261741e-06, "loss": 3.0706, "step": 234600 }, { "epoch": 0.23116768991905928, "grad_norm": 2.3956665992736816, "learning_rate": 9.947394135881323e-06, "loss": 3.1152, "step": 234650 }, { "epoch": 0.23121694789688138, "grad_norm": 2.325806140899658, "learning_rate": 9.947371744406018e-06, "loss": 3.1029, "step": 234700 }, { "epoch": 0.23126620587470348, "grad_norm": 2.3691487312316895, "learning_rate": 9.947349348191519e-06, "loss": 3.0954, "step": 234750 }, { "epoch": 0.23131546385252555, "grad_norm": 2.3023033142089844, "learning_rate": 9.947326947237846e-06, "loss": 3.1206, "step": 234800 }, { "epoch": 0.23136472183034765, "grad_norm": 2.5985546112060547, "learning_rate": 9.947304541545022e-06, "loss": 3.0853, "step": 234850 }, { "epoch": 0.23141397980816972, "grad_norm": 2.789794683456421, "learning_rate": 9.947282131113068e-06, "loss": 3.1346, "step": 234900 }, { "epoch": 0.23146323778599182, "grad_norm": 2.9096155166625977, "learning_rate": 9.947259715942003e-06, "loss": 3.047, "step": 234950 }, { "epoch": 0.2315124957638139, "grad_norm": 2.5125975608825684, "learning_rate": 9.947237296031854e-06, "loss": 3.1983, "step": 235000 }, { "epoch": 0.23156175374163598, "grad_norm": 2.3907930850982666, "learning_rate": 9.947214871382636e-06, "loss": 3.1063, "step": 235050 }, { "epoch": 0.23161101171945808, 
"grad_norm": 2.472198486328125, "learning_rate": 9.947192441994375e-06, "loss": 3.1385, "step": 235100 }, { "epoch": 0.23166026969728018, "grad_norm": 2.497612476348877, "learning_rate": 9.947170007867091e-06, "loss": 3.0985, "step": 235150 }, { "epoch": 0.23170952767510225, "grad_norm": 2.393240213394165, "learning_rate": 9.947147569000806e-06, "loss": 3.0728, "step": 235200 }, { "epoch": 0.23175878565292435, "grad_norm": 2.514873743057251, "learning_rate": 9.947125125395539e-06, "loss": 3.0995, "step": 235250 }, { "epoch": 0.23180804363074645, "grad_norm": 2.194000244140625, "learning_rate": 9.947102677051314e-06, "loss": 3.1593, "step": 235300 }, { "epoch": 0.23185730160856852, "grad_norm": 2.136803388595581, "learning_rate": 9.947080223968154e-06, "loss": 3.1685, "step": 235350 }, { "epoch": 0.23190655958639061, "grad_norm": 2.3150687217712402, "learning_rate": 9.947057766146077e-06, "loss": 3.0957, "step": 235400 }, { "epoch": 0.2319558175642127, "grad_norm": 2.1616647243499756, "learning_rate": 9.947035303585106e-06, "loss": 3.0776, "step": 235450 }, { "epoch": 0.23200507554203478, "grad_norm": 2.4263975620269775, "learning_rate": 9.947012836285264e-06, "loss": 3.0721, "step": 235500 }, { "epoch": 0.23205433351985688, "grad_norm": 2.2836196422576904, "learning_rate": 9.94699036424657e-06, "loss": 3.1284, "step": 235550 }, { "epoch": 0.23210359149767895, "grad_norm": 2.34989070892334, "learning_rate": 9.946967887469046e-06, "loss": 3.136, "step": 235600 }, { "epoch": 0.23215284947550105, "grad_norm": 2.47631573677063, "learning_rate": 9.946945405952716e-06, "loss": 3.0621, "step": 235650 }, { "epoch": 0.23220210745332315, "grad_norm": 2.312878370285034, "learning_rate": 9.9469229196976e-06, "loss": 3.1263, "step": 235700 }, { "epoch": 0.23225136543114522, "grad_norm": 2.360121965408325, "learning_rate": 9.946900428703717e-06, "loss": 3.1217, "step": 235750 }, { "epoch": 0.23230062340896732, "grad_norm": 2.383042812347412, "learning_rate": 9.94687793297109e-06, 
"loss": 3.0987, "step": 235800 }, { "epoch": 0.23234988138678941, "grad_norm": 2.354893207550049, "learning_rate": 9.946855432499744e-06, "loss": 3.0821, "step": 235850 }, { "epoch": 0.23239913936461148, "grad_norm": 2.3437821865081787, "learning_rate": 9.946832927289695e-06, "loss": 3.143, "step": 235900 }, { "epoch": 0.23244839734243358, "grad_norm": 2.498812198638916, "learning_rate": 9.94681041734097e-06, "loss": 3.0915, "step": 235950 }, { "epoch": 0.23249765532025568, "grad_norm": 2.435222864151001, "learning_rate": 9.946787902653588e-06, "loss": 3.1309, "step": 236000 }, { "epoch": 0.23254691329807775, "grad_norm": 2.7910876274108887, "learning_rate": 9.94676538322757e-06, "loss": 3.1228, "step": 236050 }, { "epoch": 0.23259617127589985, "grad_norm": 1.9755998849868774, "learning_rate": 9.946742859062937e-06, "loss": 3.168, "step": 236100 }, { "epoch": 0.23264542925372192, "grad_norm": 2.356219530105591, "learning_rate": 9.946720330159713e-06, "loss": 3.1698, "step": 236150 }, { "epoch": 0.23269468723154402, "grad_norm": 2.49165415763855, "learning_rate": 9.946697796517917e-06, "loss": 3.1278, "step": 236200 }, { "epoch": 0.23274394520936612, "grad_norm": 2.3530771732330322, "learning_rate": 9.946675258137572e-06, "loss": 3.1059, "step": 236250 }, { "epoch": 0.23279320318718819, "grad_norm": 2.7169032096862793, "learning_rate": 9.946652715018701e-06, "loss": 3.0824, "step": 236300 }, { "epoch": 0.23284246116501028, "grad_norm": 2.297830581665039, "learning_rate": 9.946630167161325e-06, "loss": 3.1732, "step": 236350 }, { "epoch": 0.23289171914283238, "grad_norm": 2.331650733947754, "learning_rate": 9.946607614565463e-06, "loss": 3.0953, "step": 236400 }, { "epoch": 0.23294097712065445, "grad_norm": 2.148003578186035, "learning_rate": 9.946585057231138e-06, "loss": 3.1492, "step": 236450 }, { "epoch": 0.23299023509847655, "grad_norm": 2.31485915184021, "learning_rate": 9.946562495158371e-06, "loss": 3.095, "step": 236500 }, { "epoch": 0.23303949307629865, 
"grad_norm": 2.520641565322876, "learning_rate": 9.946539928347186e-06, "loss": 3.112, "step": 236550 }, { "epoch": 0.23308875105412072, "grad_norm": 2.2154181003570557, "learning_rate": 9.946517356797601e-06, "loss": 3.106, "step": 236600 }, { "epoch": 0.23313800903194282, "grad_norm": 2.290479898452759, "learning_rate": 9.946494780509642e-06, "loss": 3.148, "step": 236650 }, { "epoch": 0.23318726700976491, "grad_norm": 2.226475477218628, "learning_rate": 9.946472199483327e-06, "loss": 3.059, "step": 236700 }, { "epoch": 0.23323652498758699, "grad_norm": 2.2525384426116943, "learning_rate": 9.94644961371868e-06, "loss": 3.0785, "step": 236750 }, { "epoch": 0.23328578296540908, "grad_norm": 2.2813265323638916, "learning_rate": 9.946427023215722e-06, "loss": 3.1007, "step": 236800 }, { "epoch": 0.23333504094323115, "grad_norm": 2.443009853363037, "learning_rate": 9.946404427974472e-06, "loss": 3.0649, "step": 236850 }, { "epoch": 0.23338429892105325, "grad_norm": 2.413403034210205, "learning_rate": 9.946381827994955e-06, "loss": 3.0645, "step": 236900 }, { "epoch": 0.23343355689887535, "grad_norm": 2.48709774017334, "learning_rate": 9.946359223277193e-06, "loss": 3.1155, "step": 236950 }, { "epoch": 0.23348281487669742, "grad_norm": 2.3465940952301025, "learning_rate": 9.946336613821204e-06, "loss": 3.1489, "step": 237000 }, { "epoch": 0.23353207285451952, "grad_norm": 2.4294638633728027, "learning_rate": 9.946313999627012e-06, "loss": 3.1112, "step": 237050 }, { "epoch": 0.23358133083234162, "grad_norm": 2.2374181747436523, "learning_rate": 9.946291380694642e-06, "loss": 3.0955, "step": 237100 }, { "epoch": 0.2336305888101637, "grad_norm": 2.291595935821533, "learning_rate": 9.946268757024108e-06, "loss": 3.1292, "step": 237150 }, { "epoch": 0.23367984678798578, "grad_norm": 2.592600107192993, "learning_rate": 9.946246128615438e-06, "loss": 3.0984, "step": 237200 }, { "epoch": 0.23372910476580788, "grad_norm": 2.2970426082611084, "learning_rate": 
9.946223495468652e-06, "loss": 3.0784, "step": 237250 }, { "epoch": 0.23377836274362995, "grad_norm": 2.242952585220337, "learning_rate": 9.94620085758377e-06, "loss": 3.0894, "step": 237300 }, { "epoch": 0.23382762072145205, "grad_norm": 2.468498945236206, "learning_rate": 9.946178214960815e-06, "loss": 3.1731, "step": 237350 }, { "epoch": 0.23387687869927412, "grad_norm": 2.1341092586517334, "learning_rate": 9.946155567599809e-06, "loss": 3.113, "step": 237400 }, { "epoch": 0.23392613667709622, "grad_norm": 2.2872345447540283, "learning_rate": 9.946132915500772e-06, "loss": 3.1144, "step": 237450 }, { "epoch": 0.23397539465491832, "grad_norm": 2.4731791019439697, "learning_rate": 9.946110258663728e-06, "loss": 3.0541, "step": 237500 }, { "epoch": 0.2340246526327404, "grad_norm": 2.199284791946411, "learning_rate": 9.946087597088699e-06, "loss": 3.1344, "step": 237550 }, { "epoch": 0.23407391061056249, "grad_norm": 2.2212328910827637, "learning_rate": 9.946064930775703e-06, "loss": 3.1069, "step": 237600 }, { "epoch": 0.23412316858838458, "grad_norm": 2.3216452598571777, "learning_rate": 9.946042259724765e-06, "loss": 3.1191, "step": 237650 }, { "epoch": 0.23417242656620665, "grad_norm": 2.2018797397613525, "learning_rate": 9.946019583935906e-06, "loss": 3.1381, "step": 237700 }, { "epoch": 0.23422168454402875, "grad_norm": 2.1988394260406494, "learning_rate": 9.945996903409148e-06, "loss": 3.1014, "step": 237750 }, { "epoch": 0.23427094252185085, "grad_norm": 2.371311902999878, "learning_rate": 9.945974218144511e-06, "loss": 3.087, "step": 237800 }, { "epoch": 0.23432020049967292, "grad_norm": 2.372830629348755, "learning_rate": 9.945951528142019e-06, "loss": 3.1154, "step": 237850 }, { "epoch": 0.23436945847749502, "grad_norm": 2.418653726577759, "learning_rate": 9.945928833401692e-06, "loss": 3.094, "step": 237900 }, { "epoch": 0.23441871645531712, "grad_norm": 2.2956998348236084, "learning_rate": 9.945906133923555e-06, "loss": 3.129, "step": 237950 }, { 
"epoch": 0.2344679744331392, "grad_norm": 2.327108383178711, "learning_rate": 9.945883429707625e-06, "loss": 3.0912, "step": 238000 }, { "epoch": 0.23451723241096128, "grad_norm": 2.3238492012023926, "learning_rate": 9.945860720753926e-06, "loss": 3.1437, "step": 238050 }, { "epoch": 0.23456649038878336, "grad_norm": 2.3769428730010986, "learning_rate": 9.945838007062479e-06, "loss": 3.0836, "step": 238100 }, { "epoch": 0.23461574836660545, "grad_norm": 2.36173415184021, "learning_rate": 9.945815288633309e-06, "loss": 3.103, "step": 238150 }, { "epoch": 0.23466500634442755, "grad_norm": 2.286583185195923, "learning_rate": 9.945792565466433e-06, "loss": 3.0294, "step": 238200 }, { "epoch": 0.23471426432224962, "grad_norm": 2.3353917598724365, "learning_rate": 9.945769837561876e-06, "loss": 3.0646, "step": 238250 }, { "epoch": 0.23476352230007172, "grad_norm": 2.417656660079956, "learning_rate": 9.94574710491966e-06, "loss": 3.199, "step": 238300 }, { "epoch": 0.23481278027789382, "grad_norm": 2.3847527503967285, "learning_rate": 9.945724367539803e-06, "loss": 3.087, "step": 238350 }, { "epoch": 0.2348620382557159, "grad_norm": 2.3179914951324463, "learning_rate": 9.94570162542233e-06, "loss": 3.1376, "step": 238400 }, { "epoch": 0.234911296233538, "grad_norm": 2.2376773357391357, "learning_rate": 9.945678878567262e-06, "loss": 3.0562, "step": 238450 }, { "epoch": 0.23496055421136008, "grad_norm": 2.3091437816619873, "learning_rate": 9.945656126974621e-06, "loss": 3.07, "step": 238500 }, { "epoch": 0.23500981218918215, "grad_norm": 2.4027609825134277, "learning_rate": 9.94563337064443e-06, "loss": 3.1072, "step": 238550 }, { "epoch": 0.23505907016700425, "grad_norm": 2.3716773986816406, "learning_rate": 9.945610609576707e-06, "loss": 3.0815, "step": 238600 }, { "epoch": 0.23510832814482632, "grad_norm": 2.5280227661132812, "learning_rate": 9.945587843771479e-06, "loss": 3.1652, "step": 238650 }, { "epoch": 0.23515758612264842, "grad_norm": 2.3150553703308105, 
"learning_rate": 9.945565073228764e-06, "loss": 3.0599, "step": 238700 }, { "epoch": 0.23520684410047052, "grad_norm": 2.2977688312530518, "learning_rate": 9.945542297948586e-06, "loss": 3.1061, "step": 238750 }, { "epoch": 0.2352561020782926, "grad_norm": 2.2410190105438232, "learning_rate": 9.945519517930964e-06, "loss": 3.0871, "step": 238800 }, { "epoch": 0.2353053600561147, "grad_norm": 2.341073751449585, "learning_rate": 9.945496733175921e-06, "loss": 3.0806, "step": 238850 }, { "epoch": 0.23535461803393679, "grad_norm": 2.2087273597717285, "learning_rate": 9.945473943683482e-06, "loss": 3.128, "step": 238900 }, { "epoch": 0.23540387601175886, "grad_norm": 2.203188180923462, "learning_rate": 9.945451149453664e-06, "loss": 3.1197, "step": 238950 }, { "epoch": 0.23545313398958095, "grad_norm": 2.2984530925750732, "learning_rate": 9.945428350486492e-06, "loss": 3.0866, "step": 239000 }, { "epoch": 0.23550239196740305, "grad_norm": 2.2698686122894287, "learning_rate": 9.945405546781987e-06, "loss": 3.0721, "step": 239050 }, { "epoch": 0.23555164994522512, "grad_norm": 2.4228298664093018, "learning_rate": 9.94538273834017e-06, "loss": 3.0622, "step": 239100 }, { "epoch": 0.23560090792304722, "grad_norm": 2.4454684257507324, "learning_rate": 9.945359925161065e-06, "loss": 3.1434, "step": 239150 }, { "epoch": 0.23565016590086932, "grad_norm": 2.1904497146606445, "learning_rate": 9.945337107244692e-06, "loss": 3.0895, "step": 239200 }, { "epoch": 0.2356994238786914, "grad_norm": 2.315798282623291, "learning_rate": 9.945314284591072e-06, "loss": 3.0682, "step": 239250 }, { "epoch": 0.2357486818565135, "grad_norm": 2.369689464569092, "learning_rate": 9.94529145720023e-06, "loss": 3.0529, "step": 239300 }, { "epoch": 0.23579793983433556, "grad_norm": 2.3155274391174316, "learning_rate": 9.945268625072184e-06, "loss": 3.0613, "step": 239350 }, { "epoch": 0.23584719781215766, "grad_norm": 2.43719744682312, "learning_rate": 9.94524578820696e-06, "loss": 3.0858, "step": 
239400 }, { "epoch": 0.23589645578997975, "grad_norm": 2.273902416229248, "learning_rate": 9.945222946604575e-06, "loss": 3.0507, "step": 239450 }, { "epoch": 0.23594571376780182, "grad_norm": 2.4932000637054443, "learning_rate": 9.945200100265056e-06, "loss": 3.0097, "step": 239500 }, { "epoch": 0.23599497174562392, "grad_norm": 2.2390365600585938, "learning_rate": 9.945177249188423e-06, "loss": 3.144, "step": 239550 }, { "epoch": 0.23604422972344602, "grad_norm": 2.2054250240325928, "learning_rate": 9.945154393374696e-06, "loss": 3.0919, "step": 239600 }, { "epoch": 0.2360934877012681, "grad_norm": 2.186556100845337, "learning_rate": 9.945131532823899e-06, "loss": 3.1275, "step": 239650 }, { "epoch": 0.2361427456790902, "grad_norm": 2.33681321144104, "learning_rate": 9.945108667536053e-06, "loss": 3.1071, "step": 239700 }, { "epoch": 0.23619200365691229, "grad_norm": 2.218372344970703, "learning_rate": 9.945085797511181e-06, "loss": 3.1533, "step": 239750 }, { "epoch": 0.23624126163473436, "grad_norm": 2.4958925247192383, "learning_rate": 9.945062922749302e-06, "loss": 3.0747, "step": 239800 }, { "epoch": 0.23629051961255645, "grad_norm": 2.375368595123291, "learning_rate": 9.945040043250442e-06, "loss": 3.0925, "step": 239850 }, { "epoch": 0.23633977759037852, "grad_norm": 2.586843490600586, "learning_rate": 9.94501715901462e-06, "loss": 3.0692, "step": 239900 }, { "epoch": 0.23638903556820062, "grad_norm": 2.3325328826904297, "learning_rate": 9.944994270041858e-06, "loss": 3.0961, "step": 239950 }, { "epoch": 0.23643829354602272, "grad_norm": 2.5129683017730713, "learning_rate": 9.94497137633218e-06, "loss": 3.0944, "step": 240000 }, { "epoch": 0.2364875515238448, "grad_norm": 2.7419254779815674, "learning_rate": 9.944948477885606e-06, "loss": 3.1364, "step": 240050 }, { "epoch": 0.2365368095016669, "grad_norm": 2.284538507461548, "learning_rate": 9.944925574702158e-06, "loss": 3.1313, "step": 240100 }, { "epoch": 0.236586067479489, "grad_norm": 
2.3621649742126465, "learning_rate": 9.94490266678186e-06, "loss": 3.1105, "step": 240150 }, { "epoch": 0.23663532545731106, "grad_norm": 2.373086929321289, "learning_rate": 9.944879754124732e-06, "loss": 3.1358, "step": 240200 }, { "epoch": 0.23668458343513316, "grad_norm": 2.3248748779296875, "learning_rate": 9.944856836730795e-06, "loss": 3.1529, "step": 240250 }, { "epoch": 0.23673384141295525, "grad_norm": 2.3124942779541016, "learning_rate": 9.944833914600075e-06, "loss": 3.0888, "step": 240300 }, { "epoch": 0.23678309939077732, "grad_norm": 2.2947301864624023, "learning_rate": 9.94481098773259e-06, "loss": 3.1118, "step": 240350 }, { "epoch": 0.23683235736859942, "grad_norm": 2.3648996353149414, "learning_rate": 9.944788056128363e-06, "loss": 3.0994, "step": 240400 }, { "epoch": 0.23688161534642152, "grad_norm": 2.7386412620544434, "learning_rate": 9.944765119787416e-06, "loss": 3.1072, "step": 240450 }, { "epoch": 0.2369308733242436, "grad_norm": 2.2639384269714355, "learning_rate": 9.944742178709772e-06, "loss": 3.0822, "step": 240500 }, { "epoch": 0.2369801313020657, "grad_norm": 2.700481653213501, "learning_rate": 9.944719232895451e-06, "loss": 3.0245, "step": 240550 }, { "epoch": 0.23702938927988776, "grad_norm": 2.525482416152954, "learning_rate": 9.944696282344478e-06, "loss": 3.0619, "step": 240600 }, { "epoch": 0.23707864725770986, "grad_norm": 2.4162042140960693, "learning_rate": 9.94467332705687e-06, "loss": 3.1195, "step": 240650 }, { "epoch": 0.23712790523553195, "grad_norm": 2.306652069091797, "learning_rate": 9.944650367032656e-06, "loss": 3.063, "step": 240700 }, { "epoch": 0.23717716321335403, "grad_norm": 2.3941709995269775, "learning_rate": 9.94462740227185e-06, "loss": 3.0593, "step": 240750 }, { "epoch": 0.23722642119117612, "grad_norm": 2.2683653831481934, "learning_rate": 9.944604432774483e-06, "loss": 3.0285, "step": 240800 }, { "epoch": 0.23727567916899822, "grad_norm": 2.3559648990631104, "learning_rate": 9.94458145854057e-06, 
"loss": 3.0575, "step": 240850 }, { "epoch": 0.2373249371468203, "grad_norm": 2.288796901702881, "learning_rate": 9.944558479570134e-06, "loss": 3.1064, "step": 240900 }, { "epoch": 0.2373741951246424, "grad_norm": 2.430438756942749, "learning_rate": 9.9445354958632e-06, "loss": 3.08, "step": 240950 }, { "epoch": 0.2374234531024645, "grad_norm": 2.3012094497680664, "learning_rate": 9.944512507419785e-06, "loss": 3.0908, "step": 241000 }, { "epoch": 0.23747271108028656, "grad_norm": 2.2216012477874756, "learning_rate": 9.944489514239917e-06, "loss": 3.0953, "step": 241050 }, { "epoch": 0.23752196905810866, "grad_norm": 2.4044082164764404, "learning_rate": 9.944466516323615e-06, "loss": 3.1058, "step": 241100 }, { "epoch": 0.23757122703593073, "grad_norm": 2.2939109802246094, "learning_rate": 9.944443513670902e-06, "loss": 3.0908, "step": 241150 }, { "epoch": 0.23762048501375282, "grad_norm": 2.329458713531494, "learning_rate": 9.944420506281798e-06, "loss": 3.0783, "step": 241200 }, { "epoch": 0.23766974299157492, "grad_norm": 2.2308907508850098, "learning_rate": 9.944397494156325e-06, "loss": 3.0368, "step": 241250 }, { "epoch": 0.237719000969397, "grad_norm": 2.725761651992798, "learning_rate": 9.944374477294509e-06, "loss": 3.1561, "step": 241300 }, { "epoch": 0.2377682589472191, "grad_norm": 2.354994058609009, "learning_rate": 9.944351455696368e-06, "loss": 3.1204, "step": 241350 }, { "epoch": 0.2378175169250412, "grad_norm": 2.437471628189087, "learning_rate": 9.944328429361927e-06, "loss": 3.0021, "step": 241400 }, { "epoch": 0.23786677490286326, "grad_norm": 2.31559681892395, "learning_rate": 9.944305398291205e-06, "loss": 3.0467, "step": 241450 }, { "epoch": 0.23791603288068536, "grad_norm": 2.5003604888916016, "learning_rate": 9.944282362484226e-06, "loss": 3.0871, "step": 241500 }, { "epoch": 0.23796529085850746, "grad_norm": 2.2023606300354004, "learning_rate": 9.944259321941013e-06, "loss": 3.0978, "step": 241550 }, { "epoch": 0.23801454883632953, 
"grad_norm": 2.5916011333465576, "learning_rate": 9.944236276661583e-06, "loss": 3.0538, "step": 241600 }, { "epoch": 0.23806380681415162, "grad_norm": 2.684414863586426, "learning_rate": 9.944213226645965e-06, "loss": 3.1453, "step": 241650 }, { "epoch": 0.2381130647919737, "grad_norm": 2.5312578678131104, "learning_rate": 9.944190171894178e-06, "loss": 3.0608, "step": 241700 }, { "epoch": 0.2381623227697958, "grad_norm": 2.345900535583496, "learning_rate": 9.944167112406243e-06, "loss": 3.0536, "step": 241750 }, { "epoch": 0.2382115807476179, "grad_norm": 2.156029462814331, "learning_rate": 9.944144048182182e-06, "loss": 3.1098, "step": 241800 }, { "epoch": 0.23826083872543996, "grad_norm": 2.43034029006958, "learning_rate": 9.94412097922202e-06, "loss": 3.0925, "step": 241850 }, { "epoch": 0.23831009670326206, "grad_norm": 2.4536616802215576, "learning_rate": 9.944097905525775e-06, "loss": 3.1358, "step": 241900 }, { "epoch": 0.23835935468108416, "grad_norm": 2.4177565574645996, "learning_rate": 9.944074827093474e-06, "loss": 3.0219, "step": 241950 }, { "epoch": 0.23840861265890623, "grad_norm": 2.363697052001953, "learning_rate": 9.944051743925135e-06, "loss": 3.0701, "step": 242000 }, { "epoch": 0.23845787063672833, "grad_norm": 2.3304929733276367, "learning_rate": 9.944028656020783e-06, "loss": 3.1136, "step": 242050 }, { "epoch": 0.23850712861455042, "grad_norm": 2.4017462730407715, "learning_rate": 9.944005563380436e-06, "loss": 3.0388, "step": 242100 }, { "epoch": 0.2385563865923725, "grad_norm": 2.2806389331817627, "learning_rate": 9.943982466004121e-06, "loss": 3.0634, "step": 242150 }, { "epoch": 0.2386056445701946, "grad_norm": 2.1579458713531494, "learning_rate": 9.943959363891857e-06, "loss": 3.1295, "step": 242200 }, { "epoch": 0.2386549025480167, "grad_norm": 2.259321451187134, "learning_rate": 9.943936257043669e-06, "loss": 3.1484, "step": 242250 }, { "epoch": 0.23870416052583876, "grad_norm": 2.4302003383636475, "learning_rate": 
9.943913145459574e-06, "loss": 3.0832, "step": 242300 }, { "epoch": 0.23875341850366086, "grad_norm": 2.4563777446746826, "learning_rate": 9.9438900291396e-06, "loss": 3.095, "step": 242350 }, { "epoch": 0.23880267648148293, "grad_norm": 2.42958664894104, "learning_rate": 9.943866908083765e-06, "loss": 3.1334, "step": 242400 }, { "epoch": 0.23885193445930503, "grad_norm": 2.34328293800354, "learning_rate": 9.943843782292094e-06, "loss": 3.0816, "step": 242450 }, { "epoch": 0.23890119243712712, "grad_norm": 2.5655317306518555, "learning_rate": 9.943820651764607e-06, "loss": 3.1291, "step": 242500 }, { "epoch": 0.2389504504149492, "grad_norm": 2.731804847717285, "learning_rate": 9.943797516501326e-06, "loss": 3.086, "step": 242550 }, { "epoch": 0.2389997083927713, "grad_norm": 2.2610297203063965, "learning_rate": 9.943774376502276e-06, "loss": 3.0559, "step": 242600 }, { "epoch": 0.2390489663705934, "grad_norm": 2.340064764022827, "learning_rate": 9.943751231767475e-06, "loss": 3.141, "step": 242650 }, { "epoch": 0.23909822434841546, "grad_norm": 2.2533581256866455, "learning_rate": 9.943728082296951e-06, "loss": 3.175, "step": 242700 }, { "epoch": 0.23914748232623756, "grad_norm": 2.355695962905884, "learning_rate": 9.94370492809072e-06, "loss": 3.1397, "step": 242750 }, { "epoch": 0.23919674030405966, "grad_norm": 2.5224623680114746, "learning_rate": 9.943681769148807e-06, "loss": 3.0451, "step": 242800 }, { "epoch": 0.23924599828188173, "grad_norm": 2.355323314666748, "learning_rate": 9.943658605471234e-06, "loss": 3.1289, "step": 242850 }, { "epoch": 0.23929525625970383, "grad_norm": 2.353156328201294, "learning_rate": 9.943635437058023e-06, "loss": 3.077, "step": 242900 }, { "epoch": 0.2393445142375259, "grad_norm": 2.290187120437622, "learning_rate": 9.943612263909197e-06, "loss": 3.0734, "step": 242950 }, { "epoch": 0.239393772215348, "grad_norm": 2.2772364616394043, "learning_rate": 9.943589086024777e-06, "loss": 3.156, "step": 243000 }, { "epoch": 
0.2394430301931701, "grad_norm": 2.2556495666503906, "learning_rate": 9.943565903404786e-06, "loss": 3.0659, "step": 243050 }, { "epoch": 0.23949228817099216, "grad_norm": 2.3475306034088135, "learning_rate": 9.943542716049247e-06, "loss": 3.0376, "step": 243100 }, { "epoch": 0.23954154614881426, "grad_norm": 2.7231764793395996, "learning_rate": 9.94351952395818e-06, "loss": 3.0422, "step": 243150 }, { "epoch": 0.23959080412663636, "grad_norm": 2.324819564819336, "learning_rate": 9.943496327131608e-06, "loss": 3.1233, "step": 243200 }, { "epoch": 0.23964006210445843, "grad_norm": 2.3405513763427734, "learning_rate": 9.943473125569553e-06, "loss": 3.148, "step": 243250 }, { "epoch": 0.23968932008228053, "grad_norm": 2.444082260131836, "learning_rate": 9.943449919272041e-06, "loss": 3.1241, "step": 243300 }, { "epoch": 0.23973857806010263, "grad_norm": 2.4510481357574463, "learning_rate": 9.943426708239089e-06, "loss": 3.0944, "step": 243350 }, { "epoch": 0.2397878360379247, "grad_norm": 2.507953405380249, "learning_rate": 9.94340349247072e-06, "loss": 3.1351, "step": 243400 }, { "epoch": 0.2398370940157468, "grad_norm": 2.488377571105957, "learning_rate": 9.94338027196696e-06, "loss": 3.1303, "step": 243450 }, { "epoch": 0.2398863519935689, "grad_norm": 2.2698919773101807, "learning_rate": 9.943357046727829e-06, "loss": 3.1505, "step": 243500 }, { "epoch": 0.23993560997139096, "grad_norm": 2.312382459640503, "learning_rate": 9.943333816753346e-06, "loss": 3.0448, "step": 243550 }, { "epoch": 0.23998486794921306, "grad_norm": 2.2288942337036133, "learning_rate": 9.943310582043539e-06, "loss": 3.0905, "step": 243600 }, { "epoch": 0.24003412592703513, "grad_norm": 2.3471872806549072, "learning_rate": 9.943287342598426e-06, "loss": 3.0926, "step": 243650 }, { "epoch": 0.24008338390485723, "grad_norm": Infinity, "learning_rate": 9.943264098418032e-06, "loss": 3.1906, "step": 243700 }, { "epoch": 0.24013264188267933, "grad_norm": 2.334940195083618, "learning_rate": 
9.943240849502378e-06, "loss": 3.1313, "step": 243750 }, { "epoch": 0.2401818998605014, "grad_norm": 2.666944980621338, "learning_rate": 9.943217595851486e-06, "loss": 3.0732, "step": 243800 }, { "epoch": 0.2402311578383235, "grad_norm": 2.1752078533172607, "learning_rate": 9.94319433746538e-06, "loss": 3.0713, "step": 243850 }, { "epoch": 0.2402804158161456, "grad_norm": 2.3898892402648926, "learning_rate": 9.943171074344079e-06, "loss": 3.0909, "step": 243900 }, { "epoch": 0.24032967379396766, "grad_norm": 2.4039320945739746, "learning_rate": 9.943147806487608e-06, "loss": 3.1199, "step": 243950 }, { "epoch": 0.24037893177178976, "grad_norm": 2.532168388366699, "learning_rate": 9.943124533895989e-06, "loss": 3.056, "step": 244000 }, { "epoch": 0.24042818974961186, "grad_norm": 2.3794476985931396, "learning_rate": 9.943101256569242e-06, "loss": 3.1421, "step": 244050 }, { "epoch": 0.24047744772743393, "grad_norm": 2.511522054672241, "learning_rate": 9.943077974507391e-06, "loss": 3.0952, "step": 244100 }, { "epoch": 0.24052670570525603, "grad_norm": 2.5325663089752197, "learning_rate": 9.94305468771046e-06, "loss": 3.0879, "step": 244150 }, { "epoch": 0.2405759636830781, "grad_norm": 2.221858263015747, "learning_rate": 9.94303139617847e-06, "loss": 3.1731, "step": 244200 }, { "epoch": 0.2406252216609002, "grad_norm": 2.549050807952881, "learning_rate": 9.943008099911442e-06, "loss": 3.1099, "step": 244250 }, { "epoch": 0.2406744796387223, "grad_norm": 2.2249364852905273, "learning_rate": 9.942984798909398e-06, "loss": 3.0743, "step": 244300 }, { "epoch": 0.24072373761654436, "grad_norm": 2.3810582160949707, "learning_rate": 9.942961493172365e-06, "loss": 3.0952, "step": 244350 }, { "epoch": 0.24077299559436646, "grad_norm": 2.2447354793548584, "learning_rate": 9.94293818270036e-06, "loss": 3.0618, "step": 244400 }, { "epoch": 0.24082225357218856, "grad_norm": 2.1127240657806396, "learning_rate": 9.942914867493407e-06, "loss": 3.0744, "step": 244450 }, { "epoch": 
0.24087151155001063, "grad_norm": 2.4088492393493652, "learning_rate": 9.94289154755153e-06, "loss": 3.0908, "step": 244500 }, { "epoch": 0.24092076952783273, "grad_norm": 2.2426552772521973, "learning_rate": 9.942868222874748e-06, "loss": 3.0922, "step": 244550 }, { "epoch": 0.24097002750565483, "grad_norm": 2.174344539642334, "learning_rate": 9.942844893463086e-06, "loss": 3.0839, "step": 244600 }, { "epoch": 0.2410192854834769, "grad_norm": 2.3258755207061768, "learning_rate": 9.942821559316565e-06, "loss": 3.1183, "step": 244650 }, { "epoch": 0.241068543461299, "grad_norm": 2.352816104888916, "learning_rate": 9.94279822043521e-06, "loss": 3.1089, "step": 244700 }, { "epoch": 0.2411178014391211, "grad_norm": 2.3318135738372803, "learning_rate": 9.942774876819038e-06, "loss": 3.1354, "step": 244750 }, { "epoch": 0.24116705941694316, "grad_norm": 2.3550751209259033, "learning_rate": 9.942751528468079e-06, "loss": 3.1324, "step": 244800 }, { "epoch": 0.24121631739476526, "grad_norm": 2.3526620864868164, "learning_rate": 9.942728175382348e-06, "loss": 3.0591, "step": 244850 }, { "epoch": 0.24126557537258733, "grad_norm": 2.287975549697876, "learning_rate": 9.942704817561872e-06, "loss": 3.1311, "step": 244900 }, { "epoch": 0.24131483335040943, "grad_norm": 2.3440470695495605, "learning_rate": 9.94268145500667e-06, "loss": 2.9949, "step": 244950 }, { "epoch": 0.24136409132823153, "grad_norm": 2.3846359252929688, "learning_rate": 9.94265808771677e-06, "loss": 3.1672, "step": 245000 }, { "epoch": 0.2414133493060536, "grad_norm": 2.5466792583465576, "learning_rate": 9.942634715692188e-06, "loss": 3.1009, "step": 245050 }, { "epoch": 0.2414626072838757, "grad_norm": 2.335782766342163, "learning_rate": 9.942611338932948e-06, "loss": 3.0527, "step": 245100 }, { "epoch": 0.2415118652616978, "grad_norm": 2.589045763015747, "learning_rate": 9.942587957439075e-06, "loss": 3.0058, "step": 245150 }, { "epoch": 0.24156112323951986, "grad_norm": 2.268077850341797, "learning_rate": 
9.942564571210588e-06, "loss": 3.1519, "step": 245200 }, { "epoch": 0.24161038121734196, "grad_norm": 2.2644951343536377, "learning_rate": 9.942541180247514e-06, "loss": 3.0837, "step": 245250 }, { "epoch": 0.24165963919516406, "grad_norm": 2.3352344036102295, "learning_rate": 9.94251778454987e-06, "loss": 3.1186, "step": 245300 }, { "epoch": 0.24170889717298613, "grad_norm": 2.324587106704712, "learning_rate": 9.942494384117682e-06, "loss": 3.1247, "step": 245350 }, { "epoch": 0.24175815515080823, "grad_norm": 2.2567708492279053, "learning_rate": 9.942470978950971e-06, "loss": 3.1065, "step": 245400 }, { "epoch": 0.2418074131286303, "grad_norm": 2.1775035858154297, "learning_rate": 9.942447569049761e-06, "loss": 3.0743, "step": 245450 }, { "epoch": 0.2418566711064524, "grad_norm": 2.361942768096924, "learning_rate": 9.942424154414071e-06, "loss": 3.0667, "step": 245500 }, { "epoch": 0.2419059290842745, "grad_norm": 2.2732067108154297, "learning_rate": 9.94240073504393e-06, "loss": 3.0814, "step": 245550 }, { "epoch": 0.24195518706209657, "grad_norm": 2.2713191509246826, "learning_rate": 9.942377310939353e-06, "loss": 3.1276, "step": 245600 }, { "epoch": 0.24200444503991866, "grad_norm": 2.5068674087524414, "learning_rate": 9.942353882100365e-06, "loss": 3.1656, "step": 245650 }, { "epoch": 0.24205370301774076, "grad_norm": 2.6648664474487305, "learning_rate": 9.94233044852699e-06, "loss": 3.0344, "step": 245700 }, { "epoch": 0.24210296099556283, "grad_norm": 2.379760265350342, "learning_rate": 9.94230701021925e-06, "loss": 3.0594, "step": 245750 }, { "epoch": 0.24215221897338493, "grad_norm": 2.6570470333099365, "learning_rate": 9.942283567177166e-06, "loss": 3.1055, "step": 245800 }, { "epoch": 0.24220147695120703, "grad_norm": 2.401010513305664, "learning_rate": 9.942260119400761e-06, "loss": 3.0429, "step": 245850 }, { "epoch": 0.2422507349290291, "grad_norm": 2.4101784229278564, "learning_rate": 9.942236666890059e-06, "loss": 3.1159, "step": 245900 }, { 
"epoch": 0.2422999929068512, "grad_norm": 2.2877323627471924, "learning_rate": 9.94221320964508e-06, "loss": 3.0643, "step": 245950 }, { "epoch": 0.2423492508846733, "grad_norm": 2.5058717727661133, "learning_rate": 9.94218974766585e-06, "loss": 3.0869, "step": 246000 }, { "epoch": 0.24239850886249537, "grad_norm": 2.3630261421203613, "learning_rate": 9.942166280952385e-06, "loss": 3.1406, "step": 246050 }, { "epoch": 0.24244776684031746, "grad_norm": 2.2738709449768066, "learning_rate": 9.942142809504716e-06, "loss": 3.1039, "step": 246100 }, { "epoch": 0.24249702481813953, "grad_norm": 2.516711950302124, "learning_rate": 9.94211933332286e-06, "loss": 3.1241, "step": 246150 }, { "epoch": 0.24254628279596163, "grad_norm": 2.224327802658081, "learning_rate": 9.942095852406838e-06, "loss": 3.085, "step": 246200 }, { "epoch": 0.24259554077378373, "grad_norm": 2.5019657611846924, "learning_rate": 9.942072366756678e-06, "loss": 3.0569, "step": 246250 }, { "epoch": 0.2426447987516058, "grad_norm": 2.3533637523651123, "learning_rate": 9.942048876372399e-06, "loss": 3.0918, "step": 246300 }, { "epoch": 0.2426940567294279, "grad_norm": 2.9378414154052734, "learning_rate": 9.942025381254025e-06, "loss": 3.1072, "step": 246350 }, { "epoch": 0.24274331470725, "grad_norm": 2.682642936706543, "learning_rate": 9.942001881401577e-06, "loss": 3.1172, "step": 246400 }, { "epoch": 0.24279257268507207, "grad_norm": 2.404968500137329, "learning_rate": 9.941978376815077e-06, "loss": 3.1451, "step": 246450 }, { "epoch": 0.24284183066289416, "grad_norm": 2.4377310276031494, "learning_rate": 9.94195486749455e-06, "loss": 3.1186, "step": 246500 }, { "epoch": 0.24289108864071626, "grad_norm": 2.4593746662139893, "learning_rate": 9.941931353440017e-06, "loss": 3.0653, "step": 246550 }, { "epoch": 0.24294034661853833, "grad_norm": 2.4421639442443848, "learning_rate": 9.9419078346515e-06, "loss": 3.1909, "step": 246600 }, { "epoch": 0.24298960459636043, "grad_norm": 2.4267258644104004, 
"learning_rate": 9.941884311129023e-06, "loss": 3.0655, "step": 246650 }, { "epoch": 0.2430388625741825, "grad_norm": 2.5625085830688477, "learning_rate": 9.94186078287261e-06, "loss": 2.9992, "step": 246700 }, { "epoch": 0.2430881205520046, "grad_norm": 2.464536666870117, "learning_rate": 9.941837249882278e-06, "loss": 3.0667, "step": 246750 }, { "epoch": 0.2431373785298267, "grad_norm": 2.300596237182617, "learning_rate": 9.941813712158056e-06, "loss": 3.14, "step": 246800 }, { "epoch": 0.24318663650764877, "grad_norm": 2.354001045227051, "learning_rate": 9.94179016969996e-06, "loss": 3.0716, "step": 246850 }, { "epoch": 0.24323589448547087, "grad_norm": 2.6441471576690674, "learning_rate": 9.941766622508017e-06, "loss": 3.0748, "step": 246900 }, { "epoch": 0.24328515246329296, "grad_norm": 2.2656538486480713, "learning_rate": 9.94174307058225e-06, "loss": 3.115, "step": 246950 }, { "epoch": 0.24333441044111503, "grad_norm": 2.654812812805176, "learning_rate": 9.941719513922679e-06, "loss": 3.1058, "step": 247000 }, { "epoch": 0.24338366841893713, "grad_norm": 2.21592116355896, "learning_rate": 9.941695952529328e-06, "loss": 3.0971, "step": 247050 }, { "epoch": 0.24343292639675923, "grad_norm": 2.258265495300293, "learning_rate": 9.941672386402219e-06, "loss": 3.1109, "step": 247100 }, { "epoch": 0.2434821843745813, "grad_norm": 2.21028470993042, "learning_rate": 9.941648815541375e-06, "loss": 3.0554, "step": 247150 }, { "epoch": 0.2435314423524034, "grad_norm": 2.3918704986572266, "learning_rate": 9.941625239946819e-06, "loss": 3.0069, "step": 247200 }, { "epoch": 0.2435807003302255, "grad_norm": 2.466355562210083, "learning_rate": 9.941601659618572e-06, "loss": 3.1028, "step": 247250 }, { "epoch": 0.24362995830804757, "grad_norm": 2.2478737831115723, "learning_rate": 9.941578074556656e-06, "loss": 3.101, "step": 247300 }, { "epoch": 0.24367921628586967, "grad_norm": 2.276060104370117, "learning_rate": 9.941554484761098e-06, "loss": 3.0946, "step": 247350 }, { 
"epoch": 0.24372847426369174, "grad_norm": 2.4429895877838135, "learning_rate": 9.941530890231917e-06, "loss": 3.1029, "step": 247400 }, { "epoch": 0.24377773224151383, "grad_norm": 2.5371768474578857, "learning_rate": 9.941507290969137e-06, "loss": 3.1083, "step": 247450 }, { "epoch": 0.24382699021933593, "grad_norm": 2.7976884841918945, "learning_rate": 9.94148368697278e-06, "loss": 3.1047, "step": 247500 }, { "epoch": 0.243876248197158, "grad_norm": 2.624077320098877, "learning_rate": 9.941460078242866e-06, "loss": 3.1258, "step": 247550 }, { "epoch": 0.2439255061749801, "grad_norm": 2.4488766193389893, "learning_rate": 9.941436464779422e-06, "loss": 3.0496, "step": 247600 }, { "epoch": 0.2439747641528022, "grad_norm": 2.2939250469207764, "learning_rate": 9.941412846582469e-06, "loss": 3.1059, "step": 247650 }, { "epoch": 0.24402402213062427, "grad_norm": 2.470970869064331, "learning_rate": 9.941389223652029e-06, "loss": 3.1238, "step": 247700 }, { "epoch": 0.24407328010844637, "grad_norm": 2.7596423625946045, "learning_rate": 9.941365595988127e-06, "loss": 3.1022, "step": 247750 }, { "epoch": 0.24412253808626846, "grad_norm": 2.537104606628418, "learning_rate": 9.94134196359078e-06, "loss": 3.1173, "step": 247800 }, { "epoch": 0.24417179606409053, "grad_norm": 2.3499011993408203, "learning_rate": 9.941318326460017e-06, "loss": 3.124, "step": 247850 }, { "epoch": 0.24422105404191263, "grad_norm": 2.350213050842285, "learning_rate": 9.941294684595856e-06, "loss": 3.1126, "step": 247900 }, { "epoch": 0.2442703120197347, "grad_norm": 2.3174033164978027, "learning_rate": 9.941271037998323e-06, "loss": 3.0122, "step": 247950 }, { "epoch": 0.2443195699975568, "grad_norm": 2.215648651123047, "learning_rate": 9.94124738666744e-06, "loss": 3.1004, "step": 248000 }, { "epoch": 0.2443688279753789, "grad_norm": 2.441850423812866, "learning_rate": 9.941223730603229e-06, "loss": 3.1195, "step": 248050 }, { "epoch": 0.24441808595320097, "grad_norm": 2.470325231552124, 
"learning_rate": 9.94120006980571e-06, "loss": 3.0901, "step": 248100 }, { "epoch": 0.24446734393102307, "grad_norm": 2.3005940914154053, "learning_rate": 9.94117640427491e-06, "loss": 3.1085, "step": 248150 }, { "epoch": 0.24451660190884517, "grad_norm": 2.5016424655914307, "learning_rate": 9.94115273401085e-06, "loss": 3.0549, "step": 248200 }, { "epoch": 0.24456585988666724, "grad_norm": 2.3000781536102295, "learning_rate": 9.941129059013553e-06, "loss": 3.0505, "step": 248250 }, { "epoch": 0.24461511786448933, "grad_norm": 2.508760929107666, "learning_rate": 9.94110537928304e-06, "loss": 3.0527, "step": 248300 }, { "epoch": 0.24466437584231143, "grad_norm": 2.4953818321228027, "learning_rate": 9.941081694819336e-06, "loss": 3.0984, "step": 248350 }, { "epoch": 0.2447136338201335, "grad_norm": 2.1743760108947754, "learning_rate": 9.941058005622463e-06, "loss": 3.1083, "step": 248400 }, { "epoch": 0.2447628917979556, "grad_norm": 2.5311193466186523, "learning_rate": 9.941034311692444e-06, "loss": 3.0759, "step": 248450 }, { "epoch": 0.2448121497757777, "grad_norm": 2.205291748046875, "learning_rate": 9.941010613029298e-06, "loss": 3.0108, "step": 248500 }, { "epoch": 0.24486140775359977, "grad_norm": 2.2675647735595703, "learning_rate": 9.940986909633053e-06, "loss": 3.0623, "step": 248550 }, { "epoch": 0.24491066573142187, "grad_norm": 2.4191153049468994, "learning_rate": 9.940963201503728e-06, "loss": 3.0732, "step": 248600 }, { "epoch": 0.24495992370924394, "grad_norm": 2.2322371006011963, "learning_rate": 9.940939488641349e-06, "loss": 3.1583, "step": 248650 }, { "epoch": 0.24500918168706604, "grad_norm": 2.179018497467041, "learning_rate": 9.940915771045936e-06, "loss": 3.1251, "step": 248700 }, { "epoch": 0.24505843966488813, "grad_norm": 2.383819103240967, "learning_rate": 9.940892048717513e-06, "loss": 3.125, "step": 248750 }, { "epoch": 0.2451076976427102, "grad_norm": 2.652226448059082, "learning_rate": 9.940868321656102e-06, "loss": 3.1239, "step": 
248800 }, { "epoch": 0.2451569556205323, "grad_norm": 2.601790428161621, "learning_rate": 9.940844589861725e-06, "loss": 3.0994, "step": 248850 }, { "epoch": 0.2452062135983544, "grad_norm": 2.3763363361358643, "learning_rate": 9.940820853334408e-06, "loss": 3.1328, "step": 248900 }, { "epoch": 0.24525547157617647, "grad_norm": 2.5235986709594727, "learning_rate": 9.94079711207417e-06, "loss": 3.0695, "step": 248950 }, { "epoch": 0.24530472955399857, "grad_norm": 2.45487380027771, "learning_rate": 9.940773366081036e-06, "loss": 3.148, "step": 249000 }, { "epoch": 0.24535398753182067, "grad_norm": 2.436323404312134, "learning_rate": 9.940749615355028e-06, "loss": 3.1074, "step": 249050 }, { "epoch": 0.24540324550964274, "grad_norm": 2.496150255203247, "learning_rate": 9.940725859896167e-06, "loss": 3.1093, "step": 249100 }, { "epoch": 0.24545250348746483, "grad_norm": 2.441930055618286, "learning_rate": 9.94070209970448e-06, "loss": 3.0142, "step": 249150 }, { "epoch": 0.2455017614652869, "grad_norm": 2.4377288818359375, "learning_rate": 9.940678334779985e-06, "loss": 3.0887, "step": 249200 }, { "epoch": 0.245551019443109, "grad_norm": 2.2656266689300537, "learning_rate": 9.940654565122709e-06, "loss": 3.07, "step": 249250 }, { "epoch": 0.2456002774209311, "grad_norm": 2.2421586513519287, "learning_rate": 9.940630790732671e-06, "loss": 3.1638, "step": 249300 }, { "epoch": 0.24564953539875317, "grad_norm": 2.350172519683838, "learning_rate": 9.940607011609898e-06, "loss": 3.0672, "step": 249350 }, { "epoch": 0.24569879337657527, "grad_norm": 2.2374517917633057, "learning_rate": 9.940583227754408e-06, "loss": 3.1196, "step": 249400 }, { "epoch": 0.24574805135439737, "grad_norm": 2.556140422821045, "learning_rate": 9.940559439166228e-06, "loss": 3.0958, "step": 249450 }, { "epoch": 0.24579730933221944, "grad_norm": 2.3725037574768066, "learning_rate": 9.940535645845377e-06, "loss": 3.0805, "step": 249500 }, { "epoch": 0.24584656731004154, "grad_norm": 
2.8568263053894043, "learning_rate": 9.940511847791881e-06, "loss": 3.0543, "step": 249550 }, { "epoch": 0.24589582528786363, "grad_norm": 2.3095898628234863, "learning_rate": 9.94048804500576e-06, "loss": 3.141, "step": 249600 }, { "epoch": 0.2459450832656857, "grad_norm": 2.4278645515441895, "learning_rate": 9.940464237487041e-06, "loss": 3.1736, "step": 249650 }, { "epoch": 0.2459943412435078, "grad_norm": 2.2884814739227295, "learning_rate": 9.940440425235742e-06, "loss": 3.1215, "step": 249700 }, { "epoch": 0.24604359922132987, "grad_norm": 2.3589158058166504, "learning_rate": 9.940416608251889e-06, "loss": 3.1271, "step": 249750 }, { "epoch": 0.24609285719915197, "grad_norm": 2.3644821643829346, "learning_rate": 9.940392786535505e-06, "loss": 3.1211, "step": 249800 }, { "epoch": 0.24614211517697407, "grad_norm": 2.3447327613830566, "learning_rate": 9.940368960086609e-06, "loss": 3.1404, "step": 249850 }, { "epoch": 0.24619137315479614, "grad_norm": 2.534984827041626, "learning_rate": 9.940345128905226e-06, "loss": 3.0886, "step": 249900 }, { "epoch": 0.24624063113261824, "grad_norm": 2.302534341812134, "learning_rate": 9.940321292991381e-06, "loss": 3.08, "step": 249950 }, { "epoch": 0.24628988911044034, "grad_norm": 2.476647138595581, "learning_rate": 9.940297452345096e-06, "loss": 3.2269, "step": 250000 }, { "epoch": 0.2463391470882624, "grad_norm": 2.394244909286499, "learning_rate": 9.940273606966391e-06, "loss": 3.0744, "step": 250050 }, { "epoch": 0.2463884050660845, "grad_norm": 2.248342275619507, "learning_rate": 9.940249756855291e-06, "loss": 3.132, "step": 250100 }, { "epoch": 0.2464376630439066, "grad_norm": 2.3945417404174805, "learning_rate": 9.94022590201182e-06, "loss": 3.1471, "step": 250150 }, { "epoch": 0.24648692102172867, "grad_norm": 2.2157044410705566, "learning_rate": 9.940202042435997e-06, "loss": 3.1202, "step": 250200 }, { "epoch": 0.24653617899955077, "grad_norm": 2.2267935276031494, "learning_rate": 9.940178178127849e-06, "loss": 
3.1011, "step": 250250 }, { "epoch": 0.24658543697737287, "grad_norm": 2.3848650455474854, "learning_rate": 9.940154309087397e-06, "loss": 3.111, "step": 250300 }, { "epoch": 0.24663469495519494, "grad_norm": 2.446502447128296, "learning_rate": 9.940130435314664e-06, "loss": 3.0433, "step": 250350 }, { "epoch": 0.24668395293301704, "grad_norm": 2.696394920349121, "learning_rate": 9.940106556809672e-06, "loss": 3.0365, "step": 250400 }, { "epoch": 0.2467332109108391, "grad_norm": 2.2666432857513428, "learning_rate": 9.940082673572447e-06, "loss": 3.0728, "step": 250450 }, { "epoch": 0.2467824688886612, "grad_norm": 2.562997341156006, "learning_rate": 9.940058785603008e-06, "loss": 3.0335, "step": 250500 }, { "epoch": 0.2468317268664833, "grad_norm": 2.45611310005188, "learning_rate": 9.940034892901378e-06, "loss": 3.1366, "step": 250550 }, { "epoch": 0.24688098484430537, "grad_norm": 2.1991193294525146, "learning_rate": 9.940010995467584e-06, "loss": 3.025, "step": 250600 }, { "epoch": 0.24693024282212747, "grad_norm": 2.2913434505462646, "learning_rate": 9.939987093301646e-06, "loss": 3.1212, "step": 250650 }, { "epoch": 0.24697950079994957, "grad_norm": 2.203089952468872, "learning_rate": 9.939963186403586e-06, "loss": 3.0513, "step": 250700 }, { "epoch": 0.24702875877777164, "grad_norm": 2.4255423545837402, "learning_rate": 9.939939274773429e-06, "loss": 3.0374, "step": 250750 }, { "epoch": 0.24707801675559374, "grad_norm": 2.334313154220581, "learning_rate": 9.939915358411196e-06, "loss": 3.0595, "step": 250800 }, { "epoch": 0.24712727473341584, "grad_norm": 2.455349922180176, "learning_rate": 9.939891437316912e-06, "loss": 3.1169, "step": 250850 }, { "epoch": 0.2471765327112379, "grad_norm": 2.473872184753418, "learning_rate": 9.939867511490597e-06, "loss": 3.0962, "step": 250900 }, { "epoch": 0.24722579068906, "grad_norm": 2.2428600788116455, "learning_rate": 9.939843580932279e-06, "loss": 3.0097, "step": 250950 }, { "epoch": 0.24727504866688207, "grad_norm": 
2.6244871616363525, "learning_rate": 9.939819645641975e-06, "loss": 3.1197, "step": 251000 }, { "epoch": 0.24732430664470417, "grad_norm": 2.2568256855010986, "learning_rate": 9.939795705619711e-06, "loss": 3.0428, "step": 251050 }, { "epoch": 0.24737356462252627, "grad_norm": 2.41304349899292, "learning_rate": 9.93977176086551e-06, "loss": 3.074, "step": 251100 }, { "epoch": 0.24742282260034834, "grad_norm": 2.391720771789551, "learning_rate": 9.939747811379395e-06, "loss": 3.1085, "step": 251150 }, { "epoch": 0.24747208057817044, "grad_norm": 2.445913314819336, "learning_rate": 9.939723857161388e-06, "loss": 3.0879, "step": 251200 }, { "epoch": 0.24752133855599254, "grad_norm": 2.472515106201172, "learning_rate": 9.939699898211513e-06, "loss": 3.0373, "step": 251250 }, { "epoch": 0.2475705965338146, "grad_norm": 2.427020788192749, "learning_rate": 9.939675934529791e-06, "loss": 3.0324, "step": 251300 }, { "epoch": 0.2476198545116367, "grad_norm": 2.411036968231201, "learning_rate": 9.939651966116245e-06, "loss": 3.0639, "step": 251350 }, { "epoch": 0.2476691124894588, "grad_norm": 2.1841461658477783, "learning_rate": 9.939627992970901e-06, "loss": 3.0497, "step": 251400 }, { "epoch": 0.24771837046728087, "grad_norm": 2.2170310020446777, "learning_rate": 9.939604015093781e-06, "loss": 3.115, "step": 251450 }, { "epoch": 0.24776762844510297, "grad_norm": 2.244953155517578, "learning_rate": 9.939580032484907e-06, "loss": 3.1103, "step": 251500 }, { "epoch": 0.24781688642292507, "grad_norm": 2.2516987323760986, "learning_rate": 9.9395560451443e-06, "loss": 3.0713, "step": 251550 }, { "epoch": 0.24786614440074714, "grad_norm": 2.214170217514038, "learning_rate": 9.939532053071987e-06, "loss": 3.081, "step": 251600 }, { "epoch": 0.24791540237856924, "grad_norm": 2.3635520935058594, "learning_rate": 9.93950805626799e-06, "loss": 3.1075, "step": 251650 }, { "epoch": 0.2479646603563913, "grad_norm": 2.274703025817871, "learning_rate": 9.93948405473233e-06, "loss": 3.1463, 
"step": 251700 }, { "epoch": 0.2480139183342134, "grad_norm": 2.226524591445923, "learning_rate": 9.93946004846503e-06, "loss": 3.0669, "step": 251750 }, { "epoch": 0.2480631763120355, "grad_norm": 2.3225700855255127, "learning_rate": 9.939436037466114e-06, "loss": 3.0302, "step": 251800 }, { "epoch": 0.24811243428985758, "grad_norm": 2.2212424278259277, "learning_rate": 9.939412021735605e-06, "loss": 3.0683, "step": 251850 }, { "epoch": 0.24816169226767967, "grad_norm": 2.229910135269165, "learning_rate": 9.939388001273528e-06, "loss": 3.1137, "step": 251900 }, { "epoch": 0.24821095024550177, "grad_norm": 2.3148345947265625, "learning_rate": 9.939363976079903e-06, "loss": 3.0051, "step": 251950 }, { "epoch": 0.24826020822332384, "grad_norm": 2.599792957305908, "learning_rate": 9.939339946154755e-06, "loss": 3.0721, "step": 252000 }, { "epoch": 0.24830946620114594, "grad_norm": 2.226205587387085, "learning_rate": 9.939315911498105e-06, "loss": 3.083, "step": 252050 }, { "epoch": 0.24835872417896804, "grad_norm": 2.3240468502044678, "learning_rate": 9.939291872109978e-06, "loss": 3.0912, "step": 252100 }, { "epoch": 0.2484079821567901, "grad_norm": 2.4869561195373535, "learning_rate": 9.939267827990396e-06, "loss": 3.031, "step": 252150 }, { "epoch": 0.2484572401346122, "grad_norm": 2.18959379196167, "learning_rate": 9.939243779139381e-06, "loss": 3.0822, "step": 252200 }, { "epoch": 0.24850649811243428, "grad_norm": 2.275770425796509, "learning_rate": 9.939219725556959e-06, "loss": 3.1012, "step": 252250 }, { "epoch": 0.24855575609025637, "grad_norm": 2.4653937816619873, "learning_rate": 9.93919566724315e-06, "loss": 3.0809, "step": 252300 }, { "epoch": 0.24860501406807847, "grad_norm": 2.7099692821502686, "learning_rate": 9.939171604197978e-06, "loss": 3.0608, "step": 252350 }, { "epoch": 0.24865427204590054, "grad_norm": 2.438619613647461, "learning_rate": 9.939147536421467e-06, "loss": 3.0793, "step": 252400 }, { "epoch": 0.24870353002372264, "grad_norm": 
2.2371585369110107, "learning_rate": 9.93912346391364e-06, "loss": 3.0508, "step": 252450 }, { "epoch": 0.24875278800154474, "grad_norm": 2.340819835662842, "learning_rate": 9.939099386674518e-06, "loss": 3.1413, "step": 252500 }, { "epoch": 0.2488020459793668, "grad_norm": 2.3207669258117676, "learning_rate": 9.939075304704127e-06, "loss": 3.146, "step": 252550 }, { "epoch": 0.2488513039571889, "grad_norm": 2.2835347652435303, "learning_rate": 9.939051218002488e-06, "loss": 3.048, "step": 252600 }, { "epoch": 0.248900561935011, "grad_norm": 2.399665117263794, "learning_rate": 9.939027126569624e-06, "loss": 3.1409, "step": 252650 }, { "epoch": 0.24894981991283308, "grad_norm": 2.2042250633239746, "learning_rate": 9.939003030405561e-06, "loss": 3.1106, "step": 252700 }, { "epoch": 0.24899907789065517, "grad_norm": 2.4495363235473633, "learning_rate": 9.938978929510318e-06, "loss": 3.1508, "step": 252750 }, { "epoch": 0.24904833586847727, "grad_norm": 2.259352922439575, "learning_rate": 9.93895482388392e-06, "loss": 3.0992, "step": 252800 }, { "epoch": 0.24909759384629934, "grad_norm": 2.372040271759033, "learning_rate": 9.938930713526392e-06, "loss": 3.2258, "step": 252850 }, { "epoch": 0.24914685182412144, "grad_norm": 2.3804309368133545, "learning_rate": 9.938906598437751e-06, "loss": 3.0736, "step": 252900 }, { "epoch": 0.2491961098019435, "grad_norm": 2.170020580291748, "learning_rate": 9.938882478618028e-06, "loss": 3.1219, "step": 252950 }, { "epoch": 0.2492453677797656, "grad_norm": 2.4325509071350098, "learning_rate": 9.93885835406724e-06, "loss": 3.1179, "step": 253000 }, { "epoch": 0.2492946257575877, "grad_norm": 2.3336915969848633, "learning_rate": 9.938834224785414e-06, "loss": 3.0529, "step": 253050 }, { "epoch": 0.24934388373540978, "grad_norm": 2.2493019104003906, "learning_rate": 9.938810090772572e-06, "loss": 3.2062, "step": 253100 }, { "epoch": 0.24939314171323188, "grad_norm": 2.281674861907959, "learning_rate": 9.938785952028735e-06, "loss": 
3.0677, "step": 253150 }, { "epoch": 0.24944239969105397, "grad_norm": 2.3389601707458496, "learning_rate": 9.93876180855393e-06, "loss": 3.0819, "step": 253200 }, { "epoch": 0.24949165766887604, "grad_norm": 2.5760600566864014, "learning_rate": 9.938737660348175e-06, "loss": 3.0431, "step": 253250 }, { "epoch": 0.24954091564669814, "grad_norm": 2.4122016429901123, "learning_rate": 9.938713507411496e-06, "loss": 3.1449, "step": 253300 }, { "epoch": 0.24959017362452024, "grad_norm": 2.3927791118621826, "learning_rate": 9.938689349743919e-06, "loss": 3.0748, "step": 253350 }, { "epoch": 0.2496394316023423, "grad_norm": 2.356490135192871, "learning_rate": 9.938665187345464e-06, "loss": 3.0579, "step": 253400 }, { "epoch": 0.2496886895801644, "grad_norm": 2.36407470703125, "learning_rate": 9.938641020216153e-06, "loss": 3.1203, "step": 253450 }, { "epoch": 0.24973794755798648, "grad_norm": 2.6700825691223145, "learning_rate": 9.938616848356011e-06, "loss": 3.1075, "step": 253500 }, { "epoch": 0.24978720553580858, "grad_norm": 2.545535087585449, "learning_rate": 9.938592671765062e-06, "loss": 3.1192, "step": 253550 }, { "epoch": 0.24983646351363067, "grad_norm": 2.2854321002960205, "learning_rate": 9.938568490443326e-06, "loss": 3.0484, "step": 253600 }, { "epoch": 0.24988572149145274, "grad_norm": 2.2227954864501953, "learning_rate": 9.938544304390829e-06, "loss": 3.0424, "step": 253650 }, { "epoch": 0.24993497946927484, "grad_norm": 2.2075023651123047, "learning_rate": 9.938520113607592e-06, "loss": 3.1165, "step": 253700 }, { "epoch": 0.24998423744709694, "grad_norm": 2.250612497329712, "learning_rate": 9.938495918093642e-06, "loss": 3.0227, "step": 253750 }, { "epoch": 0.25003349542491904, "grad_norm": 2.37180233001709, "learning_rate": 9.938471717848998e-06, "loss": 3.1164, "step": 253800 }, { "epoch": 0.2500827534027411, "grad_norm": 2.2624518871307373, "learning_rate": 9.938447512873686e-06, "loss": 3.0784, "step": 253850 }, { "epoch": 0.2501320113805632, 
"grad_norm": 2.4083847999572754, "learning_rate": 9.938423303167726e-06, "loss": 3.1082, "step": 253900 }, { "epoch": 0.2501812693583853, "grad_norm": 2.560121774673462, "learning_rate": 9.938399088731145e-06, "loss": 3.0605, "step": 253950 }, { "epoch": 0.2502305273362074, "grad_norm": 2.244215726852417, "learning_rate": 9.938374869563964e-06, "loss": 3.0814, "step": 254000 }, { "epoch": 0.25027978531402945, "grad_norm": 2.2970893383026123, "learning_rate": 9.938350645666207e-06, "loss": 3.0951, "step": 254050 }, { "epoch": 0.25032904329185157, "grad_norm": 2.2271292209625244, "learning_rate": 9.938326417037895e-06, "loss": 3.1191, "step": 254100 }, { "epoch": 0.25037830126967364, "grad_norm": 2.465325355529785, "learning_rate": 9.938302183679054e-06, "loss": 3.1004, "step": 254150 }, { "epoch": 0.2504275592474957, "grad_norm": 2.3096797466278076, "learning_rate": 9.938277945589708e-06, "loss": 3.0869, "step": 254200 }, { "epoch": 0.25047681722531784, "grad_norm": 2.523975133895874, "learning_rate": 9.938253702769876e-06, "loss": 3.0681, "step": 254250 }, { "epoch": 0.2505260752031399, "grad_norm": 2.206946849822998, "learning_rate": 9.938229455219584e-06, "loss": 3.1071, "step": 254300 }, { "epoch": 0.250575333180962, "grad_norm": 2.196448564529419, "learning_rate": 9.938205202938855e-06, "loss": 3.046, "step": 254350 }, { "epoch": 0.25062459115878405, "grad_norm": 2.299151659011841, "learning_rate": 9.938180945927713e-06, "loss": 2.9449, "step": 254400 }, { "epoch": 0.2506738491366062, "grad_norm": 2.4217722415924072, "learning_rate": 9.93815668418618e-06, "loss": 3.0386, "step": 254450 }, { "epoch": 0.25072310711442825, "grad_norm": 2.3608052730560303, "learning_rate": 9.938132417714279e-06, "loss": 3.1092, "step": 254500 }, { "epoch": 0.2507723650922503, "grad_norm": 2.2930378913879395, "learning_rate": 9.938108146512033e-06, "loss": 3.043, "step": 254550 }, { "epoch": 0.25082162307007244, "grad_norm": 2.4093875885009766, "learning_rate": 
9.938083870579466e-06, "loss": 3.0264, "step": 254600 }, { "epoch": 0.2508708810478945, "grad_norm": 2.457159996032715, "learning_rate": 9.938059589916604e-06, "loss": 3.0552, "step": 254650 }, { "epoch": 0.2509201390257166, "grad_norm": 2.319514513015747, "learning_rate": 9.938035304523465e-06, "loss": 3.0099, "step": 254700 }, { "epoch": 0.2509693970035387, "grad_norm": 2.348623514175415, "learning_rate": 9.938011014400077e-06, "loss": 3.1371, "step": 254750 }, { "epoch": 0.2510186549813608, "grad_norm": 2.42630672454834, "learning_rate": 9.93798671954646e-06, "loss": 3.0626, "step": 254800 }, { "epoch": 0.25106791295918285, "grad_norm": 2.381896734237671, "learning_rate": 9.937962419962638e-06, "loss": 3.0872, "step": 254850 }, { "epoch": 0.251117170937005, "grad_norm": 2.2345798015594482, "learning_rate": 9.937938115648635e-06, "loss": 3.1616, "step": 254900 }, { "epoch": 0.25116642891482704, "grad_norm": 2.2448477745056152, "learning_rate": 9.937913806604475e-06, "loss": 3.1088, "step": 254950 }, { "epoch": 0.2512156868926491, "grad_norm": 2.418372869491577, "learning_rate": 9.93788949283018e-06, "loss": 3.1079, "step": 255000 }, { "epoch": 0.25126494487047124, "grad_norm": 2.391432285308838, "learning_rate": 9.937865174325773e-06, "loss": 3.1139, "step": 255050 }, { "epoch": 0.2513142028482933, "grad_norm": 2.4851670265197754, "learning_rate": 9.937840851091278e-06, "loss": 3.1061, "step": 255100 }, { "epoch": 0.2513634608261154, "grad_norm": 2.5608394145965576, "learning_rate": 9.937816523126717e-06, "loss": 3.1126, "step": 255150 }, { "epoch": 0.2514127188039375, "grad_norm": 2.205476760864258, "learning_rate": 9.937792190432115e-06, "loss": 3.1022, "step": 255200 }, { "epoch": 0.2514619767817596, "grad_norm": 2.371083974838257, "learning_rate": 9.937767853007496e-06, "loss": 3.0961, "step": 255250 }, { "epoch": 0.25151123475958165, "grad_norm": 2.2045812606811523, "learning_rate": 9.937743510852881e-06, "loss": 3.0957, "step": 255300 }, { "epoch": 
0.2515604927374038, "grad_norm": 2.368514060974121, "learning_rate": 9.937719163968294e-06, "loss": 3.0679, "step": 255350 }, { "epoch": 0.25160975071522584, "grad_norm": 2.3765487670898438, "learning_rate": 9.93769481235376e-06, "loss": 3.1062, "step": 255400 }, { "epoch": 0.2516590086930479, "grad_norm": 2.5080883502960205, "learning_rate": 9.9376704560093e-06, "loss": 3.0998, "step": 255450 }, { "epoch": 0.25170826667087004, "grad_norm": 2.4907028675079346, "learning_rate": 9.937646094934938e-06, "loss": 3.1767, "step": 255500 }, { "epoch": 0.2517575246486921, "grad_norm": 2.3081626892089844, "learning_rate": 9.937621729130698e-06, "loss": 3.0126, "step": 255550 }, { "epoch": 0.2518067826265142, "grad_norm": 2.3124520778656006, "learning_rate": 9.937597358596604e-06, "loss": 3.1518, "step": 255600 }, { "epoch": 0.25185604060433625, "grad_norm": 2.241363048553467, "learning_rate": 9.937572983332678e-06, "loss": 3.0758, "step": 255650 }, { "epoch": 0.2519052985821584, "grad_norm": 2.295393705368042, "learning_rate": 9.937548603338943e-06, "loss": 3.0508, "step": 255700 }, { "epoch": 0.25195455655998045, "grad_norm": 2.449326515197754, "learning_rate": 9.937524218615424e-06, "loss": 3.1305, "step": 255750 }, { "epoch": 0.2520038145378025, "grad_norm": 2.3756635189056396, "learning_rate": 9.937499829162143e-06, "loss": 3.0365, "step": 255800 }, { "epoch": 0.25205307251562464, "grad_norm": 2.17541766166687, "learning_rate": 9.937475434979124e-06, "loss": 3.1491, "step": 255850 }, { "epoch": 0.2521023304934467, "grad_norm": 2.4092917442321777, "learning_rate": 9.93745103606639e-06, "loss": 3.0689, "step": 255900 }, { "epoch": 0.2521515884712688, "grad_norm": 2.1509618759155273, "learning_rate": 9.937426632423964e-06, "loss": 3.0114, "step": 255950 }, { "epoch": 0.2522008464490909, "grad_norm": 2.3913373947143555, "learning_rate": 9.937402224051871e-06, "loss": 3.0665, "step": 256000 }, { "epoch": 0.252250104426913, "grad_norm": 2.494486093521118, "learning_rate": 
9.937377810950131e-06, "loss": 3.0495, "step": 256050 }, { "epoch": 0.25229936240473505, "grad_norm": 2.439788818359375, "learning_rate": 9.937353393118772e-06, "loss": 3.1098, "step": 256100 }, { "epoch": 0.2523486203825572, "grad_norm": 2.43515682220459, "learning_rate": 9.937328970557814e-06, "loss": 3.0703, "step": 256150 }, { "epoch": 0.25239787836037925, "grad_norm": 2.293869972229004, "learning_rate": 9.937304543267283e-06, "loss": 3.114, "step": 256200 }, { "epoch": 0.2524471363382013, "grad_norm": 2.7489066123962402, "learning_rate": 9.937280111247199e-06, "loss": 3.0844, "step": 256250 }, { "epoch": 0.25249639431602344, "grad_norm": 2.326929807662964, "learning_rate": 9.937255674497588e-06, "loss": 3.128, "step": 256300 }, { "epoch": 0.2525456522938455, "grad_norm": 2.3794050216674805, "learning_rate": 9.937231233018473e-06, "loss": 3.0979, "step": 256350 }, { "epoch": 0.2525949102716676, "grad_norm": 2.313791275024414, "learning_rate": 9.937206786809877e-06, "loss": 3.0731, "step": 256400 }, { "epoch": 0.2526441682494897, "grad_norm": 2.9497628211975098, "learning_rate": 9.937182335871823e-06, "loss": 3.0589, "step": 256450 }, { "epoch": 0.2526934262273118, "grad_norm": 2.3087689876556396, "learning_rate": 9.937157880204336e-06, "loss": 3.0867, "step": 256500 }, { "epoch": 0.25274268420513385, "grad_norm": 2.239593982696533, "learning_rate": 9.937133419807437e-06, "loss": 3.037, "step": 256550 }, { "epoch": 0.252791942182956, "grad_norm": 2.4424500465393066, "learning_rate": 9.937108954681152e-06, "loss": 3.071, "step": 256600 }, { "epoch": 0.25284120016077805, "grad_norm": 2.4858295917510986, "learning_rate": 9.937084484825502e-06, "loss": 3.0842, "step": 256650 }, { "epoch": 0.2528904581386001, "grad_norm": 2.2719104290008545, "learning_rate": 9.937060010240512e-06, "loss": 3.1002, "step": 256700 }, { "epoch": 0.25293971611642224, "grad_norm": 2.20900821685791, "learning_rate": 9.937035530926207e-06, "loss": 3.0935, "step": 256750 }, { "epoch": 
0.2529889740942443, "grad_norm": 2.385298252105713, "learning_rate": 9.937011046882606e-06, "loss": 3.0206, "step": 256800 }, { "epoch": 0.2530382320720664, "grad_norm": 2.3431928157806396, "learning_rate": 9.936986558109736e-06, "loss": 3.0918, "step": 256850 }, { "epoch": 0.25308749004988845, "grad_norm": 2.3850743770599365, "learning_rate": 9.93696206460762e-06, "loss": 3.0065, "step": 256900 }, { "epoch": 0.2531367480277106, "grad_norm": 2.22607159614563, "learning_rate": 9.93693756637628e-06, "loss": 3.0331, "step": 256950 }, { "epoch": 0.25318600600553265, "grad_norm": 2.354191541671753, "learning_rate": 9.936913063415741e-06, "loss": 3.0231, "step": 257000 }, { "epoch": 0.2532352639833547, "grad_norm": 2.306366443634033, "learning_rate": 9.936888555726025e-06, "loss": 3.059, "step": 257050 }, { "epoch": 0.25328452196117685, "grad_norm": 2.1821885108947754, "learning_rate": 9.936864043307158e-06, "loss": 3.1197, "step": 257100 }, { "epoch": 0.2533337799389989, "grad_norm": 2.3182213306427, "learning_rate": 9.936839526159162e-06, "loss": 3.1396, "step": 257150 }, { "epoch": 0.253383037916821, "grad_norm": 2.3195254802703857, "learning_rate": 9.936815004282057e-06, "loss": 3.0491, "step": 257200 }, { "epoch": 0.2534322958946431, "grad_norm": 2.2796568870544434, "learning_rate": 9.936790477675873e-06, "loss": 3.0364, "step": 257250 }, { "epoch": 0.2534815538724652, "grad_norm": 2.2190959453582764, "learning_rate": 9.93676594634063e-06, "loss": 3.1216, "step": 257300 }, { "epoch": 0.25353081185028725, "grad_norm": 2.4295012950897217, "learning_rate": 9.93674141027635e-06, "loss": 3.1017, "step": 257350 }, { "epoch": 0.2535800698281094, "grad_norm": 2.4229323863983154, "learning_rate": 9.93671686948306e-06, "loss": 3.0808, "step": 257400 }, { "epoch": 0.25362932780593145, "grad_norm": 2.158740520477295, "learning_rate": 9.936692323960782e-06, "loss": 3.1169, "step": 257450 }, { "epoch": 0.2536785857837535, "grad_norm": 2.242999792098999, "learning_rate": 
9.936667773709539e-06, "loss": 3.0133, "step": 257500 }, { "epoch": 0.25372784376157564, "grad_norm": 2.2652461528778076, "learning_rate": 9.936643218729354e-06, "loss": 2.9839, "step": 257550 }, { "epoch": 0.2537771017393977, "grad_norm": 2.542161464691162, "learning_rate": 9.936618659020252e-06, "loss": 3.1022, "step": 257600 }, { "epoch": 0.2538263597172198, "grad_norm": 2.307978868484497, "learning_rate": 9.936594094582255e-06, "loss": 3.1068, "step": 257650 }, { "epoch": 0.2538756176950419, "grad_norm": 2.342412233352661, "learning_rate": 9.936569525415389e-06, "loss": 3.0855, "step": 257700 }, { "epoch": 0.253924875672864, "grad_norm": 2.3787951469421387, "learning_rate": 9.936544951519675e-06, "loss": 3.1073, "step": 257750 }, { "epoch": 0.25397413365068605, "grad_norm": 2.3494632244110107, "learning_rate": 9.936520372895137e-06, "loss": 3.1068, "step": 257800 }, { "epoch": 0.2540233916285082, "grad_norm": 2.307288885116577, "learning_rate": 9.9364957895418e-06, "loss": 3.1082, "step": 257850 }, { "epoch": 0.25407264960633025, "grad_norm": 2.1457531452178955, "learning_rate": 9.936471201459686e-06, "loss": 3.0961, "step": 257900 }, { "epoch": 0.2541219075841523, "grad_norm": 2.312784194946289, "learning_rate": 9.93644660864882e-06, "loss": 3.0106, "step": 257950 }, { "epoch": 0.25417116556197444, "grad_norm": 2.2943084239959717, "learning_rate": 9.936422011109224e-06, "loss": 3.0397, "step": 258000 }, { "epoch": 0.2542204235397965, "grad_norm": 2.3307204246520996, "learning_rate": 9.936397408840922e-06, "loss": 3.1069, "step": 258050 }, { "epoch": 0.2542696815176186, "grad_norm": 2.537062168121338, "learning_rate": 9.936372801843938e-06, "loss": 3.0722, "step": 258100 }, { "epoch": 0.25431893949544065, "grad_norm": 2.456529140472412, "learning_rate": 9.936348190118295e-06, "loss": 3.0776, "step": 258150 }, { "epoch": 0.2543681974732628, "grad_norm": 2.2354092597961426, "learning_rate": 9.936323573664017e-06, "loss": 3.0956, "step": 258200 }, { "epoch": 
0.25441745545108485, "grad_norm": 2.306375741958618, "learning_rate": 9.936298952481129e-06, "loss": 3.0752, "step": 258250 }, { "epoch": 0.2544667134289069, "grad_norm": 2.3969156742095947, "learning_rate": 9.936274326569652e-06, "loss": 3.0308, "step": 258300 }, { "epoch": 0.25451597140672905, "grad_norm": 2.2978198528289795, "learning_rate": 9.936249695929612e-06, "loss": 3.0466, "step": 258350 }, { "epoch": 0.2545652293845511, "grad_norm": 2.1055655479431152, "learning_rate": 9.93622506056103e-06, "loss": 3.0605, "step": 258400 }, { "epoch": 0.2546144873623732, "grad_norm": 2.231285333633423, "learning_rate": 9.936200420463931e-06, "loss": 3.0917, "step": 258450 }, { "epoch": 0.2546637453401953, "grad_norm": 2.7285451889038086, "learning_rate": 9.936175775638339e-06, "loss": 3.1291, "step": 258500 }, { "epoch": 0.2547130033180174, "grad_norm": 2.3237266540527344, "learning_rate": 9.936151126084277e-06, "loss": 3.0943, "step": 258550 }, { "epoch": 0.25476226129583945, "grad_norm": 2.3082072734832764, "learning_rate": 9.936126471801769e-06, "loss": 3.0941, "step": 258600 }, { "epoch": 0.2548115192736616, "grad_norm": 2.368072271347046, "learning_rate": 9.936101812790836e-06, "loss": 3.1081, "step": 258650 }, { "epoch": 0.25486077725148365, "grad_norm": 2.303162097930908, "learning_rate": 9.936077149051509e-06, "loss": 3.0867, "step": 258700 }, { "epoch": 0.2549100352293057, "grad_norm": 2.3232405185699463, "learning_rate": 9.936052480583802e-06, "loss": 3.0254, "step": 258750 }, { "epoch": 0.25495929320712785, "grad_norm": 2.335277557373047, "learning_rate": 9.936027807387745e-06, "loss": 3.1087, "step": 258800 }, { "epoch": 0.2550085511849499, "grad_norm": 2.686619997024536, "learning_rate": 9.936003129463361e-06, "loss": 3.111, "step": 258850 }, { "epoch": 0.255057809162772, "grad_norm": 2.3179800510406494, "learning_rate": 9.93597844681067e-06, "loss": 3.0888, "step": 258900 }, { "epoch": 0.2551070671405941, "grad_norm": 2.3299334049224854, "learning_rate": 
9.935953759429699e-06, "loss": 3.0873, "step": 258950 }, { "epoch": 0.2551563251184162, "grad_norm": 2.3747615814208984, "learning_rate": 9.935929067320473e-06, "loss": 3.1039, "step": 259000 }, { "epoch": 0.25520558309623825, "grad_norm": 2.4137990474700928, "learning_rate": 9.93590437048301e-06, "loss": 3.1233, "step": 259050 }, { "epoch": 0.2552548410740604, "grad_norm": 2.3562309741973877, "learning_rate": 9.935879668917341e-06, "loss": 3.0377, "step": 259100 }, { "epoch": 0.25530409905188245, "grad_norm": 2.2881319522857666, "learning_rate": 9.935854962623483e-06, "loss": 3.0762, "step": 259150 }, { "epoch": 0.2553533570297045, "grad_norm": 2.317577362060547, "learning_rate": 9.935830251601463e-06, "loss": 3.041, "step": 259200 }, { "epoch": 0.25540261500752665, "grad_norm": 2.4774842262268066, "learning_rate": 9.935805535851304e-06, "loss": 3.1181, "step": 259250 }, { "epoch": 0.2554518729853487, "grad_norm": 2.4339921474456787, "learning_rate": 9.93578081537303e-06, "loss": 3.095, "step": 259300 }, { "epoch": 0.2555011309631708, "grad_norm": 2.331709623336792, "learning_rate": 9.935756090166666e-06, "loss": 3.1146, "step": 259350 }, { "epoch": 0.25555038894099286, "grad_norm": 2.356633186340332, "learning_rate": 9.935731360232232e-06, "loss": 3.0883, "step": 259400 }, { "epoch": 0.255599646918815, "grad_norm": 2.58652400970459, "learning_rate": 9.935706625569754e-06, "loss": 3.0339, "step": 259450 }, { "epoch": 0.25564890489663705, "grad_norm": 2.6274592876434326, "learning_rate": 9.935681886179258e-06, "loss": 3.0788, "step": 259500 }, { "epoch": 0.2556981628744591, "grad_norm": 2.425046682357788, "learning_rate": 9.935657142060762e-06, "loss": 3.1278, "step": 259550 }, { "epoch": 0.25574742085228125, "grad_norm": 2.570483922958374, "learning_rate": 9.935632393214295e-06, "loss": 3.0888, "step": 259600 }, { "epoch": 0.2557966788301033, "grad_norm": 2.372954845428467, "learning_rate": 9.935607639639877e-06, "loss": 3.0642, "step": 259650 }, { "epoch": 
0.2558459368079254, "grad_norm": 2.1559035778045654, "learning_rate": 9.935582881337535e-06, "loss": 3.0936, "step": 259700 }, { "epoch": 0.2558951947857475, "grad_norm": 2.422140121459961, "learning_rate": 9.935558118307291e-06, "loss": 3.061, "step": 259750 }, { "epoch": 0.2559444527635696, "grad_norm": 2.3255815505981445, "learning_rate": 9.935533350549168e-06, "loss": 3.0689, "step": 259800 }, { "epoch": 0.25599371074139166, "grad_norm": 2.442577362060547, "learning_rate": 9.935508578063192e-06, "loss": 3.0887, "step": 259850 }, { "epoch": 0.2560429687192138, "grad_norm": 2.306621551513672, "learning_rate": 9.935483800849384e-06, "loss": 3.1181, "step": 259900 }, { "epoch": 0.25609222669703585, "grad_norm": 2.4201009273529053, "learning_rate": 9.935459018907769e-06, "loss": 3.0226, "step": 259950 }, { "epoch": 0.2561414846748579, "grad_norm": 2.2478420734405518, "learning_rate": 9.93543423223837e-06, "loss": 3.112, "step": 260000 }, { "epoch": 0.25619074265268005, "grad_norm": 2.3587725162506104, "learning_rate": 9.935409440841212e-06, "loss": 3.0511, "step": 260050 }, { "epoch": 0.2562400006305021, "grad_norm": 2.2620270252227783, "learning_rate": 9.93538464471632e-06, "loss": 3.0882, "step": 260100 }, { "epoch": 0.2562892586083242, "grad_norm": 2.373584032058716, "learning_rate": 9.935359843863713e-06, "loss": 3.0674, "step": 260150 }, { "epoch": 0.2563385165861463, "grad_norm": 2.468580961227417, "learning_rate": 9.93533503828342e-06, "loss": 3.085, "step": 260200 }, { "epoch": 0.2563877745639684, "grad_norm": 2.366079330444336, "learning_rate": 9.93531022797546e-06, "loss": 3.1251, "step": 260250 }, { "epoch": 0.25643703254179046, "grad_norm": 2.385525703430176, "learning_rate": 9.935285412939863e-06, "loss": 3.0382, "step": 260300 }, { "epoch": 0.2564862905196126, "grad_norm": 2.186537027359009, "learning_rate": 9.935260593176646e-06, "loss": 3.042, "step": 260350 }, { "epoch": 0.25653554849743465, "grad_norm": 2.526230812072754, "learning_rate": 
9.935235768685839e-06, "loss": 3.0707, "step": 260400 }, { "epoch": 0.2565848064752567, "grad_norm": 2.3876867294311523, "learning_rate": 9.935210939467459e-06, "loss": 3.0912, "step": 260450 }, { "epoch": 0.25663406445307885, "grad_norm": 2.1879594326019287, "learning_rate": 9.935186105521534e-06, "loss": 3.0342, "step": 260500 }, { "epoch": 0.2566833224309009, "grad_norm": 2.351672887802124, "learning_rate": 9.935161266848088e-06, "loss": 3.0352, "step": 260550 }, { "epoch": 0.256732580408723, "grad_norm": 2.398935079574585, "learning_rate": 9.935136423447145e-06, "loss": 3.1426, "step": 260600 }, { "epoch": 0.25678183838654506, "grad_norm": 2.1015028953552246, "learning_rate": 9.935111575318728e-06, "loss": 3.0576, "step": 260650 }, { "epoch": 0.2568310963643672, "grad_norm": 2.2587039470672607, "learning_rate": 9.935086722462858e-06, "loss": 3.0852, "step": 260700 }, { "epoch": 0.25688035434218925, "grad_norm": 2.2581071853637695, "learning_rate": 9.935061864879563e-06, "loss": 3.079, "step": 260750 }, { "epoch": 0.2569296123200113, "grad_norm": 2.4232730865478516, "learning_rate": 9.935037002568865e-06, "loss": 3.116, "step": 260800 }, { "epoch": 0.25697887029783345, "grad_norm": 2.2190699577331543, "learning_rate": 9.935012135530788e-06, "loss": 3.0915, "step": 260850 }, { "epoch": 0.2570281282756555, "grad_norm": 2.35693621635437, "learning_rate": 9.934987263765355e-06, "loss": 3.133, "step": 260900 }, { "epoch": 0.2570773862534776, "grad_norm": 2.2636609077453613, "learning_rate": 9.934962387272593e-06, "loss": 3.0729, "step": 260950 }, { "epoch": 0.2571266442312997, "grad_norm": 2.295818328857422, "learning_rate": 9.93493750605252e-06, "loss": 3.0473, "step": 261000 }, { "epoch": 0.2571759022091218, "grad_norm": 2.446805477142334, "learning_rate": 9.934912620105167e-06, "loss": 3.0861, "step": 261050 }, { "epoch": 0.25722516018694386, "grad_norm": 2.388827085494995, "learning_rate": 9.93488772943055e-06, "loss": 3.0732, "step": 261100 }, { "epoch": 
0.257274418164766, "grad_norm": 2.3711190223693848, "learning_rate": 9.934862834028701e-06, "loss": 3.1042, "step": 261150 }, { "epoch": 0.25732367614258805, "grad_norm": 2.3979122638702393, "learning_rate": 9.934837933899637e-06, "loss": 3.1186, "step": 261200 }, { "epoch": 0.2573729341204101, "grad_norm": 2.3179659843444824, "learning_rate": 9.934813029043387e-06, "loss": 3.0394, "step": 261250 }, { "epoch": 0.25742219209823225, "grad_norm": 2.533970355987549, "learning_rate": 9.934788119459971e-06, "loss": 3.1065, "step": 261300 }, { "epoch": 0.2574714500760543, "grad_norm": 3.6127326488494873, "learning_rate": 9.934763205149414e-06, "loss": 3.0721, "step": 261350 }, { "epoch": 0.2575207080538764, "grad_norm": 2.2646238803863525, "learning_rate": 9.934738286111742e-06, "loss": 3.0592, "step": 261400 }, { "epoch": 0.2575699660316985, "grad_norm": 2.26383376121521, "learning_rate": 9.934713362346975e-06, "loss": 3.02, "step": 261450 }, { "epoch": 0.2576192240095206, "grad_norm": 2.250631332397461, "learning_rate": 9.934688433855141e-06, "loss": 3.0955, "step": 261500 }, { "epoch": 0.25766848198734266, "grad_norm": 2.442823886871338, "learning_rate": 9.93466350063626e-06, "loss": 3.036, "step": 261550 }, { "epoch": 0.2577177399651648, "grad_norm": 2.4431490898132324, "learning_rate": 9.93463856269036e-06, "loss": 3.1021, "step": 261600 }, { "epoch": 0.25776699794298685, "grad_norm": 2.3174731731414795, "learning_rate": 9.934613620017462e-06, "loss": 3.0641, "step": 261650 }, { "epoch": 0.2578162559208089, "grad_norm": 2.353742837905884, "learning_rate": 9.93458867261759e-06, "loss": 3.0665, "step": 261700 }, { "epoch": 0.25786551389863105, "grad_norm": 2.464116096496582, "learning_rate": 9.934563720490769e-06, "loss": 3.1115, "step": 261750 }, { "epoch": 0.2579147718764531, "grad_norm": 2.7770769596099854, "learning_rate": 9.934538763637021e-06, "loss": 3.0913, "step": 261800 }, { "epoch": 0.2579640298542752, "grad_norm": 2.283189535140991, "learning_rate": 
9.934513802056372e-06, "loss": 3.0772, "step": 261850 }, { "epoch": 0.25801328783209726, "grad_norm": 2.1706981658935547, "learning_rate": 9.934488835748845e-06, "loss": 3.1118, "step": 261900 }, { "epoch": 0.2580625458099194, "grad_norm": 2.529280185699463, "learning_rate": 9.934463864714465e-06, "loss": 3.0571, "step": 261950 }, { "epoch": 0.25811180378774146, "grad_norm": 2.722412347793579, "learning_rate": 9.934438888953255e-06, "loss": 3.0161, "step": 262000 }, { "epoch": 0.2581610617655635, "grad_norm": 2.439823627471924, "learning_rate": 9.934413908465238e-06, "loss": 3.0327, "step": 262050 }, { "epoch": 0.25821031974338565, "grad_norm": 2.273712635040283, "learning_rate": 9.93438892325044e-06, "loss": 3.1111, "step": 262100 }, { "epoch": 0.2582595777212077, "grad_norm": 2.21352219581604, "learning_rate": 9.934363933308882e-06, "loss": 3.1152, "step": 262150 }, { "epoch": 0.2583088356990298, "grad_norm": 2.3663134574890137, "learning_rate": 9.93433893864059e-06, "loss": 3.1107, "step": 262200 }, { "epoch": 0.2583580936768519, "grad_norm": 2.292452335357666, "learning_rate": 9.93431393924559e-06, "loss": 3.0541, "step": 262250 }, { "epoch": 0.258407351654674, "grad_norm": 2.2005016803741455, "learning_rate": 9.9342889351239e-06, "loss": 2.9605, "step": 262300 }, { "epoch": 0.25845660963249606, "grad_norm": 2.263667583465576, "learning_rate": 9.93426392627555e-06, "loss": 3.1351, "step": 262350 }, { "epoch": 0.2585058676103182, "grad_norm": 2.321561336517334, "learning_rate": 9.934238912700562e-06, "loss": 3.0939, "step": 262400 }, { "epoch": 0.25855512558814026, "grad_norm": 2.230804681777954, "learning_rate": 9.93421389439896e-06, "loss": 3.0738, "step": 262450 }, { "epoch": 0.2586043835659623, "grad_norm": 2.3438355922698975, "learning_rate": 9.934188871370766e-06, "loss": 3.0903, "step": 262500 }, { "epoch": 0.25865364154378445, "grad_norm": 2.8147759437561035, "learning_rate": 9.934163843616005e-06, "loss": 3.077, "step": 262550 }, { "epoch": 
0.2587028995216065, "grad_norm": 2.487534761428833, "learning_rate": 9.934138811134704e-06, "loss": 3.1012, "step": 262600 }, { "epoch": 0.2587521574994286, "grad_norm": 2.518249988555908, "learning_rate": 9.934113773926881e-06, "loss": 3.0756, "step": 262650 }, { "epoch": 0.2588014154772507, "grad_norm": 2.2651939392089844, "learning_rate": 9.934088731992565e-06, "loss": 3.0796, "step": 262700 }, { "epoch": 0.2588506734550728, "grad_norm": 2.461376667022705, "learning_rate": 9.934063685331778e-06, "loss": 3.0791, "step": 262750 }, { "epoch": 0.25889993143289486, "grad_norm": 2.3872203826904297, "learning_rate": 9.934038633944544e-06, "loss": 3.123, "step": 262800 }, { "epoch": 0.258949189410717, "grad_norm": 2.391319990158081, "learning_rate": 9.93401357783089e-06, "loss": 3.0479, "step": 262850 }, { "epoch": 0.25899844738853905, "grad_norm": 2.4777257442474365, "learning_rate": 9.933988516990836e-06, "loss": 2.9947, "step": 262900 }, { "epoch": 0.2590477053663611, "grad_norm": 2.2939376831054688, "learning_rate": 9.933963451424407e-06, "loss": 3.0737, "step": 262950 }, { "epoch": 0.25909696334418325, "grad_norm": 2.2882626056671143, "learning_rate": 9.933938381131626e-06, "loss": 3.146, "step": 263000 }, { "epoch": 0.2591462213220053, "grad_norm": 2.247429370880127, "learning_rate": 9.933913306112519e-06, "loss": 3.0689, "step": 263050 }, { "epoch": 0.2591954792998274, "grad_norm": 2.337207555770874, "learning_rate": 9.93388822636711e-06, "loss": 3.096, "step": 263100 }, { "epoch": 0.25924473727764946, "grad_norm": 2.3328146934509277, "learning_rate": 9.933863141895422e-06, "loss": 3.0534, "step": 263150 }, { "epoch": 0.2592939952554716, "grad_norm": 2.4184648990631104, "learning_rate": 9.93383805269748e-06, "loss": 3.0944, "step": 263200 }, { "epoch": 0.25934325323329366, "grad_norm": 2.757802963256836, "learning_rate": 9.933812958773307e-06, "loss": 3.1401, "step": 263250 }, { "epoch": 0.25939251121111573, "grad_norm": 2.634526014328003, "learning_rate": 
9.933787860122929e-06, "loss": 3.1205, "step": 263300 }, { "epoch": 0.25944176918893785, "grad_norm": 2.460357189178467, "learning_rate": 9.933762756746366e-06, "loss": 3.0786, "step": 263350 }, { "epoch": 0.2594910271667599, "grad_norm": 2.6272666454315186, "learning_rate": 9.933737648643647e-06, "loss": 3.0671, "step": 263400 }, { "epoch": 0.259540285144582, "grad_norm": 2.2090325355529785, "learning_rate": 9.933712535814793e-06, "loss": 3.1257, "step": 263450 }, { "epoch": 0.2595895431224041, "grad_norm": 2.153813362121582, "learning_rate": 9.933687418259827e-06, "loss": 3.0344, "step": 263500 }, { "epoch": 0.2596388011002262, "grad_norm": 2.376289129257202, "learning_rate": 9.933662295978777e-06, "loss": 3.0392, "step": 263550 }, { "epoch": 0.25968805907804826, "grad_norm": 2.3167994022369385, "learning_rate": 9.933637168971664e-06, "loss": 3.075, "step": 263600 }, { "epoch": 0.2597373170558704, "grad_norm": 2.3211567401885986, "learning_rate": 9.933612037238513e-06, "loss": 3.078, "step": 263650 }, { "epoch": 0.25978657503369246, "grad_norm": 2.733569622039795, "learning_rate": 9.933586900779349e-06, "loss": 3.1042, "step": 263700 }, { "epoch": 0.2598358330115145, "grad_norm": 2.2014262676239014, "learning_rate": 9.933561759594194e-06, "loss": 3.0498, "step": 263750 }, { "epoch": 0.25988509098933665, "grad_norm": 2.3283870220184326, "learning_rate": 9.933536613683072e-06, "loss": 3.0603, "step": 263800 }, { "epoch": 0.2599343489671587, "grad_norm": 2.323721408843994, "learning_rate": 9.933511463046011e-06, "loss": 3.1603, "step": 263850 }, { "epoch": 0.2599836069449808, "grad_norm": 2.5439741611480713, "learning_rate": 9.933486307683032e-06, "loss": 3.056, "step": 263900 }, { "epoch": 0.2600328649228029, "grad_norm": 2.3531341552734375, "learning_rate": 9.933461147594157e-06, "loss": 3.1219, "step": 263950 }, { "epoch": 0.260082122900625, "grad_norm": 2.582170248031616, "learning_rate": 9.933435982779414e-06, "loss": 3.0702, "step": 264000 }, { "epoch": 
0.26013138087844706, "grad_norm": 2.203338861465454, "learning_rate": 9.933410813238826e-06, "loss": 3.1252, "step": 264050 }, { "epoch": 0.2601806388562692, "grad_norm": 2.2364983558654785, "learning_rate": 9.933385638972415e-06, "loss": 3.0388, "step": 264100 }, { "epoch": 0.26022989683409126, "grad_norm": 2.5165584087371826, "learning_rate": 9.933360459980208e-06, "loss": 3.0387, "step": 264150 }, { "epoch": 0.2602791548119133, "grad_norm": 2.4020442962646484, "learning_rate": 9.933335276262228e-06, "loss": 3.0208, "step": 264200 }, { "epoch": 0.26032841278973545, "grad_norm": 2.365980625152588, "learning_rate": 9.9333100878185e-06, "loss": 3.0542, "step": 264250 }, { "epoch": 0.2603776707675575, "grad_norm": 2.275216579437256, "learning_rate": 9.933284894649046e-06, "loss": 3.0926, "step": 264300 }, { "epoch": 0.2604269287453796, "grad_norm": 2.225046396255493, "learning_rate": 9.933259696753891e-06, "loss": 3.167, "step": 264350 }, { "epoch": 0.26047618672320166, "grad_norm": 2.237882375717163, "learning_rate": 9.933234494133061e-06, "loss": 3.1036, "step": 264400 }, { "epoch": 0.2605254447010238, "grad_norm": 2.2599258422851562, "learning_rate": 9.933209286786579e-06, "loss": 3.0438, "step": 264450 }, { "epoch": 0.26057470267884586, "grad_norm": 2.489572525024414, "learning_rate": 9.933184074714469e-06, "loss": 3.0697, "step": 264500 }, { "epoch": 0.26062396065666793, "grad_norm": 2.3389358520507812, "learning_rate": 9.933158857916753e-06, "loss": 3.0055, "step": 264550 }, { "epoch": 0.26067321863449006, "grad_norm": 2.131216526031494, "learning_rate": 9.933133636393456e-06, "loss": 3.0557, "step": 264600 }, { "epoch": 0.2607224766123121, "grad_norm": 2.314331293106079, "learning_rate": 9.933108410144607e-06, "loss": 3.0307, "step": 264650 }, { "epoch": 0.2607717345901342, "grad_norm": 2.502854347229004, "learning_rate": 9.933083179170225e-06, "loss": 3.0467, "step": 264700 }, { "epoch": 0.2608209925679563, "grad_norm": 2.3536572456359863, "learning_rate": 
9.933057943470334e-06, "loss": 3.0191, "step": 264750 }, { "epoch": 0.2608702505457784, "grad_norm": 2.1705939769744873, "learning_rate": 9.933032703044963e-06, "loss": 3.0696, "step": 264800 }, { "epoch": 0.26091950852360046, "grad_norm": 2.4820799827575684, "learning_rate": 9.933007457894131e-06, "loss": 3.1286, "step": 264850 }, { "epoch": 0.2609687665014226, "grad_norm": 2.2030234336853027, "learning_rate": 9.932982208017863e-06, "loss": 3.0509, "step": 264900 }, { "epoch": 0.26101802447924466, "grad_norm": 2.47273588180542, "learning_rate": 9.932956953416185e-06, "loss": 3.1139, "step": 264950 }, { "epoch": 0.26106728245706673, "grad_norm": 2.392012596130371, "learning_rate": 9.93293169408912e-06, "loss": 3.0688, "step": 265000 }, { "epoch": 0.26111654043488886, "grad_norm": 2.3819375038146973, "learning_rate": 9.932906430036695e-06, "loss": 3.0052, "step": 265050 }, { "epoch": 0.2611657984127109, "grad_norm": 2.3022165298461914, "learning_rate": 9.932881161258931e-06, "loss": 3.0962, "step": 265100 }, { "epoch": 0.261215056390533, "grad_norm": 2.3350555896759033, "learning_rate": 9.932855887755852e-06, "loss": 3.0902, "step": 265150 }, { "epoch": 0.2612643143683551, "grad_norm": 2.639866352081299, "learning_rate": 9.932830609527483e-06, "loss": 3.0366, "step": 265200 }, { "epoch": 0.2613135723461772, "grad_norm": 2.1750435829162598, "learning_rate": 9.93280532657385e-06, "loss": 3.0234, "step": 265250 }, { "epoch": 0.26136283032399926, "grad_norm": 2.2043814659118652, "learning_rate": 9.932780038894977e-06, "loss": 3.0747, "step": 265300 }, { "epoch": 0.2614120883018214, "grad_norm": 2.2014036178588867, "learning_rate": 9.932754746490885e-06, "loss": 3.1524, "step": 265350 }, { "epoch": 0.26146134627964346, "grad_norm": 2.2648870944976807, "learning_rate": 9.9327294493616e-06, "loss": 3.0915, "step": 265400 }, { "epoch": 0.26151060425746553, "grad_norm": 2.376891613006592, "learning_rate": 9.932704147507149e-06, "loss": 3.0386, "step": 265450 }, { "epoch": 
0.26155986223528765, "grad_norm": 2.5101590156555176, "learning_rate": 9.93267884092755e-06, "loss": 3.0351, "step": 265500 }, { "epoch": 0.2616091202131097, "grad_norm": 2.0977275371551514, "learning_rate": 9.932653529622833e-06, "loss": 3.087, "step": 265550 }, { "epoch": 0.2616583781909318, "grad_norm": 2.2630743980407715, "learning_rate": 9.93262821359302e-06, "loss": 3.0467, "step": 265600 }, { "epoch": 0.26170763616875387, "grad_norm": 2.4610393047332764, "learning_rate": 9.932602892838135e-06, "loss": 3.0657, "step": 265650 }, { "epoch": 0.261756894146576, "grad_norm": 2.184044599533081, "learning_rate": 9.932577567358203e-06, "loss": 3.1085, "step": 265700 }, { "epoch": 0.26180615212439806, "grad_norm": 2.263251304626465, "learning_rate": 9.932552237153248e-06, "loss": 3.0666, "step": 265750 }, { "epoch": 0.26185541010222013, "grad_norm": 4.362361907958984, "learning_rate": 9.932526902223295e-06, "loss": 3.1286, "step": 265800 }, { "epoch": 0.26190466808004226, "grad_norm": 2.264329433441162, "learning_rate": 9.932501562568366e-06, "loss": 3.1373, "step": 265850 }, { "epoch": 0.26195392605786433, "grad_norm": 2.3149986267089844, "learning_rate": 9.932476218188487e-06, "loss": 3.0524, "step": 265900 }, { "epoch": 0.2620031840356864, "grad_norm": 2.3355472087860107, "learning_rate": 9.932450869083683e-06, "loss": 3.0105, "step": 265950 }, { "epoch": 0.2620524420135085, "grad_norm": 2.240657329559326, "learning_rate": 9.932425515253976e-06, "loss": 3.0171, "step": 266000 }, { "epoch": 0.2621016999913306, "grad_norm": 2.3302268981933594, "learning_rate": 9.932400156699393e-06, "loss": 3.0928, "step": 266050 }, { "epoch": 0.26215095796915266, "grad_norm": 2.484650135040283, "learning_rate": 9.932374793419957e-06, "loss": 3.1762, "step": 266100 }, { "epoch": 0.2622002159469748, "grad_norm": 2.251443386077881, "learning_rate": 9.93234942541569e-06, "loss": 3.0228, "step": 266150 }, { "epoch": 0.26224947392479686, "grad_norm": 2.3148317337036133, "learning_rate": 
9.932324052686619e-06, "loss": 3.0373, "step": 266200 }, { "epoch": 0.26229873190261893, "grad_norm": 2.5024971961975098, "learning_rate": 9.93229867523277e-06, "loss": 3.0128, "step": 266250 }, { "epoch": 0.26234798988044106, "grad_norm": 2.3264265060424805, "learning_rate": 9.932273293054163e-06, "loss": 3.0972, "step": 266300 }, { "epoch": 0.2623972478582631, "grad_norm": 2.3888535499572754, "learning_rate": 9.932247906150826e-06, "loss": 3.1076, "step": 266350 }, { "epoch": 0.2624465058360852, "grad_norm": 2.172363519668579, "learning_rate": 9.93222251452278e-06, "loss": 3.058, "step": 266400 }, { "epoch": 0.2624957638139073, "grad_norm": 2.3558425903320312, "learning_rate": 9.932197118170052e-06, "loss": 3.0377, "step": 266450 }, { "epoch": 0.2625450217917294, "grad_norm": 2.2067978382110596, "learning_rate": 9.932171717092666e-06, "loss": 3.0166, "step": 266500 }, { "epoch": 0.26259427976955146, "grad_norm": 2.311547040939331, "learning_rate": 9.932146311290644e-06, "loss": 3.055, "step": 266550 }, { "epoch": 0.2626435377473736, "grad_norm": 2.256028890609741, "learning_rate": 9.932120900764014e-06, "loss": 3.1205, "step": 266600 }, { "epoch": 0.26269279572519566, "grad_norm": 2.339846611022949, "learning_rate": 9.932095485512798e-06, "loss": 3.0983, "step": 266650 }, { "epoch": 0.26274205370301773, "grad_norm": 2.1908113956451416, "learning_rate": 9.93207006553702e-06, "loss": 3.1126, "step": 266700 }, { "epoch": 0.26279131168083986, "grad_norm": 2.4471638202667236, "learning_rate": 9.932044640836704e-06, "loss": 3.0624, "step": 266750 }, { "epoch": 0.2628405696586619, "grad_norm": 2.3092570304870605, "learning_rate": 9.93201921141188e-06, "loss": 3.1238, "step": 266800 }, { "epoch": 0.262889827636484, "grad_norm": 2.2091095447540283, "learning_rate": 9.931993777262562e-06, "loss": 3.0867, "step": 266850 }, { "epoch": 0.26293908561430607, "grad_norm": 2.350799560546875, "learning_rate": 9.931968338388783e-06, "loss": 3.0603, "step": 266900 }, { "epoch": 
0.2629883435921282, "grad_norm": 2.4707634449005127, "learning_rate": 9.931942894790564e-06, "loss": 3.0413, "step": 266950 }, { "epoch": 0.26303760156995026, "grad_norm": 2.3779118061065674, "learning_rate": 9.931917446467931e-06, "loss": 3.0396, "step": 267000 }, { "epoch": 0.26308685954777233, "grad_norm": 2.4660568237304688, "learning_rate": 9.931891993420906e-06, "loss": 3.0844, "step": 267050 }, { "epoch": 0.26313611752559446, "grad_norm": 2.157242774963379, "learning_rate": 9.931866535649515e-06, "loss": 3.0511, "step": 267100 }, { "epoch": 0.26318537550341653, "grad_norm": 2.20387864112854, "learning_rate": 9.931841073153783e-06, "loss": 3.0741, "step": 267150 }, { "epoch": 0.2632346334812386, "grad_norm": 2.4419519901275635, "learning_rate": 9.931815605933733e-06, "loss": 3.088, "step": 267200 }, { "epoch": 0.2632838914590607, "grad_norm": 2.4699862003326416, "learning_rate": 9.931790133989389e-06, "loss": 3.0822, "step": 267250 }, { "epoch": 0.2633331494368828, "grad_norm": 2.392817258834839, "learning_rate": 9.931764657320778e-06, "loss": 3.0149, "step": 267300 }, { "epoch": 0.26338240741470487, "grad_norm": 2.3471341133117676, "learning_rate": 9.93173917592792e-06, "loss": 3.1146, "step": 267350 }, { "epoch": 0.263431665392527, "grad_norm": 2.19142484664917, "learning_rate": 9.931713689810843e-06, "loss": 3.0603, "step": 267400 }, { "epoch": 0.26348092337034906, "grad_norm": 2.2037861347198486, "learning_rate": 9.931688198969571e-06, "loss": 3.0948, "step": 267450 }, { "epoch": 0.26353018134817113, "grad_norm": 2.2188687324523926, "learning_rate": 9.931662703404128e-06, "loss": 3.0214, "step": 267500 }, { "epoch": 0.26357943932599326, "grad_norm": 2.434837579727173, "learning_rate": 9.931637203114538e-06, "loss": 2.9666, "step": 267550 }, { "epoch": 0.26362869730381533, "grad_norm": 2.5141408443450928, "learning_rate": 9.931611698100826e-06, "loss": 3.0448, "step": 267600 }, { "epoch": 0.2636779552816374, "grad_norm": 2.3356690406799316, 
"learning_rate": 9.931586188363016e-06, "loss": 3.0299, "step": 267650 }, { "epoch": 0.2637272132594595, "grad_norm": 2.327857732772827, "learning_rate": 9.931560673901133e-06, "loss": 2.9855, "step": 267700 }, { "epoch": 0.2637764712372816, "grad_norm": 2.3119494915008545, "learning_rate": 9.9315351547152e-06, "loss": 3.0294, "step": 267750 }, { "epoch": 0.26382572921510367, "grad_norm": 2.3527722358703613, "learning_rate": 9.931509630805244e-06, "loss": 3.0841, "step": 267800 }, { "epoch": 0.2638749871929258, "grad_norm": 2.3760812282562256, "learning_rate": 9.931484102171287e-06, "loss": 3.1242, "step": 267850 }, { "epoch": 0.26392424517074786, "grad_norm": 2.3739471435546875, "learning_rate": 9.931458568813354e-06, "loss": 3.1044, "step": 267900 }, { "epoch": 0.26397350314856993, "grad_norm": 2.1638565063476562, "learning_rate": 9.93143303073147e-06, "loss": 2.9749, "step": 267950 }, { "epoch": 0.26402276112639206, "grad_norm": 2.283241033554077, "learning_rate": 9.93140748792566e-06, "loss": 3.1227, "step": 268000 }, { "epoch": 0.26407201910421413, "grad_norm": 2.333972692489624, "learning_rate": 9.931381940395949e-06, "loss": 3.1183, "step": 268050 }, { "epoch": 0.2641212770820362, "grad_norm": 2.2984774112701416, "learning_rate": 9.931356388142358e-06, "loss": 3.0598, "step": 268100 }, { "epoch": 0.26417053505985827, "grad_norm": 2.1914758682250977, "learning_rate": 9.931330831164913e-06, "loss": 3.0551, "step": 268150 }, { "epoch": 0.2642197930376804, "grad_norm": 2.3171284198760986, "learning_rate": 9.931305269463642e-06, "loss": 3.088, "step": 268200 }, { "epoch": 0.26426905101550247, "grad_norm": 2.267775297164917, "learning_rate": 9.931279703038566e-06, "loss": 3.037, "step": 268250 }, { "epoch": 0.26431830899332454, "grad_norm": 2.392009973526001, "learning_rate": 9.93125413188971e-06, "loss": 3.0467, "step": 268300 }, { "epoch": 0.26436756697114666, "grad_norm": 2.2809956073760986, "learning_rate": 9.931228556017098e-06, "loss": 3.1192, "step": 268350 
}, { "epoch": 0.26441682494896873, "grad_norm": 2.3661491870880127, "learning_rate": 9.931202975420754e-06, "loss": 3.0382, "step": 268400 }, { "epoch": 0.2644660829267908, "grad_norm": 2.132511615753174, "learning_rate": 9.931177390100707e-06, "loss": 3.0897, "step": 268450 }, { "epoch": 0.26451534090461293, "grad_norm": 2.3445851802825928, "learning_rate": 9.931151800056974e-06, "loss": 3.0581, "step": 268500 }, { "epoch": 0.264564598882435, "grad_norm": 2.1925172805786133, "learning_rate": 9.931126205289587e-06, "loss": 3.1101, "step": 268550 }, { "epoch": 0.26461385686025707, "grad_norm": 2.3380303382873535, "learning_rate": 9.931100605798567e-06, "loss": 3.1113, "step": 268600 }, { "epoch": 0.2646631148380792, "grad_norm": 2.3160200119018555, "learning_rate": 9.931075001583939e-06, "loss": 3.0643, "step": 268650 }, { "epoch": 0.26471237281590126, "grad_norm": 2.8013155460357666, "learning_rate": 9.931049392645725e-06, "loss": 3.0196, "step": 268700 }, { "epoch": 0.26476163079372333, "grad_norm": 2.285468101501465, "learning_rate": 9.931023778983953e-06, "loss": 3.0136, "step": 268750 }, { "epoch": 0.26481088877154546, "grad_norm": 2.200941801071167, "learning_rate": 9.930998160598648e-06, "loss": 3.0095, "step": 268800 }, { "epoch": 0.26486014674936753, "grad_norm": 2.4813740253448486, "learning_rate": 9.93097253748983e-06, "loss": 3.0315, "step": 268850 }, { "epoch": 0.2649094047271896, "grad_norm": 2.2768115997314453, "learning_rate": 9.930946909657528e-06, "loss": 2.9886, "step": 268900 }, { "epoch": 0.2649586627050117, "grad_norm": 2.332987070083618, "learning_rate": 9.930921277101764e-06, "loss": 3.0596, "step": 268950 }, { "epoch": 0.2650079206828338, "grad_norm": 2.2726166248321533, "learning_rate": 9.930895639822566e-06, "loss": 3.1239, "step": 269000 }, { "epoch": 0.26505717866065587, "grad_norm": 2.167916774749756, "learning_rate": 9.930869997819954e-06, "loss": 2.9781, "step": 269050 }, { "epoch": 0.265106436638478, "grad_norm": 2.412663698196411, 
"learning_rate": 9.930844351093956e-06, "loss": 3.0431, "step": 269100 }, { "epoch": 0.26515569461630006, "grad_norm": 2.2605154514312744, "learning_rate": 9.930818699644594e-06, "loss": 3.0331, "step": 269150 }, { "epoch": 0.26520495259412213, "grad_norm": 2.4331347942352295, "learning_rate": 9.930793043471893e-06, "loss": 3.0291, "step": 269200 }, { "epoch": 0.2652542105719442, "grad_norm": 2.2273125648498535, "learning_rate": 9.930767382575881e-06, "loss": 3.0805, "step": 269250 }, { "epoch": 0.26530346854976633, "grad_norm": 2.470411539077759, "learning_rate": 9.930741716956578e-06, "loss": 3.0566, "step": 269300 }, { "epoch": 0.2653527265275884, "grad_norm": 2.1139142513275146, "learning_rate": 9.93071604661401e-06, "loss": 3.0558, "step": 269350 }, { "epoch": 0.26540198450541047, "grad_norm": 2.672767162322998, "learning_rate": 9.930690371548204e-06, "loss": 3.1067, "step": 269400 }, { "epoch": 0.2654512424832326, "grad_norm": 2.3223862648010254, "learning_rate": 9.930664691759182e-06, "loss": 3.0552, "step": 269450 }, { "epoch": 0.26550050046105467, "grad_norm": 2.2276573181152344, "learning_rate": 9.93063900724697e-06, "loss": 3.1189, "step": 269500 }, { "epoch": 0.26554975843887674, "grad_norm": 2.4031879901885986, "learning_rate": 9.93061331801159e-06, "loss": 3.0826, "step": 269550 }, { "epoch": 0.26559901641669886, "grad_norm": 2.3633084297180176, "learning_rate": 9.93058762405307e-06, "loss": 3.0899, "step": 269600 }, { "epoch": 0.26564827439452093, "grad_norm": 2.3614721298217773, "learning_rate": 9.930561925371434e-06, "loss": 3.0562, "step": 269650 }, { "epoch": 0.265697532372343, "grad_norm": 2.260119915008545, "learning_rate": 9.930536221966704e-06, "loss": 3.064, "step": 269700 }, { "epoch": 0.26574679035016513, "grad_norm": 2.4764583110809326, "learning_rate": 9.930510513838907e-06, "loss": 3.0648, "step": 269750 }, { "epoch": 0.2657960483279872, "grad_norm": 2.397484064102173, "learning_rate": 9.930484800988067e-06, "loss": 3.1114, "step": 
269800 }, { "epoch": 0.26584530630580927, "grad_norm": 2.2097394466400146, "learning_rate": 9.93045908341421e-06, "loss": 3.0295, "step": 269850 }, { "epoch": 0.2658945642836314, "grad_norm": 2.3395190238952637, "learning_rate": 9.930433361117358e-06, "loss": 3.0029, "step": 269900 }, { "epoch": 0.26594382226145347, "grad_norm": 2.3066799640655518, "learning_rate": 9.930407634097536e-06, "loss": 3.0202, "step": 269950 }, { "epoch": 0.26599308023927554, "grad_norm": 2.244288444519043, "learning_rate": 9.930381902354772e-06, "loss": 3.0024, "step": 270000 }, { "epoch": 0.26604233821709766, "grad_norm": 2.343271493911743, "learning_rate": 9.930356165889086e-06, "loss": 3.0317, "step": 270050 }, { "epoch": 0.26609159619491973, "grad_norm": 2.3791308403015137, "learning_rate": 9.930330424700505e-06, "loss": 3.1241, "step": 270100 }, { "epoch": 0.2661408541727418, "grad_norm": 2.274815797805786, "learning_rate": 9.930304678789053e-06, "loss": 3.0702, "step": 270150 }, { "epoch": 0.26619011215056393, "grad_norm": 2.459787130355835, "learning_rate": 9.930278928154757e-06, "loss": 3.0791, "step": 270200 }, { "epoch": 0.266239370128386, "grad_norm": 2.2481462955474854, "learning_rate": 9.930253172797639e-06, "loss": 3.1322, "step": 270250 }, { "epoch": 0.26628862810620807, "grad_norm": 2.261033058166504, "learning_rate": 9.930227412717726e-06, "loss": 3.027, "step": 270300 }, { "epoch": 0.2663378860840302, "grad_norm": 2.3554444313049316, "learning_rate": 9.930201647915039e-06, "loss": 3.1461, "step": 270350 }, { "epoch": 0.26638714406185227, "grad_norm": 2.144186019897461, "learning_rate": 9.930175878389606e-06, "loss": 3.0161, "step": 270400 }, { "epoch": 0.26643640203967434, "grad_norm": 2.2613089084625244, "learning_rate": 9.930150104141449e-06, "loss": 2.9733, "step": 270450 }, { "epoch": 0.2664856600174964, "grad_norm": 2.3649911880493164, "learning_rate": 9.930124325170598e-06, "loss": 3.0628, "step": 270500 }, { "epoch": 0.26653491799531853, "grad_norm": 
2.3926491737365723, "learning_rate": 9.93009854147707e-06, "loss": 3.0291, "step": 270550 }, { "epoch": 0.2665841759731406, "grad_norm": 2.2569563388824463, "learning_rate": 9.930072753060895e-06, "loss": 3.0413, "step": 270600 }, { "epoch": 0.2666334339509627, "grad_norm": 2.189606189727783, "learning_rate": 9.930046959922097e-06, "loss": 3.0346, "step": 270650 }, { "epoch": 0.2666826919287848, "grad_norm": 2.42091703414917, "learning_rate": 9.9300211620607e-06, "loss": 3.0232, "step": 270700 }, { "epoch": 0.26673194990660687, "grad_norm": 2.3920516967773438, "learning_rate": 9.929995359476728e-06, "loss": 3.1093, "step": 270750 }, { "epoch": 0.26678120788442894, "grad_norm": 2.4465250968933105, "learning_rate": 9.929969552170207e-06, "loss": 3.1086, "step": 270800 }, { "epoch": 0.26683046586225107, "grad_norm": 2.2704055309295654, "learning_rate": 9.929943740141161e-06, "loss": 2.9978, "step": 270850 }, { "epoch": 0.26687972384007314, "grad_norm": 2.15385103225708, "learning_rate": 9.929917923389616e-06, "loss": 3.0402, "step": 270900 }, { "epoch": 0.2669289818178952, "grad_norm": 2.4136264324188232, "learning_rate": 9.929892101915595e-06, "loss": 3.0892, "step": 270950 }, { "epoch": 0.26697823979571733, "grad_norm": 2.185389280319214, "learning_rate": 9.929866275719123e-06, "loss": 3.0925, "step": 271000 }, { "epoch": 0.2670274977735394, "grad_norm": 2.221203088760376, "learning_rate": 9.929840444800225e-06, "loss": 3.0896, "step": 271050 }, { "epoch": 0.26707675575136147, "grad_norm": 2.408501386642456, "learning_rate": 9.929814609158927e-06, "loss": 3.0498, "step": 271100 }, { "epoch": 0.2671260137291836, "grad_norm": 2.5423808097839355, "learning_rate": 9.929788768795251e-06, "loss": 3.0229, "step": 271150 }, { "epoch": 0.26717527170700567, "grad_norm": 2.1282596588134766, "learning_rate": 9.929762923709225e-06, "loss": 3.0489, "step": 271200 }, { "epoch": 0.26722452968482774, "grad_norm": 2.2854952812194824, "learning_rate": 9.929737073900872e-06, "loss": 
3.0191, "step": 271250 }, { "epoch": 0.26727378766264986, "grad_norm": 2.2662932872772217, "learning_rate": 9.929711219370215e-06, "loss": 3.0382, "step": 271300 }, { "epoch": 0.26732304564047193, "grad_norm": 2.3308775424957275, "learning_rate": 9.929685360117282e-06, "loss": 3.0798, "step": 271350 }, { "epoch": 0.267372303618294, "grad_norm": 2.184906244277954, "learning_rate": 9.929659496142097e-06, "loss": 3.0082, "step": 271400 }, { "epoch": 0.26742156159611613, "grad_norm": 2.2457683086395264, "learning_rate": 9.929633627444683e-06, "loss": 3.0844, "step": 271450 }, { "epoch": 0.2674708195739382, "grad_norm": 2.2955844402313232, "learning_rate": 9.929607754025069e-06, "loss": 3.1065, "step": 271500 }, { "epoch": 0.26752007755176027, "grad_norm": 2.494981050491333, "learning_rate": 9.929581875883273e-06, "loss": 3.0446, "step": 271550 }, { "epoch": 0.2675693355295824, "grad_norm": 2.127133846282959, "learning_rate": 9.929555993019324e-06, "loss": 3.0748, "step": 271600 }, { "epoch": 0.26761859350740447, "grad_norm": 2.496340036392212, "learning_rate": 9.929530105433248e-06, "loss": 3.0314, "step": 271650 }, { "epoch": 0.26766785148522654, "grad_norm": 2.3220291137695312, "learning_rate": 9.929504213125068e-06, "loss": 3.0969, "step": 271700 }, { "epoch": 0.2677171094630486, "grad_norm": 2.497211217880249, "learning_rate": 9.92947831609481e-06, "loss": 3.0178, "step": 271750 }, { "epoch": 0.26776636744087073, "grad_norm": 2.320970296859741, "learning_rate": 9.929452414342496e-06, "loss": 3.0464, "step": 271800 }, { "epoch": 0.2678156254186928, "grad_norm": 2.276566505432129, "learning_rate": 9.929426507868151e-06, "loss": 3.0418, "step": 271850 }, { "epoch": 0.2678648833965149, "grad_norm": 2.424804210662842, "learning_rate": 9.929400596671804e-06, "loss": 3.1204, "step": 271900 }, { "epoch": 0.267914141374337, "grad_norm": 2.230098009109497, "learning_rate": 9.929374680753477e-06, "loss": 3.129, "step": 271950 }, { "epoch": 0.26796339935215907, "grad_norm": 
2.2621920108795166, "learning_rate": 9.929348760113196e-06, "loss": 3.0144, "step": 272000 }, { "epoch": 0.26801265732998114, "grad_norm": 2.318976402282715, "learning_rate": 9.929322834750983e-06, "loss": 3.1055, "step": 272050 }, { "epoch": 0.26806191530780327, "grad_norm": 2.242457866668701, "learning_rate": 9.929296904666865e-06, "loss": 3.0869, "step": 272100 }, { "epoch": 0.26811117328562534, "grad_norm": 2.2995502948760986, "learning_rate": 9.929270969860868e-06, "loss": 3.0195, "step": 272150 }, { "epoch": 0.2681604312634474, "grad_norm": 2.282332181930542, "learning_rate": 9.929245030333012e-06, "loss": 3.063, "step": 272200 }, { "epoch": 0.26820968924126953, "grad_norm": 2.1786437034606934, "learning_rate": 9.929219086083328e-06, "loss": 3.0731, "step": 272250 }, { "epoch": 0.2682589472190916, "grad_norm": 2.2779195308685303, "learning_rate": 9.92919313711184e-06, "loss": 3.0209, "step": 272300 }, { "epoch": 0.2683082051969137, "grad_norm": 2.3118577003479004, "learning_rate": 9.929167183418567e-06, "loss": 3.0956, "step": 272350 }, { "epoch": 0.2683574631747358, "grad_norm": 2.2368698120117188, "learning_rate": 9.929141225003538e-06, "loss": 3.0833, "step": 272400 }, { "epoch": 0.26840672115255787, "grad_norm": 2.3733928203582764, "learning_rate": 9.92911526186678e-06, "loss": 2.9921, "step": 272450 }, { "epoch": 0.26845597913037994, "grad_norm": 2.183927297592163, "learning_rate": 9.929089294008315e-06, "loss": 3.0316, "step": 272500 }, { "epoch": 0.26850523710820207, "grad_norm": 2.3431320190429688, "learning_rate": 9.929063321428168e-06, "loss": 3.0807, "step": 272550 }, { "epoch": 0.26855449508602414, "grad_norm": 2.522692918777466, "learning_rate": 9.929037344126365e-06, "loss": 3.0794, "step": 272600 }, { "epoch": 0.2686037530638462, "grad_norm": 2.2585055828094482, "learning_rate": 9.929011362102929e-06, "loss": 3.1138, "step": 272650 }, { "epoch": 0.26865301104166833, "grad_norm": 2.2940473556518555, "learning_rate": 9.928985375357887e-06, 
"loss": 3.0833, "step": 272700 }, { "epoch": 0.2687022690194904, "grad_norm": 2.331890821456909, "learning_rate": 9.928959383891263e-06, "loss": 3.0485, "step": 272750 }, { "epoch": 0.2687515269973125, "grad_norm": 2.3480982780456543, "learning_rate": 9.928933387703081e-06, "loss": 3.035, "step": 272800 }, { "epoch": 0.2688007849751346, "grad_norm": 2.298905611038208, "learning_rate": 9.928907386793367e-06, "loss": 3.0126, "step": 272850 }, { "epoch": 0.26885004295295667, "grad_norm": 2.1908907890319824, "learning_rate": 9.928881381162147e-06, "loss": 3.0541, "step": 272900 }, { "epoch": 0.26889930093077874, "grad_norm": 2.188222885131836, "learning_rate": 9.928855370809443e-06, "loss": 3.119, "step": 272950 }, { "epoch": 0.2689485589086008, "grad_norm": 2.541836738586426, "learning_rate": 9.928829355735283e-06, "loss": 3.0236, "step": 273000 }, { "epoch": 0.26899781688642294, "grad_norm": 2.7478833198547363, "learning_rate": 9.92880333593969e-06, "loss": 3.1098, "step": 273050 }, { "epoch": 0.269047074864245, "grad_norm": 2.185455560684204, "learning_rate": 9.928777311422689e-06, "loss": 3.0006, "step": 273100 }, { "epoch": 0.2690963328420671, "grad_norm": 2.260162830352783, "learning_rate": 9.928751282184305e-06, "loss": 3.0833, "step": 273150 }, { "epoch": 0.2691455908198892, "grad_norm": 2.1929636001586914, "learning_rate": 9.928725248224562e-06, "loss": 3.0352, "step": 273200 }, { "epoch": 0.2691948487977113, "grad_norm": 2.374878168106079, "learning_rate": 9.928699209543489e-06, "loss": 3.0867, "step": 273250 }, { "epoch": 0.26924410677553334, "grad_norm": 2.516448497772217, "learning_rate": 9.928673166141106e-06, "loss": 2.986, "step": 273300 }, { "epoch": 0.26929336475335547, "grad_norm": 2.273820161819458, "learning_rate": 9.92864711801744e-06, "loss": 3.0314, "step": 273350 }, { "epoch": 0.26934262273117754, "grad_norm": 2.3430898189544678, "learning_rate": 9.928621065172518e-06, "loss": 3.1224, "step": 273400 }, { "epoch": 0.2693918807089996, 
"grad_norm": 2.2021443843841553, "learning_rate": 9.92859500760636e-06, "loss": 3.0769, "step": 273450 }, { "epoch": 0.26944113868682174, "grad_norm": 2.352940320968628, "learning_rate": 9.928568945318997e-06, "loss": 3.0709, "step": 273500 }, { "epoch": 0.2694903966646438, "grad_norm": 2.5808804035186768, "learning_rate": 9.92854287831045e-06, "loss": 2.9843, "step": 273550 }, { "epoch": 0.2695396546424659, "grad_norm": 2.486743688583374, "learning_rate": 9.928516806580745e-06, "loss": 3.0733, "step": 273600 }, { "epoch": 0.269588912620288, "grad_norm": 2.575140953063965, "learning_rate": 9.928490730129904e-06, "loss": 3.0852, "step": 273650 }, { "epoch": 0.26963817059811007, "grad_norm": 2.3352348804473877, "learning_rate": 9.928464648957958e-06, "loss": 3.0906, "step": 273700 }, { "epoch": 0.26968742857593214, "grad_norm": 2.220576524734497, "learning_rate": 9.928438563064928e-06, "loss": 3.0577, "step": 273750 }, { "epoch": 0.26973668655375427, "grad_norm": 2.4445624351501465, "learning_rate": 9.928412472450842e-06, "loss": 3.0408, "step": 273800 }, { "epoch": 0.26978594453157634, "grad_norm": 2.130986213684082, "learning_rate": 9.928386377115719e-06, "loss": 3.0116, "step": 273850 }, { "epoch": 0.2698352025093984, "grad_norm": 2.1336708068847656, "learning_rate": 9.92836027705959e-06, "loss": 3.1137, "step": 273900 }, { "epoch": 0.26988446048722053, "grad_norm": 2.2219667434692383, "learning_rate": 9.928334172282477e-06, "loss": 3.1239, "step": 273950 }, { "epoch": 0.2699337184650426, "grad_norm": 2.501338005065918, "learning_rate": 9.928308062784406e-06, "loss": 3.0486, "step": 274000 }, { "epoch": 0.2699829764428647, "grad_norm": 2.1796624660491943, "learning_rate": 9.928281948565402e-06, "loss": 3.1381, "step": 274050 }, { "epoch": 0.2700322344206868, "grad_norm": 2.40246844291687, "learning_rate": 9.928255829625488e-06, "loss": 3.0751, "step": 274100 }, { "epoch": 0.27008149239850887, "grad_norm": 2.3338255882263184, "learning_rate": 9.928229705964692e-06, 
"loss": 3.0246, "step": 274150 }, { "epoch": 0.27013075037633094, "grad_norm": 2.2065982818603516, "learning_rate": 9.928203577583038e-06, "loss": 3.0701, "step": 274200 }, { "epoch": 0.270180008354153, "grad_norm": 2.411497116088867, "learning_rate": 9.92817744448055e-06, "loss": 3.1053, "step": 274250 }, { "epoch": 0.27022926633197514, "grad_norm": 2.2643861770629883, "learning_rate": 9.928151306657257e-06, "loss": 3.0841, "step": 274300 }, { "epoch": 0.2702785243097972, "grad_norm": 2.32452392578125, "learning_rate": 9.999550306114007e-06, "loss": 3.0754, "step": 274350 }, { "epoch": 0.2703277822876193, "grad_norm": 2.2244744300842285, "learning_rate": 9.999548118816721e-06, "loss": 3.0503, "step": 274400 }, { "epoch": 0.2703770402654414, "grad_norm": 2.102048635482788, "learning_rate": 9.9995459262131e-06, "loss": 3.0734, "step": 274450 }, { "epoch": 0.2704262982432635, "grad_norm": 2.422478675842285, "learning_rate": 9.999543728303145e-06, "loss": 3.0853, "step": 274500 }, { "epoch": 0.27047555622108554, "grad_norm": 2.229930877685547, "learning_rate": 9.999541525086861e-06, "loss": 3.0648, "step": 274550 }, { "epoch": 0.27052481419890767, "grad_norm": 2.4857280254364014, "learning_rate": 9.999539316564249e-06, "loss": 3.0386, "step": 274600 }, { "epoch": 0.27057407217672974, "grad_norm": 2.320030689239502, "learning_rate": 9.999537102735309e-06, "loss": 3.0781, "step": 274650 }, { "epoch": 0.2706233301545518, "grad_norm": 2.167858362197876, "learning_rate": 9.999534883600047e-06, "loss": 3.1322, "step": 274700 }, { "epoch": 0.27067258813237394, "grad_norm": 2.2518534660339355, "learning_rate": 9.999532659158462e-06, "loss": 3.0216, "step": 274750 }, { "epoch": 0.270721846110196, "grad_norm": 2.3899853229522705, "learning_rate": 9.99953042941056e-06, "loss": 3.0256, "step": 274800 }, { "epoch": 0.2707711040880181, "grad_norm": 2.4551777839660645, "learning_rate": 9.999528194356338e-06, "loss": 3.0872, "step": 274850 }, { "epoch": 0.2708203620658402, 
"grad_norm": 2.323352098464966, "learning_rate": 9.999525953995805e-06, "loss": 3.0886, "step": 274900 }, { "epoch": 0.2708696200436623, "grad_norm": 2.2773430347442627, "learning_rate": 9.999523708328959e-06, "loss": 3.085, "step": 274950 }, { "epoch": 0.27091887802148434, "grad_norm": 2.1326823234558105, "learning_rate": 9.999521457355803e-06, "loss": 3.1329, "step": 275000 }, { "epoch": 0.27096813599930647, "grad_norm": 2.406972646713257, "learning_rate": 9.99951920107634e-06, "loss": 3.047, "step": 275050 }, { "epoch": 0.27101739397712854, "grad_norm": 2.329613208770752, "learning_rate": 9.999516939490571e-06, "loss": 3.0738, "step": 275100 }, { "epoch": 0.2710666519549506, "grad_norm": 2.3396027088165283, "learning_rate": 9.999514672598502e-06, "loss": 3.0553, "step": 275150 }, { "epoch": 0.27111590993277274, "grad_norm": 2.7780160903930664, "learning_rate": 9.999512400400132e-06, "loss": 3.0506, "step": 275200 }, { "epoch": 0.2711651679105948, "grad_norm": 2.3778438568115234, "learning_rate": 9.999510122895465e-06, "loss": 3.041, "step": 275250 }, { "epoch": 0.2712144258884169, "grad_norm": 2.399900436401367, "learning_rate": 9.999507840084504e-06, "loss": 3.1217, "step": 275300 }, { "epoch": 0.271263683866239, "grad_norm": 2.2647435665130615, "learning_rate": 9.999505551967248e-06, "loss": 3.0607, "step": 275350 }, { "epoch": 0.2713129418440611, "grad_norm": 2.193024158477783, "learning_rate": 9.999503258543701e-06, "loss": 3.0361, "step": 275400 }, { "epoch": 0.27136219982188314, "grad_norm": 2.6412267684936523, "learning_rate": 9.99950095981387e-06, "loss": 3.0924, "step": 275450 }, { "epoch": 0.2714114577997052, "grad_norm": 2.4857845306396484, "learning_rate": 9.99949865577775e-06, "loss": 3.0801, "step": 275500 }, { "epoch": 0.27146071577752734, "grad_norm": 2.2095165252685547, "learning_rate": 9.99949634643535e-06, "loss": 2.9726, "step": 275550 }, { "epoch": 0.2715099737553494, "grad_norm": 2.298539638519287, "learning_rate": 9.999494031786666e-06, 
"loss": 3.0459, "step": 275600 }, { "epoch": 0.2715592317331715, "grad_norm": 2.4887797832489014, "learning_rate": 9.999491711831708e-06, "loss": 3.1113, "step": 275650 }, { "epoch": 0.2716084897109936, "grad_norm": 2.295506000518799, "learning_rate": 9.999489386570473e-06, "loss": 2.996, "step": 275700 }, { "epoch": 0.2716577476888157, "grad_norm": 2.3383877277374268, "learning_rate": 9.999487056002965e-06, "loss": 3.0084, "step": 275750 }, { "epoch": 0.27170700566663775, "grad_norm": 2.214759588241577, "learning_rate": 9.999484720129185e-06, "loss": 2.9972, "step": 275800 }, { "epoch": 0.2717562636444599, "grad_norm": 2.309497117996216, "learning_rate": 9.999482378949136e-06, "loss": 3.0523, "step": 275850 }, { "epoch": 0.27180552162228194, "grad_norm": 2.2757134437561035, "learning_rate": 9.999480032462826e-06, "loss": 3.0658, "step": 275900 }, { "epoch": 0.271854779600104, "grad_norm": 2.424730062484741, "learning_rate": 9.999477680670247e-06, "loss": 3.0694, "step": 275950 }, { "epoch": 0.27190403757792614, "grad_norm": 2.357182264328003, "learning_rate": 9.999475323571412e-06, "loss": 3.0474, "step": 276000 }, { "epoch": 0.2719532955557482, "grad_norm": 2.403914213180542, "learning_rate": 9.999472961166317e-06, "loss": 3.0675, "step": 276050 }, { "epoch": 0.2720025535335703, "grad_norm": 2.138929605484009, "learning_rate": 9.999470593454965e-06, "loss": 3.0148, "step": 276100 }, { "epoch": 0.2720518115113924, "grad_norm": 2.279961347579956, "learning_rate": 9.999468220437362e-06, "loss": 3.0594, "step": 276150 }, { "epoch": 0.2721010694892145, "grad_norm": 2.349294662475586, "learning_rate": 9.999465842113508e-06, "loss": 3.082, "step": 276200 }, { "epoch": 0.27215032746703655, "grad_norm": 2.2538154125213623, "learning_rate": 9.999463458483405e-06, "loss": 3.0437, "step": 276250 }, { "epoch": 0.27219958544485867, "grad_norm": 2.328101873397827, "learning_rate": 9.999461069547058e-06, "loss": 3.0793, "step": 276300 }, { "epoch": 0.27224884342268074, 
"grad_norm": 2.2643866539001465, "learning_rate": 9.999458675304468e-06, "loss": 3.0896, "step": 276350 }, { "epoch": 0.2722981014005028, "grad_norm": 2.397108554840088, "learning_rate": 9.999456275755636e-06, "loss": 2.9836, "step": 276400 }, { "epoch": 0.27234735937832494, "grad_norm": 2.1680667400360107, "learning_rate": 9.999453870900565e-06, "loss": 3.007, "step": 276450 }, { "epoch": 0.272396617356147, "grad_norm": 2.438023567199707, "learning_rate": 9.999451460739261e-06, "loss": 3.0018, "step": 276500 }, { "epoch": 0.2724458753339691, "grad_norm": 2.4156405925750732, "learning_rate": 9.999449045271723e-06, "loss": 3.0385, "step": 276550 }, { "epoch": 0.2724951333117912, "grad_norm": 2.2574119567871094, "learning_rate": 9.999446624497955e-06, "loss": 3.0362, "step": 276600 }, { "epoch": 0.2725443912896133, "grad_norm": 2.3206326961517334, "learning_rate": 9.999444198417959e-06, "loss": 3.0519, "step": 276650 }, { "epoch": 0.27259364926743535, "grad_norm": 2.3654370307922363, "learning_rate": 9.999441767031737e-06, "loss": 3.0711, "step": 276700 }, { "epoch": 0.2726429072452574, "grad_norm": 2.2183473110198975, "learning_rate": 9.999439330339295e-06, "loss": 3.0429, "step": 276750 }, { "epoch": 0.27269216522307954, "grad_norm": 2.310246229171753, "learning_rate": 9.999436888340631e-06, "loss": 3.0839, "step": 276800 }, { "epoch": 0.2727414232009016, "grad_norm": 2.2874252796173096, "learning_rate": 9.99943444103575e-06, "loss": 3.0924, "step": 276850 }, { "epoch": 0.2727906811787237, "grad_norm": 2.253023624420166, "learning_rate": 9.999431988424655e-06, "loss": 3.08, "step": 276900 }, { "epoch": 0.2728399391565458, "grad_norm": 2.5274083614349365, "learning_rate": 9.999429530507347e-06, "loss": 3.037, "step": 276950 }, { "epoch": 0.2728891971343679, "grad_norm": 2.3758950233459473, "learning_rate": 9.999427067283832e-06, "loss": 3.0963, "step": 277000 }, { "epoch": 0.27293845511218995, "grad_norm": 2.412848711013794, "learning_rate": 9.999424598754106e-06, 
"loss": 3.0997, "step": 277050 }, { "epoch": 0.2729877130900121, "grad_norm": 2.5760467052459717, "learning_rate": 9.999422124918177e-06, "loss": 3.1185, "step": 277100 }, { "epoch": 0.27303697106783414, "grad_norm": 2.035867929458618, "learning_rate": 9.999419645776049e-06, "loss": 2.9927, "step": 277150 }, { "epoch": 0.2730862290456562, "grad_norm": 2.4078593254089355, "learning_rate": 9.999417161327717e-06, "loss": 3.0616, "step": 277200 }, { "epoch": 0.27313548702347834, "grad_norm": 2.224599838256836, "learning_rate": 9.999414671573194e-06, "loss": 3.1021, "step": 277250 }, { "epoch": 0.2731847450013004, "grad_norm": 2.329906702041626, "learning_rate": 9.999412176512473e-06, "loss": 3.0387, "step": 277300 }, { "epoch": 0.2732340029791225, "grad_norm": 2.3796935081481934, "learning_rate": 9.999409676145564e-06, "loss": 3.0656, "step": 277350 }, { "epoch": 0.2732832609569446, "grad_norm": 2.7732667922973633, "learning_rate": 9.999407170472464e-06, "loss": 3.0732, "step": 277400 }, { "epoch": 0.2733325189347667, "grad_norm": 2.2295989990234375, "learning_rate": 9.999404659493179e-06, "loss": 3.058, "step": 277450 }, { "epoch": 0.27338177691258875, "grad_norm": 2.2737255096435547, "learning_rate": 9.999402143207711e-06, "loss": 3.0743, "step": 277500 }, { "epoch": 0.2734310348904109, "grad_norm": 2.4207143783569336, "learning_rate": 9.999399621616062e-06, "loss": 3.0996, "step": 277550 }, { "epoch": 0.27348029286823294, "grad_norm": 2.2210240364074707, "learning_rate": 9.999397094718236e-06, "loss": 3.0456, "step": 277600 }, { "epoch": 0.273529550846055, "grad_norm": 2.215590000152588, "learning_rate": 9.999394562514234e-06, "loss": 3.1106, "step": 277650 }, { "epoch": 0.27357880882387714, "grad_norm": 2.325078248977661, "learning_rate": 9.999392025004061e-06, "loss": 3.0774, "step": 277700 }, { "epoch": 0.2736280668016992, "grad_norm": 2.2615644931793213, "learning_rate": 9.999389482187716e-06, "loss": 2.971, "step": 277750 }, { "epoch": 0.2736773247795213, 
"grad_norm": 2.4708690643310547, "learning_rate": 9.999386934065206e-06, "loss": 3.0943, "step": 277800 }, { "epoch": 0.2737265827573434, "grad_norm": 2.424464464187622, "learning_rate": 9.999384380636531e-06, "loss": 3.0831, "step": 277850 }, { "epoch": 0.2737758407351655, "grad_norm": 2.24625563621521, "learning_rate": 9.999381821901696e-06, "loss": 2.9923, "step": 277900 }, { "epoch": 0.27382509871298755, "grad_norm": 2.2028613090515137, "learning_rate": 9.999379257860699e-06, "loss": 3.0483, "step": 277950 }, { "epoch": 0.2738743566908096, "grad_norm": 2.288938045501709, "learning_rate": 9.999376688513546e-06, "loss": 3.002, "step": 278000 }, { "epoch": 0.27392361466863174, "grad_norm": 2.2996225357055664, "learning_rate": 9.99937411386024e-06, "loss": 3.081, "step": 278050 }, { "epoch": 0.2739728726464538, "grad_norm": 2.2090985774993896, "learning_rate": 9.999371533900786e-06, "loss": 3.0787, "step": 278100 }, { "epoch": 0.2740221306242759, "grad_norm": 2.265267848968506, "learning_rate": 9.999368948635181e-06, "loss": 3.1141, "step": 278150 }, { "epoch": 0.274071388602098, "grad_norm": 2.383439302444458, "learning_rate": 9.999366358063432e-06, "loss": 3.0144, "step": 278200 }, { "epoch": 0.2741206465799201, "grad_norm": 2.314011573791504, "learning_rate": 9.99936376218554e-06, "loss": 3.0932, "step": 278250 }, { "epoch": 0.27416990455774215, "grad_norm": 2.345916509628296, "learning_rate": 9.999361161001508e-06, "loss": 3.0309, "step": 278300 }, { "epoch": 0.2742191625355643, "grad_norm": 2.444455862045288, "learning_rate": 9.99935855451134e-06, "loss": 3.045, "step": 278350 }, { "epoch": 0.27426842051338635, "grad_norm": 2.2926480770111084, "learning_rate": 9.999355942715037e-06, "loss": 3.0693, "step": 278400 }, { "epoch": 0.2743176784912084, "grad_norm": 2.5589003562927246, "learning_rate": 9.999353325612603e-06, "loss": 3.0614, "step": 278450 }, { "epoch": 0.27436693646903054, "grad_norm": 2.4288032054901123, "learning_rate": 9.99935070320404e-06, 
"loss": 3.0479, "step": 278500 }, { "epoch": 0.2744161944468526, "grad_norm": 2.4304988384246826, "learning_rate": 9.99934807548935e-06, "loss": 2.9636, "step": 278550 }, { "epoch": 0.2744654524246747, "grad_norm": 2.3672192096710205, "learning_rate": 9.99934544246854e-06, "loss": 3.0841, "step": 278600 }, { "epoch": 0.2745147104024968, "grad_norm": 2.1683273315429688, "learning_rate": 9.999342804141607e-06, "loss": 3.1062, "step": 278650 }, { "epoch": 0.2745639683803189, "grad_norm": 2.4212872982025146, "learning_rate": 9.999340160508556e-06, "loss": 3.061, "step": 278700 }, { "epoch": 0.27461322635814095, "grad_norm": 2.1729488372802734, "learning_rate": 9.999337511569393e-06, "loss": 3.089, "step": 278750 }, { "epoch": 0.2746624843359631, "grad_norm": 2.3220813274383545, "learning_rate": 9.999334857324117e-06, "loss": 3.0259, "step": 278800 }, { "epoch": 0.27471174231378515, "grad_norm": 2.301771402359009, "learning_rate": 9.999332197772732e-06, "loss": 3.0314, "step": 278850 }, { "epoch": 0.2747610002916072, "grad_norm": 2.2659647464752197, "learning_rate": 9.99932953291524e-06, "loss": 3.07, "step": 278900 }, { "epoch": 0.27481025826942934, "grad_norm": 2.3310420513153076, "learning_rate": 9.999326862751645e-06, "loss": 3.0517, "step": 278950 }, { "epoch": 0.2748595162472514, "grad_norm": 2.45888614654541, "learning_rate": 9.99932418728195e-06, "loss": 3.0257, "step": 279000 }, { "epoch": 0.2749087742250735, "grad_norm": 2.3410592079162598, "learning_rate": 9.999321506506155e-06, "loss": 3.0609, "step": 279050 }, { "epoch": 0.2749580322028956, "grad_norm": 2.3784685134887695, "learning_rate": 9.999318820424266e-06, "loss": 3.0115, "step": 279100 }, { "epoch": 0.2750072901807177, "grad_norm": 2.281674861907959, "learning_rate": 9.999316129036286e-06, "loss": 3.0028, "step": 279150 }, { "epoch": 0.27505654815853975, "grad_norm": 2.2566380500793457, "learning_rate": 9.999313432342216e-06, "loss": 3.0574, "step": 279200 }, { "epoch": 0.2751058061363618, 
"grad_norm": 2.232377529144287, "learning_rate": 9.999310730342061e-06, "loss": 3.0344, "step": 279250 }, { "epoch": 0.27515506411418394, "grad_norm": 2.369981288909912, "learning_rate": 9.999308023035821e-06, "loss": 3.0882, "step": 279300 }, { "epoch": 0.275204322092006, "grad_norm": 2.341582775115967, "learning_rate": 9.999305310423502e-06, "loss": 2.9885, "step": 279350 }, { "epoch": 0.2752535800698281, "grad_norm": 2.3325612545013428, "learning_rate": 9.999302592505105e-06, "loss": 3.099, "step": 279400 }, { "epoch": 0.2753028380476502, "grad_norm": 2.308741331100464, "learning_rate": 9.99929986928063e-06, "loss": 3.0759, "step": 279450 }, { "epoch": 0.2753520960254723, "grad_norm": 2.2781736850738525, "learning_rate": 9.999297140750087e-06, "loss": 3.0622, "step": 279500 }, { "epoch": 0.27540135400329435, "grad_norm": 2.5265614986419678, "learning_rate": 9.999294406913472e-06, "loss": 3.0741, "step": 279550 }, { "epoch": 0.2754506119811165, "grad_norm": 2.3374273777008057, "learning_rate": 9.999291667770794e-06, "loss": 3.0682, "step": 279600 }, { "epoch": 0.27549986995893855, "grad_norm": 2.3963499069213867, "learning_rate": 9.99928892332205e-06, "loss": 3.0536, "step": 279650 }, { "epoch": 0.2755491279367606, "grad_norm": 2.218161106109619, "learning_rate": 9.999286173567247e-06, "loss": 3.0509, "step": 279700 }, { "epoch": 0.27559838591458274, "grad_norm": 2.3588054180145264, "learning_rate": 9.999283418506387e-06, "loss": 3.0322, "step": 279750 }, { "epoch": 0.2756476438924048, "grad_norm": 2.259392023086548, "learning_rate": 9.999280658139472e-06, "loss": 3.0548, "step": 279800 }, { "epoch": 0.2756969018702269, "grad_norm": 2.2124147415161133, "learning_rate": 9.999277892466505e-06, "loss": 3.1132, "step": 279850 }, { "epoch": 0.275746159848049, "grad_norm": 2.6995434761047363, "learning_rate": 9.999275121487489e-06, "loss": 3.0042, "step": 279900 }, { "epoch": 0.2757954178258711, "grad_norm": 2.2837610244750977, "learning_rate": 9.999272345202427e-06, 
"loss": 3.0604, "step": 279950 }, { "epoch": 0.27584467580369315, "grad_norm": 2.3456192016601562, "learning_rate": 9.999269563611325e-06, "loss": 3.0495, "step": 280000 }, { "epoch": 0.2758939337815153, "grad_norm": 2.2953476905822754, "learning_rate": 9.99926677671418e-06, "loss": 3.0511, "step": 280050 }, { "epoch": 0.27594319175933735, "grad_norm": 2.3278610706329346, "learning_rate": 9.999263984511e-06, "loss": 3.0001, "step": 280100 }, { "epoch": 0.2759924497371594, "grad_norm": 2.863896608352661, "learning_rate": 9.999261187001786e-06, "loss": 3.0405, "step": 280150 }, { "epoch": 0.27604170771498154, "grad_norm": 2.3923697471618652, "learning_rate": 9.999258384186543e-06, "loss": 3.0992, "step": 280200 }, { "epoch": 0.2760909656928036, "grad_norm": 2.3339266777038574, "learning_rate": 9.999255576065269e-06, "loss": 3.0458, "step": 280250 }, { "epoch": 0.2761402236706257, "grad_norm": 2.28961181640625, "learning_rate": 9.99925276263797e-06, "loss": 3.1952, "step": 280300 }, { "epoch": 0.2761894816484478, "grad_norm": 2.163423538208008, "learning_rate": 9.999249943904651e-06, "loss": 3.076, "step": 280350 }, { "epoch": 0.2762387396262699, "grad_norm": 2.619786262512207, "learning_rate": 9.999247119865314e-06, "loss": 3.0659, "step": 280400 }, { "epoch": 0.27628799760409195, "grad_norm": 2.1788508892059326, "learning_rate": 9.999244290519957e-06, "loss": 3.0832, "step": 280450 }, { "epoch": 0.276337255581914, "grad_norm": 2.5952553749084473, "learning_rate": 9.99924145586859e-06, "loss": 3.0865, "step": 280500 }, { "epoch": 0.27638651355973615, "grad_norm": 2.305605411529541, "learning_rate": 9.999238615911213e-06, "loss": 3.0527, "step": 280550 }, { "epoch": 0.2764357715375582, "grad_norm": 2.2598345279693604, "learning_rate": 9.999235770647828e-06, "loss": 3.129, "step": 280600 }, { "epoch": 0.2764850295153803, "grad_norm": 2.29292368888855, "learning_rate": 9.99923292007844e-06, "loss": 2.9794, "step": 280650 }, { "epoch": 0.2765342874932024, "grad_norm": 
2.3193204402923584, "learning_rate": 9.99923006420305e-06, "loss": 3.0118, "step": 280700 }, { "epoch": 0.2765835454710245, "grad_norm": 2.672445774078369, "learning_rate": 9.999227203021664e-06, "loss": 3.0638, "step": 280750 }, { "epoch": 0.27663280344884655, "grad_norm": 2.384770393371582, "learning_rate": 9.99922433653428e-06, "loss": 3.0788, "step": 280800 }, { "epoch": 0.2766820614266687, "grad_norm": 2.2893855571746826, "learning_rate": 9.999221464740906e-06, "loss": 3.0239, "step": 280850 }, { "epoch": 0.27673131940449075, "grad_norm": 2.4062585830688477, "learning_rate": 9.999218587641545e-06, "loss": 3.1151, "step": 280900 }, { "epoch": 0.2767805773823128, "grad_norm": 2.216714382171631, "learning_rate": 9.999215705236196e-06, "loss": 3.0571, "step": 280950 }, { "epoch": 0.27682983536013495, "grad_norm": 2.533191204071045, "learning_rate": 9.999212817524866e-06, "loss": 3.0038, "step": 281000 }, { "epoch": 0.276879093337957, "grad_norm": 2.2471811771392822, "learning_rate": 9.999209924507556e-06, "loss": 3.0829, "step": 281050 }, { "epoch": 0.2769283513157791, "grad_norm": 2.387820243835449, "learning_rate": 9.999207026184268e-06, "loss": 2.9373, "step": 281100 }, { "epoch": 0.2769776092936012, "grad_norm": 2.3220479488372803, "learning_rate": 9.999204122555008e-06, "loss": 3.1097, "step": 281150 }, { "epoch": 0.2770268672714233, "grad_norm": 2.3009231090545654, "learning_rate": 9.999201213619778e-06, "loss": 3.0515, "step": 281200 }, { "epoch": 0.27707612524924535, "grad_norm": 2.261983633041382, "learning_rate": 9.999198299378581e-06, "loss": 3.0796, "step": 281250 }, { "epoch": 0.2771253832270675, "grad_norm": 2.4418768882751465, "learning_rate": 9.999195379831419e-06, "loss": 3.0667, "step": 281300 }, { "epoch": 0.27717464120488955, "grad_norm": 2.669725179672241, "learning_rate": 9.999192454978297e-06, "loss": 3.0625, "step": 281350 }, { "epoch": 0.2772238991827116, "grad_norm": 2.325237274169922, "learning_rate": 9.999189524819216e-06, "loss": 
3.0592, "step": 281400 }, { "epoch": 0.27727315716053375, "grad_norm": 2.2468607425689697, "learning_rate": 9.99918658935418e-06, "loss": 3.1522, "step": 281450 }, { "epoch": 0.2773224151383558, "grad_norm": 2.2150721549987793, "learning_rate": 9.999183648583193e-06, "loss": 3.0317, "step": 281500 }, { "epoch": 0.2773716731161779, "grad_norm": 2.246112585067749, "learning_rate": 9.999180702506256e-06, "loss": 3.0674, "step": 281550 }, { "epoch": 0.277420931094, "grad_norm": 2.3870432376861572, "learning_rate": 9.999177751123374e-06, "loss": 3.0318, "step": 281600 }, { "epoch": 0.2774701890718221, "grad_norm": 2.290712356567383, "learning_rate": 9.99917479443455e-06, "loss": 3.0748, "step": 281650 }, { "epoch": 0.27751944704964415, "grad_norm": 2.2130281925201416, "learning_rate": 9.999171832439787e-06, "loss": 3.0302, "step": 281700 }, { "epoch": 0.2775687050274662, "grad_norm": 2.1901941299438477, "learning_rate": 9.99916886513909e-06, "loss": 3.0167, "step": 281750 }, { "epoch": 0.27761796300528835, "grad_norm": 2.1854476928710938, "learning_rate": 9.999165892532457e-06, "loss": 3.0492, "step": 281800 }, { "epoch": 0.2776672209831104, "grad_norm": 2.3490681648254395, "learning_rate": 9.999162914619895e-06, "loss": 3.0485, "step": 281850 }, { "epoch": 0.2777164789609325, "grad_norm": 2.260258913040161, "learning_rate": 9.999159931401408e-06, "loss": 3.0847, "step": 281900 }, { "epoch": 0.2777657369387546, "grad_norm": 2.290295362472534, "learning_rate": 9.999156942876994e-06, "loss": 3.0546, "step": 281950 }, { "epoch": 0.2778149949165767, "grad_norm": 2.217444658279419, "learning_rate": 9.999153949046664e-06, "loss": 3.0567, "step": 282000 }, { "epoch": 0.27786425289439876, "grad_norm": 2.2332184314727783, "learning_rate": 9.999150949910414e-06, "loss": 3.088, "step": 282050 }, { "epoch": 0.2779135108722209, "grad_norm": 2.3670341968536377, "learning_rate": 9.999147945468251e-06, "loss": 3.0415, "step": 282100 }, { "epoch": 0.27796276885004295, "grad_norm": 
2.2965822219848633, "learning_rate": 9.999144935720178e-06, "loss": 3.0574, "step": 282150 }, { "epoch": 0.278012026827865, "grad_norm": 2.296645164489746, "learning_rate": 9.999141920666197e-06, "loss": 3.125, "step": 282200 }, { "epoch": 0.27806128480568715, "grad_norm": 2.32525634765625, "learning_rate": 9.999138900306311e-06, "loss": 2.9826, "step": 282250 }, { "epoch": 0.2781105427835092, "grad_norm": 2.3741369247436523, "learning_rate": 9.999135874640525e-06, "loss": 3.0473, "step": 282300 }, { "epoch": 0.2781598007613313, "grad_norm": 2.440596103668213, "learning_rate": 9.999132843668842e-06, "loss": 3.0467, "step": 282350 }, { "epoch": 0.2782090587391534, "grad_norm": 2.3162362575531006, "learning_rate": 9.999129807391263e-06, "loss": 3.0696, "step": 282400 }, { "epoch": 0.2782583167169755, "grad_norm": 2.7181031703948975, "learning_rate": 9.999126765807792e-06, "loss": 3.1181, "step": 282450 }, { "epoch": 0.27830757469479755, "grad_norm": 2.3502416610717773, "learning_rate": 9.999123718918433e-06, "loss": 3.0978, "step": 282500 }, { "epoch": 0.2783568326726197, "grad_norm": 2.2742552757263184, "learning_rate": 9.99912066672319e-06, "loss": 3.0828, "step": 282550 }, { "epoch": 0.27840609065044175, "grad_norm": 2.249372720718384, "learning_rate": 9.999117609222064e-06, "loss": 3.098, "step": 282600 }, { "epoch": 0.2784553486282638, "grad_norm": 2.2155802249908447, "learning_rate": 9.99911454641506e-06, "loss": 3.0289, "step": 282650 }, { "epoch": 0.27850460660608595, "grad_norm": 2.2292087078094482, "learning_rate": 9.99911147830218e-06, "loss": 3.0769, "step": 282700 }, { "epoch": 0.278553864583908, "grad_norm": 2.421933650970459, "learning_rate": 9.99910840488343e-06, "loss": 3.067, "step": 282750 }, { "epoch": 0.2786031225617301, "grad_norm": 2.8418850898742676, "learning_rate": 9.99910532615881e-06, "loss": 3.067, "step": 282800 }, { "epoch": 0.2786523805395522, "grad_norm": 2.1977171897888184, "learning_rate": 9.999102242128325e-06, "loss": 3.0485, 
"step": 282850 }, { "epoch": 0.2787016385173743, "grad_norm": 2.5100700855255127, "learning_rate": 9.999099152791978e-06, "loss": 2.9979, "step": 282900 }, { "epoch": 0.27875089649519635, "grad_norm": 2.441507339477539, "learning_rate": 9.999096058149772e-06, "loss": 3.0531, "step": 282950 }, { "epoch": 0.2788001544730184, "grad_norm": 2.248713254928589, "learning_rate": 9.99909295820171e-06, "loss": 3.0602, "step": 283000 }, { "epoch": 0.27884941245084055, "grad_norm": 2.4158966541290283, "learning_rate": 9.999089852947796e-06, "loss": 3.0717, "step": 283050 }, { "epoch": 0.2788986704286626, "grad_norm": 2.2319061756134033, "learning_rate": 9.999086742388031e-06, "loss": 3.078, "step": 283100 }, { "epoch": 0.2789479284064847, "grad_norm": 2.2059195041656494, "learning_rate": 9.999083626522425e-06, "loss": 3.1234, "step": 283150 }, { "epoch": 0.2789971863843068, "grad_norm": 2.0863349437713623, "learning_rate": 9.999080505350972e-06, "loss": 3.0232, "step": 283200 }, { "epoch": 0.2790464443621289, "grad_norm": 2.1934447288513184, "learning_rate": 9.999077378873681e-06, "loss": 3.057, "step": 283250 }, { "epoch": 0.27909570233995096, "grad_norm": 2.5078065395355225, "learning_rate": 9.999074247090554e-06, "loss": 2.9812, "step": 283300 }, { "epoch": 0.2791449603177731, "grad_norm": 2.371345043182373, "learning_rate": 9.999071110001597e-06, "loss": 3.1486, "step": 283350 }, { "epoch": 0.27919421829559515, "grad_norm": 2.221829652786255, "learning_rate": 9.999067967606809e-06, "loss": 3.0852, "step": 283400 }, { "epoch": 0.2792434762734172, "grad_norm": 2.3172950744628906, "learning_rate": 9.999064819906194e-06, "loss": 3.1421, "step": 283450 }, { "epoch": 0.27929273425123935, "grad_norm": 2.5547940731048584, "learning_rate": 9.999061666899757e-06, "loss": 3.0961, "step": 283500 }, { "epoch": 0.2793419922290614, "grad_norm": 2.365513801574707, "learning_rate": 9.999058508587502e-06, "loss": 3.0703, "step": 283550 }, { "epoch": 0.2793912502068835, "grad_norm": 
2.114649534225464, "learning_rate": 9.99905534496943e-06, "loss": 3.0166, "step": 283600 }, { "epoch": 0.2794405081847056, "grad_norm": 2.3697657585144043, "learning_rate": 9.999052176045548e-06, "loss": 3.035, "step": 283650 }, { "epoch": 0.2794897661625277, "grad_norm": 2.3598742485046387, "learning_rate": 9.999049001815855e-06, "loss": 3.0013, "step": 283700 }, { "epoch": 0.27953902414034976, "grad_norm": 2.351696014404297, "learning_rate": 9.999045822280358e-06, "loss": 2.9739, "step": 283750 }, { "epoch": 0.2795882821181719, "grad_norm": 2.223191261291504, "learning_rate": 9.999042637439056e-06, "loss": 3.0387, "step": 283800 }, { "epoch": 0.27963754009599395, "grad_norm": 2.472574472427368, "learning_rate": 9.999039447291957e-06, "loss": 3.0597, "step": 283850 }, { "epoch": 0.279686798073816, "grad_norm": 2.383924722671509, "learning_rate": 9.99903625183906e-06, "loss": 3.0159, "step": 283900 }, { "epoch": 0.27973605605163815, "grad_norm": 2.2692716121673584, "learning_rate": 9.999033051080373e-06, "loss": 3.1268, "step": 283950 }, { "epoch": 0.2797853140294602, "grad_norm": 2.2256224155426025, "learning_rate": 9.999029845015899e-06, "loss": 3.0149, "step": 284000 }, { "epoch": 0.2798345720072823, "grad_norm": 2.4322094917297363, "learning_rate": 9.999026633645637e-06, "loss": 3.0928, "step": 284050 }, { "epoch": 0.27988382998510436, "grad_norm": 2.566685199737549, "learning_rate": 9.999023416969595e-06, "loss": 3.046, "step": 284100 }, { "epoch": 0.2799330879629265, "grad_norm": 2.224459409713745, "learning_rate": 9.999020194987772e-06, "loss": 3.0461, "step": 284150 }, { "epoch": 0.27998234594074856, "grad_norm": 2.3266563415527344, "learning_rate": 9.999016967700176e-06, "loss": 3.0313, "step": 284200 }, { "epoch": 0.2800316039185706, "grad_norm": 2.110482931137085, "learning_rate": 9.999013735106806e-06, "loss": 3.0403, "step": 284250 }, { "epoch": 0.28008086189639275, "grad_norm": 2.3589885234832764, "learning_rate": 9.99901049720767e-06, "loss": 3.0329, 
"step": 284300 }, { "epoch": 0.2801301198742148, "grad_norm": 2.2958736419677734, "learning_rate": 9.99900725400277e-06, "loss": 3.0081, "step": 284350 }, { "epoch": 0.2801793778520369, "grad_norm": 2.3444833755493164, "learning_rate": 9.999004005492105e-06, "loss": 3.0006, "step": 284400 }, { "epoch": 0.280228635829859, "grad_norm": 2.2705130577087402, "learning_rate": 9.999000751675684e-06, "loss": 3.0836, "step": 284450 }, { "epoch": 0.2802778938076811, "grad_norm": 2.3260834217071533, "learning_rate": 9.998997492553508e-06, "loss": 3.0467, "step": 284500 }, { "epoch": 0.28032715178550316, "grad_norm": 2.385972499847412, "learning_rate": 9.998994228125582e-06, "loss": 3.0741, "step": 284550 }, { "epoch": 0.2803764097633253, "grad_norm": 2.2653753757476807, "learning_rate": 9.99899095839191e-06, "loss": 3.1045, "step": 284600 }, { "epoch": 0.28042566774114736, "grad_norm": 2.2826013565063477, "learning_rate": 9.998987683352492e-06, "loss": 3.0287, "step": 284650 }, { "epoch": 0.2804749257189694, "grad_norm": 2.385037660598755, "learning_rate": 9.998984403007333e-06, "loss": 3.0685, "step": 284700 }, { "epoch": 0.28052418369679155, "grad_norm": 2.3030974864959717, "learning_rate": 9.998981117356437e-06, "loss": 3.1085, "step": 284750 }, { "epoch": 0.2805734416746136, "grad_norm": 2.5274698734283447, "learning_rate": 9.998977826399808e-06, "loss": 3.0624, "step": 284800 }, { "epoch": 0.2806226996524357, "grad_norm": 2.213287353515625, "learning_rate": 9.998974530137448e-06, "loss": 3.0693, "step": 284850 }, { "epoch": 0.2806719576302578, "grad_norm": 2.2143609523773193, "learning_rate": 9.998971228569362e-06, "loss": 3.1078, "step": 284900 }, { "epoch": 0.2807212156080799, "grad_norm": 2.7265713214874268, "learning_rate": 9.998967921695552e-06, "loss": 3.0705, "step": 284950 }, { "epoch": 0.28077047358590196, "grad_norm": 2.5114731788635254, "learning_rate": 9.998964609516025e-06, "loss": 3.1559, "step": 285000 }, { "epoch": 0.2808197315637241, "grad_norm": 
2.204561233520508, "learning_rate": 9.998961292030779e-06, "loss": 3.0338, "step": 285050 }, { "epoch": 0.28086898954154615, "grad_norm": 2.443343162536621, "learning_rate": 9.998957969239821e-06, "loss": 3.1315, "step": 285100 }, { "epoch": 0.2809182475193682, "grad_norm": 2.4720842838287354, "learning_rate": 9.998954641143154e-06, "loss": 3.0679, "step": 285150 }, { "epoch": 0.28096750549719035, "grad_norm": 2.213996171951294, "learning_rate": 9.998951307740783e-06, "loss": 3.0937, "step": 285200 }, { "epoch": 0.2810167634750124, "grad_norm": 2.5104918479919434, "learning_rate": 9.998947969032709e-06, "loss": 3.0652, "step": 285250 }, { "epoch": 0.2810660214528345, "grad_norm": 2.366490364074707, "learning_rate": 9.998944625018936e-06, "loss": 3.0132, "step": 285300 }, { "epoch": 0.28111527943065656, "grad_norm": 2.5512473583221436, "learning_rate": 9.998941275699469e-06, "loss": 3.0053, "step": 285350 }, { "epoch": 0.2811645374084787, "grad_norm": 2.2868926525115967, "learning_rate": 9.99893792107431e-06, "loss": 3.0323, "step": 285400 }, { "epoch": 0.28121379538630076, "grad_norm": 2.399994134902954, "learning_rate": 9.998934561143464e-06, "loss": 3.0645, "step": 285450 }, { "epoch": 0.28126305336412283, "grad_norm": 2.1074087619781494, "learning_rate": 9.998931195906932e-06, "loss": 3.1039, "step": 285500 }, { "epoch": 0.28131231134194495, "grad_norm": 2.338667154312134, "learning_rate": 9.998927825364722e-06, "loss": 3.071, "step": 285550 }, { "epoch": 0.281361569319767, "grad_norm": 2.2370150089263916, "learning_rate": 9.998924449516832e-06, "loss": 3.0482, "step": 285600 }, { "epoch": 0.2814108272975891, "grad_norm": 2.323967933654785, "learning_rate": 9.998921068363271e-06, "loss": 3.1089, "step": 285650 }, { "epoch": 0.2814600852754112, "grad_norm": 2.2752182483673096, "learning_rate": 9.998917681904038e-06, "loss": 2.9928, "step": 285700 }, { "epoch": 0.2815093432532333, "grad_norm": 2.3620493412017822, "learning_rate": 9.99891429013914e-06, "loss": 
3.0495, "step": 285750 }, { "epoch": 0.28155860123105536, "grad_norm": 2.251868724822998, "learning_rate": 9.998910893068579e-06, "loss": 3.0068, "step": 285800 }, { "epoch": 0.2816078592088775, "grad_norm": 2.176079511642456, "learning_rate": 9.998907490692359e-06, "loss": 3.0275, "step": 285850 }, { "epoch": 0.28165711718669956, "grad_norm": 2.4708309173583984, "learning_rate": 9.998904083010483e-06, "loss": 3.0241, "step": 285900 }, { "epoch": 0.2817063751645216, "grad_norm": 2.4806573390960693, "learning_rate": 9.998900670022956e-06, "loss": 3.0124, "step": 285950 }, { "epoch": 0.28175563314234375, "grad_norm": 2.2205982208251953, "learning_rate": 9.998897251729781e-06, "loss": 3.0123, "step": 286000 }, { "epoch": 0.2818048911201658, "grad_norm": 2.8278918266296387, "learning_rate": 9.99889382813096e-06, "loss": 3.0056, "step": 286050 }, { "epoch": 0.2818541490979879, "grad_norm": 3.671537399291992, "learning_rate": 9.9988903992265e-06, "loss": 3.0892, "step": 286100 }, { "epoch": 0.28190340707581, "grad_norm": 2.279270648956299, "learning_rate": 9.998886965016402e-06, "loss": 3.0301, "step": 286150 }, { "epoch": 0.2819526650536321, "grad_norm": 2.3384532928466797, "learning_rate": 9.998883525500669e-06, "loss": 3.1009, "step": 286200 }, { "epoch": 0.28200192303145416, "grad_norm": 2.4001433849334717, "learning_rate": 9.998880080679308e-06, "loss": 3.0271, "step": 286250 }, { "epoch": 0.2820511810092763, "grad_norm": 2.3616175651550293, "learning_rate": 9.998876630552318e-06, "loss": 2.9973, "step": 286300 }, { "epoch": 0.28210043898709836, "grad_norm": 2.275803327560425, "learning_rate": 9.998873175119708e-06, "loss": 3.0043, "step": 286350 }, { "epoch": 0.2821496969649204, "grad_norm": 2.025627613067627, "learning_rate": 9.998869714381476e-06, "loss": 2.9828, "step": 286400 }, { "epoch": 0.28219895494274255, "grad_norm": 2.245349884033203, "learning_rate": 9.998866248337631e-06, "loss": 3.0455, "step": 286450 }, { "epoch": 0.2822482129205646, "grad_norm": 
2.2885589599609375, "learning_rate": 9.998862776988174e-06, "loss": 3.0832, "step": 286500 }, { "epoch": 0.2822974708983867, "grad_norm": 2.3451578617095947, "learning_rate": 9.998859300333109e-06, "loss": 3.1079, "step": 286550 }, { "epoch": 0.28234672887620876, "grad_norm": 2.4749865531921387, "learning_rate": 9.99885581837244e-06, "loss": 3.0313, "step": 286600 }, { "epoch": 0.2823959868540309, "grad_norm": 2.249617338180542, "learning_rate": 9.998852331106167e-06, "loss": 3.072, "step": 286650 }, { "epoch": 0.28244524483185296, "grad_norm": 2.603849172592163, "learning_rate": 9.9988488385343e-06, "loss": 2.9739, "step": 286700 }, { "epoch": 0.28249450280967503, "grad_norm": 2.364607810974121, "learning_rate": 9.998845340656839e-06, "loss": 3.0469, "step": 286750 }, { "epoch": 0.28254376078749716, "grad_norm": 2.3598315715789795, "learning_rate": 9.99884183747379e-06, "loss": 3.072, "step": 286800 }, { "epoch": 0.2825930187653192, "grad_norm": 2.743598699569702, "learning_rate": 9.998838328985154e-06, "loss": 3.0913, "step": 286850 }, { "epoch": 0.2826422767431413, "grad_norm": 2.375572443008423, "learning_rate": 9.998834815190935e-06, "loss": 3.0564, "step": 286900 }, { "epoch": 0.2826915347209634, "grad_norm": 2.207986354827881, "learning_rate": 9.998831296091139e-06, "loss": 3.0552, "step": 286950 }, { "epoch": 0.2827407926987855, "grad_norm": 2.2509355545043945, "learning_rate": 9.998827771685767e-06, "loss": 3.0795, "step": 287000 }, { "epoch": 0.28279005067660756, "grad_norm": 2.2666332721710205, "learning_rate": 9.998824241974825e-06, "loss": 3.0698, "step": 287050 }, { "epoch": 0.2828393086544297, "grad_norm": 2.177332639694214, "learning_rate": 9.998820706958317e-06, "loss": 3.0456, "step": 287100 }, { "epoch": 0.28288856663225176, "grad_norm": 2.2303922176361084, "learning_rate": 9.998817166636244e-06, "loss": 2.9761, "step": 287150 }, { "epoch": 0.28293782461007383, "grad_norm": 2.4240329265594482, "learning_rate": 9.998813621008611e-06, "loss": 
3.0859, "step": 287200 }, { "epoch": 0.28298708258789596, "grad_norm": 2.3608810901641846, "learning_rate": 9.998810070075424e-06, "loss": 3.0215, "step": 287250 }, { "epoch": 0.283036340565718, "grad_norm": 2.373133420944214, "learning_rate": 9.998806513836684e-06, "loss": 3.0163, "step": 287300 }, { "epoch": 0.2830855985435401, "grad_norm": 2.2016563415527344, "learning_rate": 9.998802952292397e-06, "loss": 3.003, "step": 287350 }, { "epoch": 0.2831348565213622, "grad_norm": 2.2067315578460693, "learning_rate": 9.998799385442563e-06, "loss": 3.1107, "step": 287400 }, { "epoch": 0.2831841144991843, "grad_norm": 2.4514243602752686, "learning_rate": 9.99879581328719e-06, "loss": 3.1229, "step": 287450 }, { "epoch": 0.28323337247700636, "grad_norm": 2.3377537727355957, "learning_rate": 9.99879223582628e-06, "loss": 3.0575, "step": 287500 }, { "epoch": 0.2832826304548285, "grad_norm": 2.2953643798828125, "learning_rate": 9.998788653059835e-06, "loss": 3.0249, "step": 287550 }, { "epoch": 0.28333188843265056, "grad_norm": 2.4936139583587646, "learning_rate": 9.998785064987863e-06, "loss": 2.9752, "step": 287600 }, { "epoch": 0.28338114641047263, "grad_norm": 2.254514694213867, "learning_rate": 9.998781471610365e-06, "loss": 3.0198, "step": 287650 }, { "epoch": 0.28343040438829475, "grad_norm": 2.395435094833374, "learning_rate": 9.998777872927344e-06, "loss": 2.9857, "step": 287700 }, { "epoch": 0.2834796623661168, "grad_norm": 2.3736109733581543, "learning_rate": 9.998774268938806e-06, "loss": 3.0874, "step": 287750 }, { "epoch": 0.2835289203439389, "grad_norm": 2.3006699085235596, "learning_rate": 9.998770659644752e-06, "loss": 2.987, "step": 287800 }, { "epoch": 0.28357817832176097, "grad_norm": 2.2769694328308105, "learning_rate": 9.99876704504519e-06, "loss": 3.0428, "step": 287850 }, { "epoch": 0.2836274362995831, "grad_norm": 2.286903142929077, "learning_rate": 9.998763425140121e-06, "loss": 3.0186, "step": 287900 }, { "epoch": 0.28367669427740516, "grad_norm": 
2.3983216285705566, "learning_rate": 9.998759799929551e-06, "loss": 3.053, "step": 287950 }, { "epoch": 0.28372595225522723, "grad_norm": 2.532210350036621, "learning_rate": 9.99875616941348e-06, "loss": 3.0114, "step": 288000 }, { "epoch": 0.28377521023304936, "grad_norm": 2.2508087158203125, "learning_rate": 9.998752533591915e-06, "loss": 3.0658, "step": 288050 }, { "epoch": 0.28382446821087143, "grad_norm": 2.661543130874634, "learning_rate": 9.99874889246486e-06, "loss": 3.0781, "step": 288100 }, { "epoch": 0.2838737261886935, "grad_norm": 2.1585402488708496, "learning_rate": 9.998745246032315e-06, "loss": 3.081, "step": 288150 }, { "epoch": 0.2839229841665156, "grad_norm": 2.7667500972747803, "learning_rate": 9.99874159429429e-06, "loss": 3.017, "step": 288200 }, { "epoch": 0.2839722421443377, "grad_norm": 2.31703519821167, "learning_rate": 9.998737937250783e-06, "loss": 3.0051, "step": 288250 }, { "epoch": 0.28402150012215976, "grad_norm": 2.3286850452423096, "learning_rate": 9.998734274901801e-06, "loss": 3.0532, "step": 288300 }, { "epoch": 0.2840707580999819, "grad_norm": 2.318650484085083, "learning_rate": 9.998730607247349e-06, "loss": 3.0587, "step": 288350 }, { "epoch": 0.28412001607780396, "grad_norm": 2.330873966217041, "learning_rate": 9.998726934287426e-06, "loss": 2.9778, "step": 288400 }, { "epoch": 0.28416927405562603, "grad_norm": 2.1771373748779297, "learning_rate": 9.998723256022043e-06, "loss": 3.1318, "step": 288450 }, { "epoch": 0.28421853203344816, "grad_norm": 2.530815362930298, "learning_rate": 9.998719572451198e-06, "loss": 3.0885, "step": 288500 }, { "epoch": 0.2842677900112702, "grad_norm": 2.3224003314971924, "learning_rate": 9.998715883574898e-06, "loss": 3.03, "step": 288550 }, { "epoch": 0.2843170479890923, "grad_norm": 2.365121364593506, "learning_rate": 9.998712189393144e-06, "loss": 3.0801, "step": 288600 }, { "epoch": 0.2843663059669144, "grad_norm": 2.2820188999176025, "learning_rate": 9.998708489905944e-06, "loss": 3.075, 
"step": 288650 }, { "epoch": 0.2844155639447365, "grad_norm": 2.341099262237549, "learning_rate": 9.998704785113298e-06, "loss": 3.0954, "step": 288700 }, { "epoch": 0.28446482192255856, "grad_norm": 2.417246103286743, "learning_rate": 9.998701075015212e-06, "loss": 3.0314, "step": 288750 }, { "epoch": 0.2845140799003807, "grad_norm": 2.5416901111602783, "learning_rate": 9.99869735961169e-06, "loss": 3.0437, "step": 288800 }, { "epoch": 0.28456333787820276, "grad_norm": 2.4126973152160645, "learning_rate": 9.998693638902736e-06, "loss": 3.0994, "step": 288850 }, { "epoch": 0.28461259585602483, "grad_norm": 2.352830410003662, "learning_rate": 9.998689912888354e-06, "loss": 2.997, "step": 288900 }, { "epoch": 0.28466185383384696, "grad_norm": 2.3088693618774414, "learning_rate": 9.998686181568546e-06, "loss": 3.0273, "step": 288950 }, { "epoch": 0.284711111811669, "grad_norm": 2.1958441734313965, "learning_rate": 9.998682444943318e-06, "loss": 3.0763, "step": 289000 }, { "epoch": 0.2847603697894911, "grad_norm": 2.1652441024780273, "learning_rate": 9.998678703012673e-06, "loss": 3.1059, "step": 289050 }, { "epoch": 0.28480962776731317, "grad_norm": 2.351027488708496, "learning_rate": 9.998674955776614e-06, "loss": 3.0375, "step": 289100 }, { "epoch": 0.2848588857451353, "grad_norm": 2.4307076930999756, "learning_rate": 9.99867120323515e-06, "loss": 3.0258, "step": 289150 }, { "epoch": 0.28490814372295736, "grad_norm": 2.421461820602417, "learning_rate": 9.998667445388278e-06, "loss": 3.1098, "step": 289200 }, { "epoch": 0.28495740170077943, "grad_norm": 2.381002187728882, "learning_rate": 9.998663682236006e-06, "loss": 3.0194, "step": 289250 }, { "epoch": 0.28500665967860156, "grad_norm": 2.201969861984253, "learning_rate": 9.99865991377834e-06, "loss": 3.1069, "step": 289300 }, { "epoch": 0.28505591765642363, "grad_norm": 2.456117868423462, "learning_rate": 9.998656140015279e-06, "loss": 3.0396, "step": 289350 }, { "epoch": 0.2851051756342457, "grad_norm": 
2.3484225273132324, "learning_rate": 9.99865236094683e-06, "loss": 3.0222, "step": 289400 }, { "epoch": 0.2851544336120678, "grad_norm": 2.276754856109619, "learning_rate": 9.998648576572994e-06, "loss": 3.0406, "step": 289450 }, { "epoch": 0.2852036915898899, "grad_norm": 2.3536970615386963, "learning_rate": 9.99864478689378e-06, "loss": 2.9742, "step": 289500 }, { "epoch": 0.28525294956771197, "grad_norm": 2.249814510345459, "learning_rate": 9.998640991909189e-06, "loss": 2.9907, "step": 289550 }, { "epoch": 0.2853022075455341, "grad_norm": 2.3742475509643555, "learning_rate": 9.998637191619223e-06, "loss": 3.0442, "step": 289600 }, { "epoch": 0.28535146552335616, "grad_norm": 2.253692150115967, "learning_rate": 9.99863338602389e-06, "loss": 3.0869, "step": 289650 }, { "epoch": 0.28540072350117823, "grad_norm": 2.336601972579956, "learning_rate": 9.998629575123193e-06, "loss": 3.1209, "step": 289700 }, { "epoch": 0.28544998147900036, "grad_norm": 2.5015978813171387, "learning_rate": 9.998625758917134e-06, "loss": 3.0614, "step": 289750 }, { "epoch": 0.28549923945682243, "grad_norm": 2.152068614959717, "learning_rate": 9.998621937405722e-06, "loss": 3.0552, "step": 289800 }, { "epoch": 0.2855484974346445, "grad_norm": 2.3531506061553955, "learning_rate": 9.998618110588954e-06, "loss": 3.0623, "step": 289850 }, { "epoch": 0.2855977554124666, "grad_norm": 2.273463726043701, "learning_rate": 9.998614278466838e-06, "loss": 3.0507, "step": 289900 }, { "epoch": 0.2856470133902887, "grad_norm": 2.2851500511169434, "learning_rate": 9.99861044103938e-06, "loss": 3.1008, "step": 289950 }, { "epoch": 0.28569627136811077, "grad_norm": 2.275754690170288, "learning_rate": 9.99860659830658e-06, "loss": 2.9778, "step": 290000 }, { "epoch": 0.2857455293459329, "grad_norm": 2.3485803604125977, "learning_rate": 9.998602750268444e-06, "loss": 2.9705, "step": 290050 }, { "epoch": 0.28579478732375496, "grad_norm": 2.177736520767212, "learning_rate": 9.998598896924976e-06, "loss": 
3.0499, "step": 290100 }, { "epoch": 0.28584404530157703, "grad_norm": 2.284900188446045, "learning_rate": 9.99859503827618e-06, "loss": 3.0254, "step": 290150 }, { "epoch": 0.28589330327939916, "grad_norm": 2.2090563774108887, "learning_rate": 9.99859117432206e-06, "loss": 2.9789, "step": 290200 }, { "epoch": 0.28594256125722123, "grad_norm": 2.348712921142578, "learning_rate": 9.998587305062621e-06, "loss": 3.0074, "step": 290250 }, { "epoch": 0.2859918192350433, "grad_norm": 2.168501615524292, "learning_rate": 9.998583430497866e-06, "loss": 3.022, "step": 290300 }, { "epoch": 0.28604107721286537, "grad_norm": 2.227161407470703, "learning_rate": 9.9985795506278e-06, "loss": 3.035, "step": 290350 }, { "epoch": 0.2860903351906875, "grad_norm": 2.1885433197021484, "learning_rate": 9.998575665452427e-06, "loss": 2.9717, "step": 290400 }, { "epoch": 0.28613959316850957, "grad_norm": 2.3073792457580566, "learning_rate": 9.998571774971748e-06, "loss": 3.1001, "step": 290450 }, { "epoch": 0.28618885114633164, "grad_norm": 2.255302906036377, "learning_rate": 9.998567879185772e-06, "loss": 3.0153, "step": 290500 }, { "epoch": 0.28623810912415376, "grad_norm": 2.36064076423645, "learning_rate": 9.9985639780945e-06, "loss": 2.9695, "step": 290550 }, { "epoch": 0.28628736710197583, "grad_norm": 2.3862011432647705, "learning_rate": 9.998560071697939e-06, "loss": 3.0633, "step": 290600 }, { "epoch": 0.2863366250797979, "grad_norm": 2.2919342517852783, "learning_rate": 9.99855615999609e-06, "loss": 3.0401, "step": 290650 }, { "epoch": 0.28638588305762, "grad_norm": 2.4770870208740234, "learning_rate": 9.998552242988958e-06, "loss": 3.0774, "step": 290700 }, { "epoch": 0.2864351410354421, "grad_norm": 2.1949996948242188, "learning_rate": 9.998548320676547e-06, "loss": 3.0379, "step": 290750 }, { "epoch": 0.28648439901326417, "grad_norm": 2.3981010913848877, "learning_rate": 9.998544393058863e-06, "loss": 3.0624, "step": 290800 }, { "epoch": 0.2865336569910863, "grad_norm": 
2.358799457550049, "learning_rate": 9.998540460135909e-06, "loss": 3.0046, "step": 290850 }, { "epoch": 0.28658291496890836, "grad_norm": 2.4417061805725098, "learning_rate": 9.998536521907688e-06, "loss": 3.0653, "step": 290900 }, { "epoch": 0.28663217294673043, "grad_norm": 2.6389684677124023, "learning_rate": 9.998532578374205e-06, "loss": 3.0171, "step": 290950 }, { "epoch": 0.28668143092455256, "grad_norm": 2.358948230743408, "learning_rate": 9.998528629535467e-06, "loss": 2.9944, "step": 291000 }, { "epoch": 0.28673068890237463, "grad_norm": 2.3416569232940674, "learning_rate": 9.998524675391473e-06, "loss": 3.0654, "step": 291050 }, { "epoch": 0.2867799468801967, "grad_norm": 2.2462575435638428, "learning_rate": 9.998520715942231e-06, "loss": 3.0886, "step": 291100 }, { "epoch": 0.2868292048580188, "grad_norm": 2.235870122909546, "learning_rate": 9.998516751187744e-06, "loss": 3.1541, "step": 291150 }, { "epoch": 0.2868784628358409, "grad_norm": 2.026491641998291, "learning_rate": 9.998512781128016e-06, "loss": 3.0491, "step": 291200 }, { "epoch": 0.28692772081366297, "grad_norm": 2.756687641143799, "learning_rate": 9.99850880576305e-06, "loss": 2.989, "step": 291250 }, { "epoch": 0.2869769787914851, "grad_norm": 2.439873456954956, "learning_rate": 9.998504825092854e-06, "loss": 3.129, "step": 291300 }, { "epoch": 0.28702623676930716, "grad_norm": 2.2006893157958984, "learning_rate": 9.998500839117429e-06, "loss": 3.0293, "step": 291350 }, { "epoch": 0.28707549474712923, "grad_norm": 2.4875404834747314, "learning_rate": 9.99849684783678e-06, "loss": 3.068, "step": 291400 }, { "epoch": 0.28712475272495136, "grad_norm": 2.3245279788970947, "learning_rate": 9.998492851250913e-06, "loss": 3.0295, "step": 291450 }, { "epoch": 0.28717401070277343, "grad_norm": 2.655789852142334, "learning_rate": 9.998488849359828e-06, "loss": 3.0825, "step": 291500 }, { "epoch": 0.2872232686805955, "grad_norm": 2.3727099895477295, "learning_rate": 9.998484842163533e-06, "loss": 
3.0384, "step": 291550 }, { "epoch": 0.28727252665841757, "grad_norm": 2.3597002029418945, "learning_rate": 9.998480829662032e-06, "loss": 3.0726, "step": 291600 }, { "epoch": 0.2873217846362397, "grad_norm": 2.3375067710876465, "learning_rate": 9.998476811855325e-06, "loss": 2.9592, "step": 291650 }, { "epoch": 0.28737104261406177, "grad_norm": 2.2842366695404053, "learning_rate": 9.998472788743424e-06, "loss": 3.0099, "step": 291700 }, { "epoch": 0.28742030059188384, "grad_norm": 2.399832248687744, "learning_rate": 9.998468760326326e-06, "loss": 3.0611, "step": 291750 }, { "epoch": 0.28746955856970596, "grad_norm": 2.417965888977051, "learning_rate": 9.99846472660404e-06, "loss": 3.0718, "step": 291800 }, { "epoch": 0.28751881654752803, "grad_norm": 2.2810895442962646, "learning_rate": 9.998460687576567e-06, "loss": 3.094, "step": 291850 }, { "epoch": 0.2875680745253501, "grad_norm": 2.367478370666504, "learning_rate": 9.998456643243914e-06, "loss": 3.0856, "step": 291900 }, { "epoch": 0.28761733250317223, "grad_norm": 2.29252290725708, "learning_rate": 9.998452593606083e-06, "loss": 2.9614, "step": 291950 }, { "epoch": 0.2876665904809943, "grad_norm": 2.439150810241699, "learning_rate": 9.99844853866308e-06, "loss": 3.0131, "step": 292000 }, { "epoch": 0.28771584845881637, "grad_norm": 2.387671709060669, "learning_rate": 9.998444478414908e-06, "loss": 3.0589, "step": 292050 }, { "epoch": 0.2877651064366385, "grad_norm": 2.462489128112793, "learning_rate": 9.998440412861572e-06, "loss": 3.0246, "step": 292100 }, { "epoch": 0.28781436441446057, "grad_norm": 2.3577253818511963, "learning_rate": 9.998436342003077e-06, "loss": 3.0819, "step": 292150 }, { "epoch": 0.28786362239228264, "grad_norm": 2.3059709072113037, "learning_rate": 9.998432265839425e-06, "loss": 3.0599, "step": 292200 }, { "epoch": 0.28791288037010476, "grad_norm": 2.184478759765625, "learning_rate": 9.998428184370623e-06, "loss": 2.9718, "step": 292250 }, { "epoch": 0.28796213834792683, 
"grad_norm": 2.3547329902648926, "learning_rate": 9.998424097596674e-06, "loss": 3.0426, "step": 292300 }, { "epoch": 0.2880113963257489, "grad_norm": 2.318939685821533, "learning_rate": 9.998420005517583e-06, "loss": 3.0042, "step": 292350 }, { "epoch": 0.28806065430357103, "grad_norm": 2.2254958152770996, "learning_rate": 9.998415908133354e-06, "loss": 2.9885, "step": 292400 }, { "epoch": 0.2881099122813931, "grad_norm": 2.692089557647705, "learning_rate": 9.998411805443991e-06, "loss": 3.0373, "step": 292450 }, { "epoch": 0.28815917025921517, "grad_norm": 2.802527904510498, "learning_rate": 9.998407697449498e-06, "loss": 3.0506, "step": 292500 }, { "epoch": 0.2882084282370373, "grad_norm": 2.3120856285095215, "learning_rate": 9.998403584149881e-06, "loss": 3.0394, "step": 292550 }, { "epoch": 0.28825768621485937, "grad_norm": 2.6264266967773438, "learning_rate": 9.998399465545144e-06, "loss": 3.0542, "step": 292600 }, { "epoch": 0.28830694419268144, "grad_norm": 2.3126935958862305, "learning_rate": 9.998395341635288e-06, "loss": 3.0138, "step": 292650 }, { "epoch": 0.28835620217050356, "grad_norm": 2.3678417205810547, "learning_rate": 9.998391212420323e-06, "loss": 3.0341, "step": 292700 }, { "epoch": 0.28840546014832563, "grad_norm": 2.287625789642334, "learning_rate": 9.998387077900248e-06, "loss": 3.0277, "step": 292750 }, { "epoch": 0.2884547181261477, "grad_norm": 2.3791117668151855, "learning_rate": 9.998382938075072e-06, "loss": 3.0604, "step": 292800 }, { "epoch": 0.2885039761039698, "grad_norm": 2.245936870574951, "learning_rate": 9.998378792944795e-06, "loss": 3.0577, "step": 292850 }, { "epoch": 0.2885532340817919, "grad_norm": 2.310142993927002, "learning_rate": 9.998374642509427e-06, "loss": 3.0252, "step": 292900 }, { "epoch": 0.28860249205961397, "grad_norm": 2.2812869548797607, "learning_rate": 9.998370486768966e-06, "loss": 3.0281, "step": 292950 }, { "epoch": 0.28865175003743604, "grad_norm": 2.322847366333008, "learning_rate": 
9.99836632572342e-06, "loss": 3.0176, "step": 293000 }, { "epoch": 0.28870100801525816, "grad_norm": 2.3665616512298584, "learning_rate": 9.998362159372795e-06, "loss": 3.0327, "step": 293050 }, { "epoch": 0.28875026599308024, "grad_norm": 2.3478260040283203, "learning_rate": 9.998357987717091e-06, "loss": 3.0172, "step": 293100 }, { "epoch": 0.2887995239709023, "grad_norm": 2.3367061614990234, "learning_rate": 9.998353810756316e-06, "loss": 3.0149, "step": 293150 }, { "epoch": 0.28884878194872443, "grad_norm": 2.296567916870117, "learning_rate": 9.998349628490472e-06, "loss": 2.987, "step": 293200 }, { "epoch": 0.2888980399265465, "grad_norm": 2.200796604156494, "learning_rate": 9.998345440919565e-06, "loss": 3.0011, "step": 293250 }, { "epoch": 0.28894729790436857, "grad_norm": 2.3108108043670654, "learning_rate": 9.9983412480436e-06, "loss": 3.0328, "step": 293300 }, { "epoch": 0.2889965558821907, "grad_norm": 2.2771708965301514, "learning_rate": 9.99833704986258e-06, "loss": 2.968, "step": 293350 }, { "epoch": 0.28904581386001277, "grad_norm": 2.2548904418945312, "learning_rate": 9.99833284637651e-06, "loss": 3.0709, "step": 293400 }, { "epoch": 0.28909507183783484, "grad_norm": 2.316235065460205, "learning_rate": 9.998328637585393e-06, "loss": 3.0418, "step": 293450 }, { "epoch": 0.28914432981565696, "grad_norm": 2.4283809661865234, "learning_rate": 9.998324423489238e-06, "loss": 2.9466, "step": 293500 }, { "epoch": 0.28919358779347903, "grad_norm": 2.586810827255249, "learning_rate": 9.998320204088044e-06, "loss": 3.072, "step": 293550 }, { "epoch": 0.2892428457713011, "grad_norm": 2.4635560512542725, "learning_rate": 9.998315979381818e-06, "loss": 3.0213, "step": 293600 }, { "epoch": 0.28929210374912323, "grad_norm": 3.2042336463928223, "learning_rate": 9.998311749370564e-06, "loss": 2.9962, "step": 293650 }, { "epoch": 0.2893413617269453, "grad_norm": 2.270669937133789, "learning_rate": 9.998307514054288e-06, "loss": 2.9994, "step": 293700 }, { "epoch": 
0.28939061970476737, "grad_norm": 2.392421007156372, "learning_rate": 9.998303273432993e-06, "loss": 3.0089, "step": 293750 }, { "epoch": 0.2894398776825895, "grad_norm": 2.2320640087127686, "learning_rate": 9.998299027506683e-06, "loss": 3.0224, "step": 293800 }, { "epoch": 0.28948913566041157, "grad_norm": 2.44762921333313, "learning_rate": 9.998294776275365e-06, "loss": 3.0424, "step": 293850 }, { "epoch": 0.28953839363823364, "grad_norm": 2.2852888107299805, "learning_rate": 9.99829051973904e-06, "loss": 3.0864, "step": 293900 }, { "epoch": 0.28958765161605576, "grad_norm": 2.5310120582580566, "learning_rate": 9.998286257897714e-06, "loss": 3.0324, "step": 293950 }, { "epoch": 0.28963690959387783, "grad_norm": 2.533622980117798, "learning_rate": 9.998281990751394e-06, "loss": 3.0093, "step": 294000 }, { "epoch": 0.2896861675716999, "grad_norm": 2.4127769470214844, "learning_rate": 9.998277718300081e-06, "loss": 3.0144, "step": 294050 }, { "epoch": 0.289735425549522, "grad_norm": 2.397904872894287, "learning_rate": 9.99827344054378e-06, "loss": 3.0353, "step": 294100 }, { "epoch": 0.2897846835273441, "grad_norm": 3.5688552856445312, "learning_rate": 9.998269157482499e-06, "loss": 3.0347, "step": 294150 }, { "epoch": 0.28983394150516617, "grad_norm": 2.3525664806365967, "learning_rate": 9.99826486911624e-06, "loss": 3.0278, "step": 294200 }, { "epoch": 0.28988319948298824, "grad_norm": 2.272430658340454, "learning_rate": 9.998260575445005e-06, "loss": 2.9651, "step": 294250 }, { "epoch": 0.28993245746081037, "grad_norm": 2.218174457550049, "learning_rate": 9.998256276468803e-06, "loss": 2.9642, "step": 294300 }, { "epoch": 0.28998171543863244, "grad_norm": 2.1877808570861816, "learning_rate": 9.998251972187637e-06, "loss": 2.9469, "step": 294350 }, { "epoch": 0.2900309734164545, "grad_norm": 2.243525743484497, "learning_rate": 9.99824766260151e-06, "loss": 3.0118, "step": 294400 }, { "epoch": 0.29008023139427663, "grad_norm": 2.282008647918701, "learning_rate": 
9.998243347710428e-06, "loss": 3.0374, "step": 294450 }, { "epoch": 0.2901294893720987, "grad_norm": 2.290919780731201, "learning_rate": 9.998239027514395e-06, "loss": 3.0776, "step": 294500 }, { "epoch": 0.2901787473499208, "grad_norm": 2.4914908409118652, "learning_rate": 9.998234702013419e-06, "loss": 3.0654, "step": 294550 }, { "epoch": 0.2902280053277429, "grad_norm": 2.331376075744629, "learning_rate": 9.998230371207499e-06, "loss": 3.0268, "step": 294600 }, { "epoch": 0.29027726330556497, "grad_norm": 2.17211651802063, "learning_rate": 9.998226035096642e-06, "loss": 3.0939, "step": 294650 }, { "epoch": 0.29032652128338704, "grad_norm": 2.363924026489258, "learning_rate": 9.998221693680854e-06, "loss": 3.0348, "step": 294700 }, { "epoch": 0.29037577926120917, "grad_norm": 2.2677977085113525, "learning_rate": 9.998217346960137e-06, "loss": 3.0658, "step": 294750 }, { "epoch": 0.29042503723903124, "grad_norm": 2.41581392288208, "learning_rate": 9.998212994934498e-06, "loss": 3.0152, "step": 294800 }, { "epoch": 0.2904742952168533, "grad_norm": 2.3087527751922607, "learning_rate": 9.99820863760394e-06, "loss": 3.096, "step": 294850 }, { "epoch": 0.29052355319467543, "grad_norm": 2.3304736614227295, "learning_rate": 9.998204274968468e-06, "loss": 3.0041, "step": 294900 }, { "epoch": 0.2905728111724975, "grad_norm": 2.237182378768921, "learning_rate": 9.998199907028088e-06, "loss": 2.9745, "step": 294950 }, { "epoch": 0.2906220691503196, "grad_norm": 2.222710371017456, "learning_rate": 9.998195533782802e-06, "loss": 3.06, "step": 295000 }, { "epoch": 0.2906713271281417, "grad_norm": 2.2881669998168945, "learning_rate": 9.998191155232617e-06, "loss": 3.132, "step": 295050 }, { "epoch": 0.29072058510596377, "grad_norm": 2.479964256286621, "learning_rate": 9.998186771377538e-06, "loss": 3.0139, "step": 295100 }, { "epoch": 0.29076984308378584, "grad_norm": 2.5020108222961426, "learning_rate": 9.998182382217567e-06, "loss": 3.0226, "step": 295150 }, { "epoch": 
0.29081910106160797, "grad_norm": 2.3414788246154785, "learning_rate": 9.998177987752712e-06, "loss": 3.0322, "step": 295200 }, { "epoch": 0.29086835903943004, "grad_norm": 2.2535030841827393, "learning_rate": 9.998173587982974e-06, "loss": 3.005, "step": 295250 }, { "epoch": 0.2909176170172521, "grad_norm": 2.2909293174743652, "learning_rate": 9.99816918290836e-06, "loss": 3.0692, "step": 295300 }, { "epoch": 0.2909668749950742, "grad_norm": 2.2951035499572754, "learning_rate": 9.998164772528873e-06, "loss": 3.0997, "step": 295350 }, { "epoch": 0.2910161329728963, "grad_norm": 2.302123785018921, "learning_rate": 9.99816035684452e-06, "loss": 3.0218, "step": 295400 }, { "epoch": 0.2910653909507184, "grad_norm": 2.2479300498962402, "learning_rate": 9.998155935855304e-06, "loss": 3.0235, "step": 295450 }, { "epoch": 0.29111464892854044, "grad_norm": 2.2750980854034424, "learning_rate": 9.998151509561231e-06, "loss": 3.0321, "step": 295500 }, { "epoch": 0.29116390690636257, "grad_norm": 2.3885955810546875, "learning_rate": 9.998147077962305e-06, "loss": 3.0197, "step": 295550 }, { "epoch": 0.29121316488418464, "grad_norm": 2.306899309158325, "learning_rate": 9.99814264105853e-06, "loss": 2.9751, "step": 295600 }, { "epoch": 0.2912624228620067, "grad_norm": 2.3755881786346436, "learning_rate": 9.998138198849911e-06, "loss": 2.9904, "step": 295650 }, { "epoch": 0.29131168083982883, "grad_norm": 2.2048189640045166, "learning_rate": 9.998133751336453e-06, "loss": 3.0756, "step": 295700 }, { "epoch": 0.2913609388176509, "grad_norm": 2.330815553665161, "learning_rate": 9.99812929851816e-06, "loss": 3.0836, "step": 295750 }, { "epoch": 0.291410196795473, "grad_norm": 2.3507096767425537, "learning_rate": 9.99812484039504e-06, "loss": 2.9933, "step": 295800 }, { "epoch": 0.2914594547732951, "grad_norm": 2.127321481704712, "learning_rate": 9.998120376967093e-06, "loss": 3.0235, "step": 295850 }, { "epoch": 0.29150871275111717, "grad_norm": 2.209505081176758, "learning_rate": 
9.998115908234326e-06, "loss": 2.932, "step": 295900 }, { "epoch": 0.29155797072893924, "grad_norm": 2.355177640914917, "learning_rate": 9.998111434196744e-06, "loss": 3.0603, "step": 295950 }, { "epoch": 0.29160722870676137, "grad_norm": 2.3396031856536865, "learning_rate": 9.998106954854353e-06, "loss": 3.0326, "step": 296000 }, { "epoch": 0.29165648668458344, "grad_norm": 2.326965808868408, "learning_rate": 9.998102470207155e-06, "loss": 2.9948, "step": 296050 }, { "epoch": 0.2917057446624055, "grad_norm": 2.3114209175109863, "learning_rate": 9.998097980255155e-06, "loss": 3.2299, "step": 296100 }, { "epoch": 0.29175500264022763, "grad_norm": 2.303318500518799, "learning_rate": 9.998093484998361e-06, "loss": 3.0625, "step": 296150 }, { "epoch": 0.2918042606180497, "grad_norm": 2.2999136447906494, "learning_rate": 9.998088984436772e-06, "loss": 3.0399, "step": 296200 }, { "epoch": 0.2918535185958718, "grad_norm": 2.250926971435547, "learning_rate": 9.9980844785704e-06, "loss": 3.0057, "step": 296250 }, { "epoch": 0.2919027765736939, "grad_norm": 2.2653074264526367, "learning_rate": 9.998079967399244e-06, "loss": 3.0406, "step": 296300 }, { "epoch": 0.29195203455151597, "grad_norm": 2.1483070850372314, "learning_rate": 9.99807545092331e-06, "loss": 3.0727, "step": 296350 }, { "epoch": 0.29200129252933804, "grad_norm": 2.2660086154937744, "learning_rate": 9.998070929142606e-06, "loss": 3.0068, "step": 296400 }, { "epoch": 0.29205055050716017, "grad_norm": 2.323580265045166, "learning_rate": 9.998066402057133e-06, "loss": 3.0377, "step": 296450 }, { "epoch": 0.29209980848498224, "grad_norm": 2.492438316345215, "learning_rate": 9.998061869666898e-06, "loss": 3.043, "step": 296500 }, { "epoch": 0.2921490664628043, "grad_norm": 2.421921491622925, "learning_rate": 9.998057331971903e-06, "loss": 3.0073, "step": 296550 }, { "epoch": 0.2921983244406264, "grad_norm": 2.3024890422821045, "learning_rate": 9.998052788972158e-06, "loss": 2.9749, "step": 296600 }, { "epoch": 
0.2922475824184485, "grad_norm": 2.280564069747925, "learning_rate": 9.998048240667662e-06, "loss": 3.039, "step": 296650 }, { "epoch": 0.2922968403962706, "grad_norm": 2.519284725189209, "learning_rate": 9.998043687058423e-06, "loss": 2.9565, "step": 296700 }, { "epoch": 0.29234609837409264, "grad_norm": 2.4160988330841064, "learning_rate": 9.998039128144447e-06, "loss": 3.0407, "step": 296750 }, { "epoch": 0.29239535635191477, "grad_norm": 2.5507562160491943, "learning_rate": 9.998034563925737e-06, "loss": 3.0224, "step": 296800 }, { "epoch": 0.29244461432973684, "grad_norm": 2.036102294921875, "learning_rate": 9.998029994402297e-06, "loss": 2.9972, "step": 296850 }, { "epoch": 0.2924938723075589, "grad_norm": 2.2396790981292725, "learning_rate": 9.998025419574131e-06, "loss": 3.0048, "step": 296900 }, { "epoch": 0.29254313028538104, "grad_norm": 2.4221136569976807, "learning_rate": 9.998020839441246e-06, "loss": 3.0073, "step": 296950 }, { "epoch": 0.2925923882632031, "grad_norm": 2.4926257133483887, "learning_rate": 9.99801625400365e-06, "loss": 3.036, "step": 297000 }, { "epoch": 0.2926416462410252, "grad_norm": 2.058344841003418, "learning_rate": 9.998011663261343e-06, "loss": 3.0355, "step": 297050 }, { "epoch": 0.2926909042188473, "grad_norm": 2.4081218242645264, "learning_rate": 9.99800706721433e-06, "loss": 3.065, "step": 297100 }, { "epoch": 0.2927401621966694, "grad_norm": 2.359699249267578, "learning_rate": 9.998002465862619e-06, "loss": 2.9612, "step": 297150 }, { "epoch": 0.29278942017449144, "grad_norm": 2.3137311935424805, "learning_rate": 9.997997859206212e-06, "loss": 2.9654, "step": 297200 }, { "epoch": 0.29283867815231357, "grad_norm": 2.3155648708343506, "learning_rate": 9.997993247245115e-06, "loss": 3.0773, "step": 297250 }, { "epoch": 0.29288793613013564, "grad_norm": 2.27374529838562, "learning_rate": 9.997988629979334e-06, "loss": 2.9492, "step": 297300 }, { "epoch": 0.2929371941079577, "grad_norm": 2.234924554824829, "learning_rate": 
9.997984007408871e-06, "loss": 3.0349, "step": 297350 }, { "epoch": 0.29298645208577984, "grad_norm": 2.404151439666748, "learning_rate": 9.997979379533733e-06, "loss": 3.0389, "step": 297400 }, { "epoch": 0.2930357100636019, "grad_norm": 2.3502793312072754, "learning_rate": 9.997974746353925e-06, "loss": 3.0308, "step": 297450 }, { "epoch": 0.293084968041424, "grad_norm": 2.1905019283294678, "learning_rate": 9.997970107869452e-06, "loss": 3.0136, "step": 297500 }, { "epoch": 0.2931342260192461, "grad_norm": 2.6126620769500732, "learning_rate": 9.997965464080318e-06, "loss": 2.9793, "step": 297550 }, { "epoch": 0.2931834839970682, "grad_norm": 2.20261549949646, "learning_rate": 9.997960814986528e-06, "loss": 2.9788, "step": 297600 }, { "epoch": 0.29323274197489024, "grad_norm": 2.3823049068450928, "learning_rate": 9.997956160588088e-06, "loss": 2.9969, "step": 297650 }, { "epoch": 0.29328199995271237, "grad_norm": 2.961604595184326, "learning_rate": 9.997951500885e-06, "loss": 2.9895, "step": 297700 }, { "epoch": 0.29333125793053444, "grad_norm": 2.2704825401306152, "learning_rate": 9.997946835877275e-06, "loss": 3.0208, "step": 297750 }, { "epoch": 0.2933805159083565, "grad_norm": 2.2912776470184326, "learning_rate": 9.99794216556491e-06, "loss": 2.97, "step": 297800 }, { "epoch": 0.2934297738861786, "grad_norm": 2.3277502059936523, "learning_rate": 9.997937489947917e-06, "loss": 3.0822, "step": 297850 }, { "epoch": 0.2934790318640007, "grad_norm": 2.2956442832946777, "learning_rate": 9.997932809026298e-06, "loss": 3.0024, "step": 297900 }, { "epoch": 0.2935282898418228, "grad_norm": 2.7489371299743652, "learning_rate": 9.997928122800056e-06, "loss": 3.0273, "step": 297950 }, { "epoch": 0.29357754781964485, "grad_norm": 2.4630348682403564, "learning_rate": 9.9979234312692e-06, "loss": 2.9668, "step": 298000 }, { "epoch": 0.29362680579746697, "grad_norm": 2.318434953689575, "learning_rate": 9.997918734433733e-06, "loss": 3.0377, "step": 298050 }, { "epoch": 
0.29367606377528904, "grad_norm": 2.313265085220337, "learning_rate": 9.99791403229366e-06, "loss": 3.0475, "step": 298100 }, { "epoch": 0.2937253217531111, "grad_norm": 2.316800832748413, "learning_rate": 9.997909324848983e-06, "loss": 3.0248, "step": 298150 }, { "epoch": 0.29377457973093324, "grad_norm": 2.141268014907837, "learning_rate": 9.997904612099712e-06, "loss": 3.0027, "step": 298200 }, { "epoch": 0.2938238377087553, "grad_norm": 2.2197372913360596, "learning_rate": 9.997899894045851e-06, "loss": 3.0501, "step": 298250 }, { "epoch": 0.2938730956865774, "grad_norm": 2.395688533782959, "learning_rate": 9.997895170687402e-06, "loss": 3.0357, "step": 298300 }, { "epoch": 0.2939223536643995, "grad_norm": 2.3097569942474365, "learning_rate": 9.997890442024375e-06, "loss": 3.1222, "step": 298350 }, { "epoch": 0.2939716116422216, "grad_norm": 2.4284706115722656, "learning_rate": 9.997885708056769e-06, "loss": 3.0151, "step": 298400 }, { "epoch": 0.29402086962004365, "grad_norm": 2.2458603382110596, "learning_rate": 9.997880968784592e-06, "loss": 3.0383, "step": 298450 }, { "epoch": 0.29407012759786577, "grad_norm": 2.279085159301758, "learning_rate": 9.997876224207851e-06, "loss": 3.0116, "step": 298500 }, { "epoch": 0.29411938557568784, "grad_norm": 2.117119073867798, "learning_rate": 9.997871474326548e-06, "loss": 3.0286, "step": 298550 }, { "epoch": 0.2941686435535099, "grad_norm": 2.2228713035583496, "learning_rate": 9.99786671914069e-06, "loss": 3.067, "step": 298600 }, { "epoch": 0.29421790153133204, "grad_norm": 2.3333425521850586, "learning_rate": 9.99786195865028e-06, "loss": 3.0201, "step": 298650 }, { "epoch": 0.2942671595091541, "grad_norm": 2.3131484985351562, "learning_rate": 9.997857192855325e-06, "loss": 2.988, "step": 298700 }, { "epoch": 0.2943164174869762, "grad_norm": 2.408461809158325, "learning_rate": 9.997852421755828e-06, "loss": 3.0499, "step": 298750 }, { "epoch": 0.2943656754647983, "grad_norm": 2.2897918224334717, "learning_rate": 
9.997847645351795e-06, "loss": 2.9665, "step": 298800 }, { "epoch": 0.2944149334426204, "grad_norm": 2.4277658462524414, "learning_rate": 9.997842863643232e-06, "loss": 3.0504, "step": 298850 }, { "epoch": 0.29446419142044244, "grad_norm": 2.4914588928222656, "learning_rate": 9.997838076630146e-06, "loss": 2.999, "step": 298900 }, { "epoch": 0.2945134493982645, "grad_norm": 2.178740978240967, "learning_rate": 9.997833284312536e-06, "loss": 2.9704, "step": 298950 }, { "epoch": 0.29456270737608664, "grad_norm": 2.179828405380249, "learning_rate": 9.997828486690413e-06, "loss": 3.0652, "step": 299000 }, { "epoch": 0.2946119653539087, "grad_norm": 2.3757736682891846, "learning_rate": 9.997823683763778e-06, "loss": 3.089, "step": 299050 }, { "epoch": 0.2946612233317308, "grad_norm": 2.786357879638672, "learning_rate": 9.99781887553264e-06, "loss": 3.0685, "step": 299100 }, { "epoch": 0.2947104813095529, "grad_norm": 2.277616262435913, "learning_rate": 9.997814061996999e-06, "loss": 3.0256, "step": 299150 }, { "epoch": 0.294759739287375, "grad_norm": 2.2985942363739014, "learning_rate": 9.997809243156865e-06, "loss": 3.053, "step": 299200 }, { "epoch": 0.29480899726519705, "grad_norm": 2.1226439476013184, "learning_rate": 9.99780441901224e-06, "loss": 3.0519, "step": 299250 }, { "epoch": 0.2948582552430192, "grad_norm": 2.2674336433410645, "learning_rate": 9.997799589563132e-06, "loss": 3.0319, "step": 299300 }, { "epoch": 0.29490751322084124, "grad_norm": 2.2621166706085205, "learning_rate": 9.997794754809543e-06, "loss": 2.9876, "step": 299350 }, { "epoch": 0.2949567711986633, "grad_norm": 2.2113723754882812, "learning_rate": 9.99778991475148e-06, "loss": 2.9922, "step": 299400 }, { "epoch": 0.29500602917648544, "grad_norm": 2.239797353744507, "learning_rate": 9.997785069388948e-06, "loss": 3.0752, "step": 299450 }, { "epoch": 0.2950552871543075, "grad_norm": 2.2194085121154785, "learning_rate": 9.997780218721952e-06, "loss": 3.1058, "step": 299500 }, { "epoch": 
0.2951045451321296, "grad_norm": 2.343980312347412, "learning_rate": 9.997775362750497e-06, "loss": 3.0814, "step": 299550 }, { "epoch": 0.2951538031099517, "grad_norm": 2.3087575435638428, "learning_rate": 9.997770501474588e-06, "loss": 3.0418, "step": 299600 }, { "epoch": 0.2952030610877738, "grad_norm": 2.37811279296875, "learning_rate": 9.99776563489423e-06, "loss": 3.0077, "step": 299650 }, { "epoch": 0.29525231906559585, "grad_norm": 2.32179594039917, "learning_rate": 9.997760763009427e-06, "loss": 2.9622, "step": 299700 }, { "epoch": 0.295301577043418, "grad_norm": 2.497282028198242, "learning_rate": 9.997755885820188e-06, "loss": 3.0178, "step": 299750 }, { "epoch": 0.29535083502124004, "grad_norm": 2.214566707611084, "learning_rate": 9.997751003326516e-06, "loss": 2.9933, "step": 299800 }, { "epoch": 0.2954000929990621, "grad_norm": 2.4989564418792725, "learning_rate": 9.997746115528414e-06, "loss": 3.0463, "step": 299850 }, { "epoch": 0.29544935097688424, "grad_norm": 2.462315320968628, "learning_rate": 9.99774122242589e-06, "loss": 3.022, "step": 299900 }, { "epoch": 0.2954986089547063, "grad_norm": 2.3170173168182373, "learning_rate": 9.997736324018948e-06, "loss": 3.0488, "step": 299950 }, { "epoch": 0.2955478669325284, "grad_norm": 2.4481375217437744, "learning_rate": 9.997731420307593e-06, "loss": 3.0538, "step": 300000 }, { "epoch": 0.2955971249103505, "grad_norm": 2.29868483543396, "learning_rate": 9.997726511291832e-06, "loss": 3.0856, "step": 300050 }, { "epoch": 0.2956463828881726, "grad_norm": 2.501415252685547, "learning_rate": 9.997721596971669e-06, "loss": 3.0964, "step": 300100 }, { "epoch": 0.29569564086599465, "grad_norm": 2.195765733718872, "learning_rate": 9.99771667734711e-06, "loss": 3.0413, "step": 300150 }, { "epoch": 0.2957448988438167, "grad_norm": 2.2101051807403564, "learning_rate": 9.997711752418157e-06, "loss": 3.0551, "step": 300200 }, { "epoch": 0.29579415682163884, "grad_norm": 2.2774300575256348, "learning_rate": 
9.99770682218482e-06, "loss": 2.9889, "step": 300250 }, { "epoch": 0.2958434147994609, "grad_norm": 2.167348623275757, "learning_rate": 9.9977018866471e-06, "loss": 2.9904, "step": 300300 }, { "epoch": 0.295892672777283, "grad_norm": 2.172355890274048, "learning_rate": 9.997696945805005e-06, "loss": 2.9973, "step": 300350 }, { "epoch": 0.2959419307551051, "grad_norm": 2.1638593673706055, "learning_rate": 9.997691999658538e-06, "loss": 3.0649, "step": 300400 }, { "epoch": 0.2959911887329272, "grad_norm": 2.2954022884368896, "learning_rate": 9.997687048207708e-06, "loss": 3.0624, "step": 300450 }, { "epoch": 0.29604044671074925, "grad_norm": 2.4686570167541504, "learning_rate": 9.997682091452516e-06, "loss": 3.0161, "step": 300500 }, { "epoch": 0.2960897046885714, "grad_norm": 2.515688896179199, "learning_rate": 9.99767712939297e-06, "loss": 2.9986, "step": 300550 }, { "epoch": 0.29613896266639345, "grad_norm": 2.2803728580474854, "learning_rate": 9.997672162029074e-06, "loss": 2.9414, "step": 300600 }, { "epoch": 0.2961882206442155, "grad_norm": 2.281238555908203, "learning_rate": 9.997667189360833e-06, "loss": 3.0179, "step": 300650 }, { "epoch": 0.29623747862203764, "grad_norm": 2.2644777297973633, "learning_rate": 9.997662211388253e-06, "loss": 3.0369, "step": 300700 }, { "epoch": 0.2962867365998597, "grad_norm": 2.4472367763519287, "learning_rate": 9.997657228111341e-06, "loss": 3.0525, "step": 300750 }, { "epoch": 0.2963359945776818, "grad_norm": 2.2760369777679443, "learning_rate": 9.997652239530099e-06, "loss": 2.9871, "step": 300800 }, { "epoch": 0.2963852525555039, "grad_norm": 2.277613639831543, "learning_rate": 9.997647245644534e-06, "loss": 3.0682, "step": 300850 }, { "epoch": 0.296434510533326, "grad_norm": 2.2597336769104004, "learning_rate": 9.997642246454652e-06, "loss": 3.0561, "step": 300900 }, { "epoch": 0.29648376851114805, "grad_norm": 2.398033857345581, "learning_rate": 9.997637241960457e-06, "loss": 2.9909, "step": 300950 }, { "epoch": 
0.2965330264889702, "grad_norm": 2.448765277862549, "learning_rate": 9.997632232161954e-06, "loss": 2.9499, "step": 301000 }, { "epoch": 0.29658228446679225, "grad_norm": 2.232977867126465, "learning_rate": 9.99762721705915e-06, "loss": 3.0373, "step": 301050 }, { "epoch": 0.2966315424446143, "grad_norm": 2.3032796382904053, "learning_rate": 9.997622196652047e-06, "loss": 3.0336, "step": 301100 }, { "epoch": 0.29668080042243644, "grad_norm": 2.2576305866241455, "learning_rate": 9.997617170940655e-06, "loss": 3.1268, "step": 301150 }, { "epoch": 0.2967300584002585, "grad_norm": 2.4217112064361572, "learning_rate": 9.997612139924978e-06, "loss": 3.0473, "step": 301200 }, { "epoch": 0.2967793163780806, "grad_norm": 2.3375320434570312, "learning_rate": 9.99760710360502e-06, "loss": 3.044, "step": 301250 }, { "epoch": 0.2968285743559027, "grad_norm": 2.3312928676605225, "learning_rate": 9.997602061980783e-06, "loss": 3.0174, "step": 301300 }, { "epoch": 0.2968778323337248, "grad_norm": 2.4094583988189697, "learning_rate": 9.99759701505228e-06, "loss": 3.0034, "step": 301350 }, { "epoch": 0.29692709031154685, "grad_norm": 2.443190813064575, "learning_rate": 9.997591962819511e-06, "loss": 3.1342, "step": 301400 }, { "epoch": 0.2969763482893689, "grad_norm": 2.2841074466705322, "learning_rate": 9.997586905282482e-06, "loss": 3.0267, "step": 301450 }, { "epoch": 0.29702560626719104, "grad_norm": 2.1871328353881836, "learning_rate": 9.9975818424412e-06, "loss": 3.0401, "step": 301500 }, { "epoch": 0.2970748642450131, "grad_norm": 2.2660679817199707, "learning_rate": 9.99757677429567e-06, "loss": 3.0046, "step": 301550 }, { "epoch": 0.2971241222228352, "grad_norm": 2.409891366958618, "learning_rate": 9.997571700845897e-06, "loss": 3.0434, "step": 301600 }, { "epoch": 0.2971733802006573, "grad_norm": 2.324989080429077, "learning_rate": 9.997566622091885e-06, "loss": 3.0596, "step": 301650 }, { "epoch": 0.2972226381784794, "grad_norm": 2.4707205295562744, "learning_rate": 
9.997561538033641e-06, "loss": 3.0022, "step": 301700 }, { "epoch": 0.29727189615630145, "grad_norm": 2.341087579727173, "learning_rate": 9.997556448671172e-06, "loss": 3.0307, "step": 301750 }, { "epoch": 0.2973211541341236, "grad_norm": 2.1764256954193115, "learning_rate": 9.99755135400448e-06, "loss": 3.0554, "step": 301800 }, { "epoch": 0.29737041211194565, "grad_norm": 2.3018980026245117, "learning_rate": 9.997546254033571e-06, "loss": 3.0157, "step": 301850 }, { "epoch": 0.2974196700897677, "grad_norm": 2.4313461780548096, "learning_rate": 9.997541148758453e-06, "loss": 3.039, "step": 301900 }, { "epoch": 0.29746892806758984, "grad_norm": 2.457601547241211, "learning_rate": 9.997536038179126e-06, "loss": 3.0171, "step": 301950 }, { "epoch": 0.2975181860454119, "grad_norm": 2.3218934535980225, "learning_rate": 9.997530922295603e-06, "loss": 3.0604, "step": 302000 }, { "epoch": 0.297567444023234, "grad_norm": 2.5782957077026367, "learning_rate": 9.997525801107884e-06, "loss": 3.0283, "step": 302050 }, { "epoch": 0.2976167020010561, "grad_norm": 2.3540420532226562, "learning_rate": 9.997520674615977e-06, "loss": 3.0335, "step": 302100 }, { "epoch": 0.2976659599788782, "grad_norm": 2.3962111473083496, "learning_rate": 9.997515542819887e-06, "loss": 3.0302, "step": 302150 }, { "epoch": 0.29771521795670025, "grad_norm": Infinity, "learning_rate": 9.997510405719618e-06, "loss": 2.99, "step": 302200 }, { "epoch": 0.2977644759345224, "grad_norm": 2.158447027206421, "learning_rate": 9.997505263315175e-06, "loss": 3.0011, "step": 302250 }, { "epoch": 0.29781373391234445, "grad_norm": 2.385204553604126, "learning_rate": 9.997500115606565e-06, "loss": 3.002, "step": 302300 }, { "epoch": 0.2978629918901665, "grad_norm": 2.5355417728424072, "learning_rate": 9.997494962593795e-06, "loss": 3.0532, "step": 302350 }, { "epoch": 0.29791224986798864, "grad_norm": 2.195981025695801, "learning_rate": 9.997489804276867e-06, "loss": 3.019, "step": 302400 }, { "epoch": 
0.2979615078458107, "grad_norm": 2.195563316345215, "learning_rate": 9.997484640655789e-06, "loss": 3.0426, "step": 302450 }, { "epoch": 0.2980107658236328, "grad_norm": 2.2413201332092285, "learning_rate": 9.997479471730568e-06, "loss": 2.9683, "step": 302500 }, { "epoch": 0.2980600238014549, "grad_norm": 2.1960880756378174, "learning_rate": 9.997474297501204e-06, "loss": 3.0467, "step": 302550 }, { "epoch": 0.298109281779277, "grad_norm": 2.2786059379577637, "learning_rate": 9.997469117967706e-06, "loss": 3.0152, "step": 302600 }, { "epoch": 0.29815853975709905, "grad_norm": 2.312403440475464, "learning_rate": 9.997463933130081e-06, "loss": 3.0685, "step": 302650 }, { "epoch": 0.2982077977349211, "grad_norm": 2.3801369667053223, "learning_rate": 9.99745874298833e-06, "loss": 2.9402, "step": 302700 }, { "epoch": 0.29825705571274325, "grad_norm": 2.3448798656463623, "learning_rate": 9.997453547542463e-06, "loss": 3.0788, "step": 302750 }, { "epoch": 0.2983063136905653, "grad_norm": 2.1296544075012207, "learning_rate": 9.997448346792483e-06, "loss": 3.0594, "step": 302800 }, { "epoch": 0.2983555716683874, "grad_norm": 2.3189103603363037, "learning_rate": 9.997443140738398e-06, "loss": 3.0242, "step": 302850 }, { "epoch": 0.2984048296462095, "grad_norm": 2.3028197288513184, "learning_rate": 9.99743792938021e-06, "loss": 3.0274, "step": 302900 }, { "epoch": 0.2984540876240316, "grad_norm": 3.1368179321289062, "learning_rate": 9.997432712717926e-06, "loss": 3.0559, "step": 302950 }, { "epoch": 0.29850334560185365, "grad_norm": 2.34022855758667, "learning_rate": 9.997427490751553e-06, "loss": 3.0227, "step": 303000 }, { "epoch": 0.2985526035796758, "grad_norm": 2.2709569931030273, "learning_rate": 9.997422263481093e-06, "loss": 3.0003, "step": 303050 }, { "epoch": 0.29860186155749785, "grad_norm": 2.143012523651123, "learning_rate": 9.997417030906557e-06, "loss": 3.0219, "step": 303100 }, { "epoch": 0.2986511195353199, "grad_norm": 2.2246391773223877, "learning_rate": 
9.997411793027945e-06, "loss": 3.0016, "step": 303150 }, { "epoch": 0.29870037751314205, "grad_norm": 2.131103038787842, "learning_rate": 9.997406549845266e-06, "loss": 3.0385, "step": 303200 }, { "epoch": 0.2987496354909641, "grad_norm": 2.262812614440918, "learning_rate": 9.997401301358525e-06, "loss": 3.0711, "step": 303250 }, { "epoch": 0.2987988934687862, "grad_norm": 2.2424211502075195, "learning_rate": 9.997396047567726e-06, "loss": 2.989, "step": 303300 }, { "epoch": 0.2988481514466083, "grad_norm": 2.405256509780884, "learning_rate": 9.997390788472877e-06, "loss": 3.0722, "step": 303350 }, { "epoch": 0.2988974094244304, "grad_norm": 2.139882802963257, "learning_rate": 9.997385524073981e-06, "loss": 3.0068, "step": 303400 }, { "epoch": 0.29894666740225245, "grad_norm": 2.2563867568969727, "learning_rate": 9.997380254371047e-06, "loss": 3.0265, "step": 303450 }, { "epoch": 0.2989959253800746, "grad_norm": 2.5227653980255127, "learning_rate": 9.997374979364077e-06, "loss": 3.0312, "step": 303500 }, { "epoch": 0.29904518335789665, "grad_norm": 2.6186342239379883, "learning_rate": 9.997369699053078e-06, "loss": 3.0304, "step": 303550 }, { "epoch": 0.2990944413357187, "grad_norm": 2.1977999210357666, "learning_rate": 9.997364413438054e-06, "loss": 2.9144, "step": 303600 }, { "epoch": 0.29914369931354085, "grad_norm": 2.3784339427948, "learning_rate": 9.997359122519014e-06, "loss": 2.9665, "step": 303650 }, { "epoch": 0.2991929572913629, "grad_norm": 2.453524589538574, "learning_rate": 9.997353826295963e-06, "loss": 2.9995, "step": 303700 }, { "epoch": 0.299242215269185, "grad_norm": 2.345092535018921, "learning_rate": 9.997348524768905e-06, "loss": 2.9761, "step": 303750 }, { "epoch": 0.2992914732470071, "grad_norm": 2.3803884983062744, "learning_rate": 9.997343217937845e-06, "loss": 3.0774, "step": 303800 }, { "epoch": 0.2993407312248292, "grad_norm": 2.3345894813537598, "learning_rate": 9.99733790580279e-06, "loss": 3.053, "step": 303850 }, { "epoch": 
0.29938998920265125, "grad_norm": 2.235635757446289, "learning_rate": 9.997332588363746e-06, "loss": 3.0229, "step": 303900 }, { "epoch": 0.2994392471804733, "grad_norm": 2.2956478595733643, "learning_rate": 9.997327265620719e-06, "loss": 3.085, "step": 303950 }, { "epoch": 0.29948850515829545, "grad_norm": 2.322538375854492, "learning_rate": 9.997321937573713e-06, "loss": 2.9686, "step": 304000 }, { "epoch": 0.2995377631361175, "grad_norm": 2.379397392272949, "learning_rate": 9.997316604222734e-06, "loss": 3.0073, "step": 304050 }, { "epoch": 0.2995870211139396, "grad_norm": 2.220425844192505, "learning_rate": 9.997311265567787e-06, "loss": 3.0213, "step": 304100 }, { "epoch": 0.2996362790917617, "grad_norm": 2.5949504375457764, "learning_rate": 9.997305921608881e-06, "loss": 3.0337, "step": 304150 }, { "epoch": 0.2996855370695838, "grad_norm": 2.1956796646118164, "learning_rate": 9.997300572346019e-06, "loss": 3.0389, "step": 304200 }, { "epoch": 0.29973479504740586, "grad_norm": 2.2598912715911865, "learning_rate": 9.997295217779204e-06, "loss": 3.024, "step": 304250 }, { "epoch": 0.299784053025228, "grad_norm": 2.486750364303589, "learning_rate": 9.997289857908447e-06, "loss": 3.0262, "step": 304300 }, { "epoch": 0.29983331100305005, "grad_norm": 2.367497682571411, "learning_rate": 9.997284492733753e-06, "loss": 3.0634, "step": 304350 }, { "epoch": 0.2998825689808721, "grad_norm": 2.3332293033599854, "learning_rate": 9.997279122255124e-06, "loss": 2.9679, "step": 304400 }, { "epoch": 0.29993182695869425, "grad_norm": 2.4721522331237793, "learning_rate": 9.997273746472567e-06, "loss": 3.0169, "step": 304450 }, { "epoch": 0.2999810849365163, "grad_norm": 2.169637441635132, "learning_rate": 9.99726836538609e-06, "loss": 3.0456, "step": 304500 }, { "epoch": 0.3000303429143384, "grad_norm": 2.381326675415039, "learning_rate": 9.997262978995698e-06, "loss": 3.0523, "step": 304550 }, { "epoch": 0.3000796008921605, "grad_norm": 2.2650561332702637, "learning_rate": 
9.997257587301394e-06, "loss": 3.0069, "step": 304600 }, { "epoch": 0.3001288588699826, "grad_norm": 2.2628531455993652, "learning_rate": 9.997252190303187e-06, "loss": 3.0396, "step": 304650 }, { "epoch": 0.30017811684780465, "grad_norm": 2.180102825164795, "learning_rate": 9.997246788001082e-06, "loss": 3.0319, "step": 304700 }, { "epoch": 0.3002273748256268, "grad_norm": 2.3927290439605713, "learning_rate": 9.997241380395082e-06, "loss": 3.0116, "step": 304750 }, { "epoch": 0.30027663280344885, "grad_norm": 2.2854747772216797, "learning_rate": 9.997235967485197e-06, "loss": 3.0758, "step": 304800 }, { "epoch": 0.3003258907812709, "grad_norm": 2.2865076065063477, "learning_rate": 9.997230549271429e-06, "loss": 3.039, "step": 304850 }, { "epoch": 0.30037514875909305, "grad_norm": 2.3569352626800537, "learning_rate": 9.997225125753787e-06, "loss": 3.0039, "step": 304900 }, { "epoch": 0.3004244067369151, "grad_norm": 2.228971242904663, "learning_rate": 9.997219696932273e-06, "loss": 2.9715, "step": 304950 }, { "epoch": 0.3004736647147372, "grad_norm": 2.445197820663452, "learning_rate": 9.997214262806896e-06, "loss": 2.928, "step": 305000 }, { "epoch": 0.3005229226925593, "grad_norm": 2.448639392852783, "learning_rate": 9.997208823377662e-06, "loss": 3.0986, "step": 305050 }, { "epoch": 0.3005721806703814, "grad_norm": 2.1995644569396973, "learning_rate": 9.997203378644574e-06, "loss": 3.0189, "step": 305100 }, { "epoch": 0.30062143864820345, "grad_norm": 2.227952480316162, "learning_rate": 9.99719792860764e-06, "loss": 3.0393, "step": 305150 }, { "epoch": 0.3006706966260255, "grad_norm": 2.2867395877838135, "learning_rate": 9.997192473266864e-06, "loss": 2.9946, "step": 305200 }, { "epoch": 0.30071995460384765, "grad_norm": 2.5173959732055664, "learning_rate": 9.99718701262225e-06, "loss": 3.0137, "step": 305250 }, { "epoch": 0.3007692125816697, "grad_norm": 2.2430453300476074, "learning_rate": 9.997181546673812e-06, "loss": 3.0535, "step": 305300 }, { "epoch": 
0.3008184705594918, "grad_norm": 2.380549430847168, "learning_rate": 9.997176075421547e-06, "loss": 3.0731, "step": 305350 }, { "epoch": 0.3008677285373139, "grad_norm": 2.1974780559539795, "learning_rate": 9.997170598865464e-06, "loss": 2.938, "step": 305400 }, { "epoch": 0.300916986515136, "grad_norm": 2.898636817932129, "learning_rate": 9.997165117005573e-06, "loss": 3.0589, "step": 305450 }, { "epoch": 0.30096624449295806, "grad_norm": 2.457097053527832, "learning_rate": 9.997159629841871e-06, "loss": 3.0604, "step": 305500 }, { "epoch": 0.3010155024707802, "grad_norm": 2.1827590465545654, "learning_rate": 9.997154137374372e-06, "loss": 3.0812, "step": 305550 }, { "epoch": 0.30106476044860225, "grad_norm": 2.3152217864990234, "learning_rate": 9.997148639603078e-06, "loss": 3.0249, "step": 305600 }, { "epoch": 0.3011140184264243, "grad_norm": 2.355703115463257, "learning_rate": 9.997143136527995e-06, "loss": 3.0395, "step": 305650 }, { "epoch": 0.30116327640424645, "grad_norm": 2.1953670978546143, "learning_rate": 9.997137628149126e-06, "loss": 2.9921, "step": 305700 }, { "epoch": 0.3012125343820685, "grad_norm": 2.4789483547210693, "learning_rate": 9.997132114466483e-06, "loss": 2.9684, "step": 305750 }, { "epoch": 0.3012617923598906, "grad_norm": 2.213282346725464, "learning_rate": 9.99712659548007e-06, "loss": 3.0086, "step": 305800 }, { "epoch": 0.3013110503377127, "grad_norm": 2.323824882507324, "learning_rate": 9.99712107118989e-06, "loss": 3.0564, "step": 305850 }, { "epoch": 0.3013603083155348, "grad_norm": 2.210498809814453, "learning_rate": 9.99711554159595e-06, "loss": 2.9832, "step": 305900 }, { "epoch": 0.30140956629335686, "grad_norm": 2.251371145248413, "learning_rate": 9.997110006698257e-06, "loss": 3.0268, "step": 305950 }, { "epoch": 0.301458824271179, "grad_norm": 2.3328423500061035, "learning_rate": 9.997104466496816e-06, "loss": 3.0216, "step": 306000 }, { "epoch": 0.30150808224900105, "grad_norm": 3.0025675296783447, "learning_rate": 
9.997098920991635e-06, "loss": 3.0037, "step": 306050 }, { "epoch": 0.3015573402268231, "grad_norm": 2.229220390319824, "learning_rate": 9.997093370182716e-06, "loss": 3.067, "step": 306100 }, { "epoch": 0.30160659820464525, "grad_norm": 2.289638042449951, "learning_rate": 9.997087814070068e-06, "loss": 2.9948, "step": 306150 }, { "epoch": 0.3016558561824673, "grad_norm": 2.301940441131592, "learning_rate": 9.997082252653694e-06, "loss": 3.0275, "step": 306200 }, { "epoch": 0.3017051141602894, "grad_norm": 2.6628968715667725, "learning_rate": 9.997076685933605e-06, "loss": 2.9845, "step": 306250 }, { "epoch": 0.3017543721381115, "grad_norm": 2.418945789337158, "learning_rate": 9.9970711139098e-06, "loss": 3.0209, "step": 306300 }, { "epoch": 0.3018036301159336, "grad_norm": 2.275040864944458, "learning_rate": 9.997065536582293e-06, "loss": 3.088, "step": 306350 }, { "epoch": 0.30185288809375566, "grad_norm": 2.288247585296631, "learning_rate": 9.997059953951083e-06, "loss": 3.0621, "step": 306400 }, { "epoch": 0.3019021460715777, "grad_norm": 2.1334221363067627, "learning_rate": 9.997054366016178e-06, "loss": 3.0332, "step": 306450 }, { "epoch": 0.30195140404939985, "grad_norm": 2.259039878845215, "learning_rate": 9.997048772777584e-06, "loss": 2.9927, "step": 306500 }, { "epoch": 0.3020006620272219, "grad_norm": 2.311420202255249, "learning_rate": 9.997043174235308e-06, "loss": 3.0285, "step": 306550 }, { "epoch": 0.302049920005044, "grad_norm": 2.3487062454223633, "learning_rate": 9.997037570389356e-06, "loss": 3.0428, "step": 306600 }, { "epoch": 0.3020991779828661, "grad_norm": 2.259214162826538, "learning_rate": 9.997031961239734e-06, "loss": 3.0648, "step": 306650 }, { "epoch": 0.3021484359606882, "grad_norm": 2.426786422729492, "learning_rate": 9.997026346786444e-06, "loss": 2.9985, "step": 306700 }, { "epoch": 0.30219769393851026, "grad_norm": 2.0850160121917725, "learning_rate": 9.997020727029498e-06, "loss": 3.0229, "step": 306750 }, { "epoch": 
0.3022469519163324, "grad_norm": 2.3047666549682617, "learning_rate": 9.997015101968898e-06, "loss": 3.0257, "step": 306800 }, { "epoch": 0.30229620989415446, "grad_norm": 2.293976306915283, "learning_rate": 9.997009471604649e-06, "loss": 3.0741, "step": 306850 }, { "epoch": 0.3023454678719765, "grad_norm": 2.3507769107818604, "learning_rate": 9.997003835936762e-06, "loss": 3.076, "step": 306900 }, { "epoch": 0.30239472584979865, "grad_norm": 2.404139995574951, "learning_rate": 9.99699819496524e-06, "loss": 3.0424, "step": 306950 }, { "epoch": 0.3024439838276207, "grad_norm": 2.33311128616333, "learning_rate": 9.996992548690087e-06, "loss": 2.9499, "step": 307000 }, { "epoch": 0.3024932418054428, "grad_norm": 2.1716532707214355, "learning_rate": 9.996986897111312e-06, "loss": 3.0405, "step": 307050 }, { "epoch": 0.3025424997832649, "grad_norm": 2.359008550643921, "learning_rate": 9.99698124022892e-06, "loss": 3.0739, "step": 307100 }, { "epoch": 0.302591757761087, "grad_norm": 2.294940948486328, "learning_rate": 9.996975578042916e-06, "loss": 2.9145, "step": 307150 }, { "epoch": 0.30264101573890906, "grad_norm": 2.402827501296997, "learning_rate": 9.996969910553308e-06, "loss": 3.093, "step": 307200 }, { "epoch": 0.3026902737167312, "grad_norm": 2.377148151397705, "learning_rate": 9.996964237760101e-06, "loss": 3.0259, "step": 307250 }, { "epoch": 0.30273953169455325, "grad_norm": 2.3695480823516846, "learning_rate": 9.9969585596633e-06, "loss": 3.0744, "step": 307300 }, { "epoch": 0.3027887896723753, "grad_norm": 2.151103973388672, "learning_rate": 9.996952876262914e-06, "loss": 3.0261, "step": 307350 }, { "epoch": 0.30283804765019745, "grad_norm": 2.3178980350494385, "learning_rate": 9.996947187558947e-06, "loss": 2.9997, "step": 307400 }, { "epoch": 0.3028873056280195, "grad_norm": 2.3100035190582275, "learning_rate": 9.996941493551404e-06, "loss": 3.0042, "step": 307450 }, { "epoch": 0.3029365636058416, "grad_norm": 2.260178327560425, "learning_rate": 
9.99693579424029e-06, "loss": 3.0386, "step": 307500 }, { "epoch": 0.3029858215836637, "grad_norm": 2.2575743198394775, "learning_rate": 9.996930089625618e-06, "loss": 3.0735, "step": 307550 }, { "epoch": 0.3030350795614858, "grad_norm": 2.504180669784546, "learning_rate": 9.996924379707386e-06, "loss": 3.0823, "step": 307600 }, { "epoch": 0.30308433753930786, "grad_norm": 2.2986879348754883, "learning_rate": 9.996918664485604e-06, "loss": 3.0707, "step": 307650 }, { "epoch": 0.30313359551712993, "grad_norm": 2.3497161865234375, "learning_rate": 9.996912943960278e-06, "loss": 3.0328, "step": 307700 }, { "epoch": 0.30318285349495205, "grad_norm": 2.2806992530822754, "learning_rate": 9.996907218131413e-06, "loss": 2.9764, "step": 307750 }, { "epoch": 0.3032321114727741, "grad_norm": 2.3173491954803467, "learning_rate": 9.996901486999015e-06, "loss": 2.9841, "step": 307800 }, { "epoch": 0.3032813694505962, "grad_norm": 2.175394296646118, "learning_rate": 9.996895750563091e-06, "loss": 3.0262, "step": 307850 }, { "epoch": 0.3033306274284183, "grad_norm": 2.350114583969116, "learning_rate": 9.996890008823646e-06, "loss": 3.0565, "step": 307900 }, { "epoch": 0.3033798854062404, "grad_norm": 2.230470895767212, "learning_rate": 9.996884261780688e-06, "loss": 3.0323, "step": 307950 }, { "epoch": 0.30342914338406246, "grad_norm": 2.285947561264038, "learning_rate": 9.996878509434223e-06, "loss": 3.0385, "step": 308000 }, { "epoch": 0.3034784013618846, "grad_norm": 2.3150689601898193, "learning_rate": 9.996872751784254e-06, "loss": 3.0597, "step": 308050 }, { "epoch": 0.30352765933970666, "grad_norm": 2.342055320739746, "learning_rate": 9.996866988830789e-06, "loss": 2.9783, "step": 308100 }, { "epoch": 0.3035769173175287, "grad_norm": 2.3872690200805664, "learning_rate": 9.996861220573835e-06, "loss": 3.0042, "step": 308150 }, { "epoch": 0.30362617529535085, "grad_norm": 2.1792943477630615, "learning_rate": 9.996855447013397e-06, "loss": 3.0008, "step": 308200 }, { "epoch": 
0.3036754332731729, "grad_norm": 2.285071849822998, "learning_rate": 9.996849668149482e-06, "loss": 3.0516, "step": 308250 }, { "epoch": 0.303724691250995, "grad_norm": 2.253110647201538, "learning_rate": 9.996843883982096e-06, "loss": 3.0555, "step": 308300 }, { "epoch": 0.3037739492288171, "grad_norm": 2.46266770362854, "learning_rate": 9.996838094511245e-06, "loss": 3.0264, "step": 308350 }, { "epoch": 0.3038232072066392, "grad_norm": 2.5955288410186768, "learning_rate": 9.996832299736931e-06, "loss": 3.0014, "step": 308400 }, { "epoch": 0.30387246518446126, "grad_norm": 2.2424843311309814, "learning_rate": 9.996826499659169e-06, "loss": 3.052, "step": 308450 }, { "epoch": 0.3039217231622834, "grad_norm": 2.156332492828369, "learning_rate": 9.996820694277957e-06, "loss": 2.9698, "step": 308500 }, { "epoch": 0.30397098114010546, "grad_norm": 2.354210376739502, "learning_rate": 9.996814883593307e-06, "loss": 3.0519, "step": 308550 }, { "epoch": 0.3040202391179275, "grad_norm": 2.365471839904785, "learning_rate": 9.996809067605222e-06, "loss": 2.9409, "step": 308600 }, { "epoch": 0.30406949709574965, "grad_norm": 2.6839261054992676, "learning_rate": 9.996803246313707e-06, "loss": 3.0125, "step": 308650 }, { "epoch": 0.3041187550735717, "grad_norm": 2.503632068634033, "learning_rate": 9.99679741971877e-06, "loss": 2.9781, "step": 308700 }, { "epoch": 0.3041680130513938, "grad_norm": 2.2489304542541504, "learning_rate": 9.996791587820419e-06, "loss": 2.9685, "step": 308750 }, { "epoch": 0.3042172710292159, "grad_norm": 2.4574649333953857, "learning_rate": 9.996785750618656e-06, "loss": 2.9733, "step": 308800 }, { "epoch": 0.304266529007038, "grad_norm": 2.730006217956543, "learning_rate": 9.996779908113492e-06, "loss": 3.015, "step": 308850 }, { "epoch": 0.30431578698486006, "grad_norm": 2.692553997039795, "learning_rate": 9.996774060304929e-06, "loss": 2.9729, "step": 308900 }, { "epoch": 0.30436504496268213, "grad_norm": 2.324758768081665, "learning_rate": 
9.996768207192975e-06, "loss": 3.0601, "step": 308950 }, { "epoch": 0.30441430294050426, "grad_norm": 2.582911491394043, "learning_rate": 9.996762348777635e-06, "loss": 2.9935, "step": 309000 }, { "epoch": 0.3044635609183263, "grad_norm": 2.258347272872925, "learning_rate": 9.99675648505892e-06, "loss": 2.9806, "step": 309050 }, { "epoch": 0.3045128188961484, "grad_norm": 2.48697829246521, "learning_rate": 9.99675061603683e-06, "loss": 3.0326, "step": 309100 }, { "epoch": 0.3045620768739705, "grad_norm": 2.434758186340332, "learning_rate": 9.996744741711374e-06, "loss": 3.013, "step": 309150 }, { "epoch": 0.3046113348517926, "grad_norm": 2.227440118789673, "learning_rate": 9.996738862082557e-06, "loss": 3.0657, "step": 309200 }, { "epoch": 0.30466059282961466, "grad_norm": 2.225358486175537, "learning_rate": 9.996732977150387e-06, "loss": 2.9734, "step": 309250 }, { "epoch": 0.3047098508074368, "grad_norm": 2.424178123474121, "learning_rate": 9.99672708691487e-06, "loss": 2.9443, "step": 309300 }, { "epoch": 0.30475910878525886, "grad_norm": 2.5271685123443604, "learning_rate": 9.996721191376011e-06, "loss": 2.9516, "step": 309350 }, { "epoch": 0.30480836676308093, "grad_norm": 2.3428306579589844, "learning_rate": 9.996715290533817e-06, "loss": 3.1211, "step": 309400 }, { "epoch": 0.30485762474090305, "grad_norm": 2.671780824661255, "learning_rate": 9.996709384388295e-06, "loss": 3.0473, "step": 309450 }, { "epoch": 0.3049068827187251, "grad_norm": 2.3350250720977783, "learning_rate": 9.99670347293945e-06, "loss": 2.9777, "step": 309500 }, { "epoch": 0.3049561406965472, "grad_norm": 2.3178908824920654, "learning_rate": 9.996697556187289e-06, "loss": 2.9643, "step": 309550 }, { "epoch": 0.3050053986743693, "grad_norm": 2.3414082527160645, "learning_rate": 9.996691634131816e-06, "loss": 3.0202, "step": 309600 }, { "epoch": 0.3050546566521914, "grad_norm": 2.3130717277526855, "learning_rate": 9.996685706773042e-06, "loss": 3.0475, "step": 309650 }, { "epoch": 
0.30510391463001346, "grad_norm": 2.318556547164917, "learning_rate": 9.99667977411097e-06, "loss": 2.9556, "step": 309700 }, { "epoch": 0.3051531726078356, "grad_norm": 2.3123040199279785, "learning_rate": 9.996673836145607e-06, "loss": 2.9607, "step": 309750 }, { "epoch": 0.30520243058565766, "grad_norm": 2.2077224254608154, "learning_rate": 9.996667892876959e-06, "loss": 3.0973, "step": 309800 }, { "epoch": 0.30525168856347973, "grad_norm": 2.268465042114258, "learning_rate": 9.996661944305032e-06, "loss": 2.9428, "step": 309850 }, { "epoch": 0.30530094654130185, "grad_norm": 2.3785126209259033, "learning_rate": 9.996655990429834e-06, "loss": 2.968, "step": 309900 }, { "epoch": 0.3053502045191239, "grad_norm": 2.592348337173462, "learning_rate": 9.99665003125137e-06, "loss": 3.022, "step": 309950 }, { "epoch": 0.305399462496946, "grad_norm": 2.1196916103363037, "learning_rate": 9.996644066769646e-06, "loss": 3.0511, "step": 310000 }, { "epoch": 0.3054487204747681, "grad_norm": 2.401073694229126, "learning_rate": 9.99663809698467e-06, "loss": 3.0028, "step": 310050 }, { "epoch": 0.3054979784525902, "grad_norm": 2.4942500591278076, "learning_rate": 9.996632121896446e-06, "loss": 3.076, "step": 310100 }, { "epoch": 0.30554723643041226, "grad_norm": 2.3009376525878906, "learning_rate": 9.996626141504982e-06, "loss": 3.0589, "step": 310150 }, { "epoch": 0.30559649440823433, "grad_norm": 2.46113657951355, "learning_rate": 9.996620155810282e-06, "loss": 3.026, "step": 310200 }, { "epoch": 0.30564575238605646, "grad_norm": 2.284552812576294, "learning_rate": 9.996614164812357e-06, "loss": 3.023, "step": 310250 }, { "epoch": 0.3056950103638785, "grad_norm": 2.466646194458008, "learning_rate": 9.996608168511208e-06, "loss": 2.9531, "step": 310300 }, { "epoch": 0.3057442683417006, "grad_norm": 2.287456512451172, "learning_rate": 9.996602166906847e-06, "loss": 3.0496, "step": 310350 }, { "epoch": 0.3057935263195227, "grad_norm": 2.4064130783081055, "learning_rate": 
9.996596159999276e-06, "loss": 2.8812, "step": 310400 }, { "epoch": 0.3058427842973448, "grad_norm": 2.314246892929077, "learning_rate": 9.9965901477885e-06, "loss": 3.0286, "step": 310450 }, { "epoch": 0.30589204227516686, "grad_norm": 2.4322733879089355, "learning_rate": 9.996584130274531e-06, "loss": 3.095, "step": 310500 }, { "epoch": 0.305941300252989, "grad_norm": 2.4454193115234375, "learning_rate": 9.996578107457373e-06, "loss": 2.9616, "step": 310550 }, { "epoch": 0.30599055823081106, "grad_norm": 2.106123208999634, "learning_rate": 9.996572079337031e-06, "loss": 3.031, "step": 310600 }, { "epoch": 0.30603981620863313, "grad_norm": 2.356388807296753, "learning_rate": 9.996566045913511e-06, "loss": 3.0367, "step": 310650 }, { "epoch": 0.30608907418645526, "grad_norm": 2.699507236480713, "learning_rate": 9.996560007186821e-06, "loss": 3.0133, "step": 310700 }, { "epoch": 0.3061383321642773, "grad_norm": 2.3565239906311035, "learning_rate": 9.996553963156968e-06, "loss": 3.0959, "step": 310750 }, { "epoch": 0.3061875901420994, "grad_norm": 2.350118398666382, "learning_rate": 9.996547913823957e-06, "loss": 2.9957, "step": 310800 }, { "epoch": 0.3062368481199215, "grad_norm": 2.19903564453125, "learning_rate": 9.996541859187796e-06, "loss": 3.0337, "step": 310850 }, { "epoch": 0.3062861060977436, "grad_norm": 2.132171392440796, "learning_rate": 9.996535799248489e-06, "loss": 3.0084, "step": 310900 }, { "epoch": 0.30633536407556566, "grad_norm": 2.3159096240997314, "learning_rate": 9.996529734006044e-06, "loss": 3.0251, "step": 310950 }, { "epoch": 0.3063846220533878, "grad_norm": 2.138986587524414, "learning_rate": 9.996523663460466e-06, "loss": 3.1059, "step": 311000 }, { "epoch": 0.30643388003120986, "grad_norm": 2.599769353866577, "learning_rate": 9.996517587611766e-06, "loss": 3.0324, "step": 311050 }, { "epoch": 0.30648313800903193, "grad_norm": 2.2664358615875244, "learning_rate": 9.996511506459944e-06, "loss": 2.976, "step": 311100 }, { "epoch": 
0.30653239598685406, "grad_norm": 2.405214548110962, "learning_rate": 9.99650542000501e-06, "loss": 2.9534, "step": 311150 }, { "epoch": 0.3065816539646761, "grad_norm": 2.411836624145508, "learning_rate": 9.99649932824697e-06, "loss": 3.0028, "step": 311200 }, { "epoch": 0.3066309119424982, "grad_norm": 2.4898228645324707, "learning_rate": 9.996493231185831e-06, "loss": 3.0254, "step": 311250 }, { "epoch": 0.3066801699203203, "grad_norm": 2.412057399749756, "learning_rate": 9.9964871288216e-06, "loss": 3.0769, "step": 311300 }, { "epoch": 0.3067294278981424, "grad_norm": 2.5325610637664795, "learning_rate": 9.996481021154282e-06, "loss": 3.0989, "step": 311350 }, { "epoch": 0.30677868587596446, "grad_norm": 2.1537461280822754, "learning_rate": 9.996474908183883e-06, "loss": 2.9702, "step": 311400 }, { "epoch": 0.30682794385378653, "grad_norm": 2.4412622451782227, "learning_rate": 9.996468789910411e-06, "loss": 2.9731, "step": 311450 }, { "epoch": 0.30687720183160866, "grad_norm": 2.3032448291778564, "learning_rate": 9.996462666333871e-06, "loss": 2.9668, "step": 311500 }, { "epoch": 0.30692645980943073, "grad_norm": 2.5099568367004395, "learning_rate": 9.996456537454272e-06, "loss": 2.9738, "step": 311550 }, { "epoch": 0.3069757177872528, "grad_norm": 2.254779815673828, "learning_rate": 9.996450403271617e-06, "loss": 2.9675, "step": 311600 }, { "epoch": 0.3070249757650749, "grad_norm": 2.2271130084991455, "learning_rate": 9.996444263785914e-06, "loss": 3.0024, "step": 311650 }, { "epoch": 0.307074233742897, "grad_norm": 2.1509151458740234, "learning_rate": 9.996438118997172e-06, "loss": 3.0283, "step": 311700 }, { "epoch": 0.30712349172071907, "grad_norm": 2.706993341445923, "learning_rate": 9.996431968905394e-06, "loss": 2.9996, "step": 311750 }, { "epoch": 0.3071727496985412, "grad_norm": 2.38284969329834, "learning_rate": 9.99642581351059e-06, "loss": 3.0083, "step": 311800 }, { "epoch": 0.30722200767636326, "grad_norm": 2.423383951187134, "learning_rate": 
9.996419652812761e-06, "loss": 3.0453, "step": 311850 }, { "epoch": 0.30727126565418533, "grad_norm": 2.354200839996338, "learning_rate": 9.996413486811919e-06, "loss": 2.9777, "step": 311900 }, { "epoch": 0.30732052363200746, "grad_norm": 2.4652881622314453, "learning_rate": 9.996407315508069e-06, "loss": 3.0205, "step": 311950 }, { "epoch": 0.30736978160982953, "grad_norm": 2.443474769592285, "learning_rate": 9.996401138901217e-06, "loss": 3.0216, "step": 312000 }, { "epoch": 0.3074190395876516, "grad_norm": 2.311983108520508, "learning_rate": 9.99639495699137e-06, "loss": 3.0313, "step": 312050 }, { "epoch": 0.3074682975654737, "grad_norm": 2.1913750171661377, "learning_rate": 9.996388769778533e-06, "loss": 3.0571, "step": 312100 }, { "epoch": 0.3075175555432958, "grad_norm": 2.2754499912261963, "learning_rate": 9.996382577262715e-06, "loss": 3.0225, "step": 312150 }, { "epoch": 0.30756681352111787, "grad_norm": 2.28769850730896, "learning_rate": 9.99637637944392e-06, "loss": 3.0175, "step": 312200 }, { "epoch": 0.30761607149894, "grad_norm": 2.184319257736206, "learning_rate": 9.996370176322157e-06, "loss": 3.0385, "step": 312250 }, { "epoch": 0.30766532947676206, "grad_norm": 2.3818931579589844, "learning_rate": 9.99636396789743e-06, "loss": 3.0349, "step": 312300 }, { "epoch": 0.30771458745458413, "grad_norm": 2.262977361679077, "learning_rate": 9.99635775416975e-06, "loss": 2.9937, "step": 312350 }, { "epoch": 0.30776384543240626, "grad_norm": 2.317479133605957, "learning_rate": 9.996351535139118e-06, "loss": 3.0471, "step": 312400 }, { "epoch": 0.30781310341022833, "grad_norm": 2.4597971439361572, "learning_rate": 9.996345310805544e-06, "loss": 3.0236, "step": 312450 }, { "epoch": 0.3078623613880504, "grad_norm": 2.1973822116851807, "learning_rate": 9.996339081169036e-06, "loss": 3.0292, "step": 312500 }, { "epoch": 0.3079116193658725, "grad_norm": 2.278440475463867, "learning_rate": 9.996332846229596e-06, "loss": 2.9371, "step": 312550 }, { "epoch": 
0.3079608773436946, "grad_norm": 2.3865201473236084, "learning_rate": 9.996326605987234e-06, "loss": 2.9635, "step": 312600 }, { "epoch": 0.30801013532151666, "grad_norm": 2.779576063156128, "learning_rate": 9.996320360441956e-06, "loss": 3.0836, "step": 312650 }, { "epoch": 0.30805939329933874, "grad_norm": 2.180945634841919, "learning_rate": 9.996314109593769e-06, "loss": 3.0043, "step": 312700 }, { "epoch": 0.30810865127716086, "grad_norm": 2.250586986541748, "learning_rate": 9.996307853442678e-06, "loss": 3.0129, "step": 312750 }, { "epoch": 0.30815790925498293, "grad_norm": 2.2202727794647217, "learning_rate": 9.99630159198869e-06, "loss": 3.0258, "step": 312800 }, { "epoch": 0.308207167232805, "grad_norm": 2.4258713722229004, "learning_rate": 9.996295325231814e-06, "loss": 2.9999, "step": 312850 }, { "epoch": 0.3082564252106271, "grad_norm": 2.327939748764038, "learning_rate": 9.996289053172054e-06, "loss": 3.0463, "step": 312900 }, { "epoch": 0.3083056831884492, "grad_norm": 2.2418766021728516, "learning_rate": 9.996282775809418e-06, "loss": 3.0356, "step": 312950 }, { "epoch": 0.30835494116627127, "grad_norm": 2.2699077129364014, "learning_rate": 9.996276493143912e-06, "loss": 2.9843, "step": 313000 }, { "epoch": 0.3084041991440934, "grad_norm": 2.306989908218384, "learning_rate": 9.996270205175543e-06, "loss": 3.0128, "step": 313050 }, { "epoch": 0.30845345712191546, "grad_norm": 2.2682242393493652, "learning_rate": 9.996263911904317e-06, "loss": 3.0425, "step": 313100 }, { "epoch": 0.30850271509973753, "grad_norm": 2.287558078765869, "learning_rate": 9.996257613330244e-06, "loss": 2.936, "step": 313150 }, { "epoch": 0.30855197307755966, "grad_norm": 2.1980414390563965, "learning_rate": 9.996251309453325e-06, "loss": 2.9992, "step": 313200 }, { "epoch": 0.30860123105538173, "grad_norm": 2.264953136444092, "learning_rate": 9.996245000273572e-06, "loss": 3.0522, "step": 313250 }, { "epoch": 0.3086504890332038, "grad_norm": 2.4297373294830322, 
"learning_rate": 9.996238685790988e-06, "loss": 2.9921, "step": 313300 }, { "epoch": 0.3086997470110259, "grad_norm": 2.0938663482666016, "learning_rate": 9.996232366005581e-06, "loss": 2.984, "step": 313350 }, { "epoch": 0.308749004988848, "grad_norm": 2.3178112506866455, "learning_rate": 9.996226040917358e-06, "loss": 3.0335, "step": 313400 }, { "epoch": 0.30879826296667007, "grad_norm": 2.244946241378784, "learning_rate": 9.996219710526325e-06, "loss": 2.9682, "step": 313450 }, { "epoch": 0.3088475209444922, "grad_norm": 2.359304189682007, "learning_rate": 9.99621337483249e-06, "loss": 3.0522, "step": 313500 }, { "epoch": 0.30889677892231426, "grad_norm": 2.2672181129455566, "learning_rate": 9.99620703383586e-06, "loss": 2.9686, "step": 313550 }, { "epoch": 0.30894603690013633, "grad_norm": 2.4422049522399902, "learning_rate": 9.99620068753644e-06, "loss": 3.0565, "step": 313600 }, { "epoch": 0.30899529487795846, "grad_norm": 2.1991586685180664, "learning_rate": 9.996194335934235e-06, "loss": 3.0782, "step": 313650 }, { "epoch": 0.30904455285578053, "grad_norm": 2.3003005981445312, "learning_rate": 9.996187979029256e-06, "loss": 3.0834, "step": 313700 }, { "epoch": 0.3090938108336026, "grad_norm": 2.208747625350952, "learning_rate": 9.99618161682151e-06, "loss": 3.0066, "step": 313750 }, { "epoch": 0.3091430688114247, "grad_norm": 2.2679004669189453, "learning_rate": 9.996175249311e-06, "loss": 2.9955, "step": 313800 }, { "epoch": 0.3091923267892468, "grad_norm": 2.1517398357391357, "learning_rate": 9.996168876497734e-06, "loss": 2.9432, "step": 313850 }, { "epoch": 0.30924158476706887, "grad_norm": 2.1666457653045654, "learning_rate": 9.99616249838172e-06, "loss": 3.0211, "step": 313900 }, { "epoch": 0.30929084274489094, "grad_norm": 2.2385599613189697, "learning_rate": 9.996156114962963e-06, "loss": 3.0453, "step": 313950 }, { "epoch": 0.30934010072271306, "grad_norm": 2.192492961883545, "learning_rate": 9.996149726241471e-06, "loss": 3.0573, "step": 314000 }, 
{ "epoch": 0.30938935870053513, "grad_norm": 2.2824437618255615, "learning_rate": 9.99614333221725e-06, "loss": 3.0015, "step": 314050 }, { "epoch": 0.3094386166783572, "grad_norm": 2.2464230060577393, "learning_rate": 9.996136932890308e-06, "loss": 3.0042, "step": 314100 }, { "epoch": 0.30948787465617933, "grad_norm": 2.2020175457000732, "learning_rate": 9.996130528260652e-06, "loss": 2.9732, "step": 314150 }, { "epoch": 0.3095371326340014, "grad_norm": 2.2911062240600586, "learning_rate": 9.996124118328287e-06, "loss": 3.0953, "step": 314200 }, { "epoch": 0.30958639061182347, "grad_norm": 2.2962448596954346, "learning_rate": 9.99611770309322e-06, "loss": 3.0144, "step": 314250 }, { "epoch": 0.3096356485896456, "grad_norm": 2.3657443523406982, "learning_rate": 9.99611128255546e-06, "loss": 3.0084, "step": 314300 }, { "epoch": 0.30968490656746767, "grad_norm": 2.2462432384490967, "learning_rate": 9.996104856715012e-06, "loss": 2.9702, "step": 314350 }, { "epoch": 0.30973416454528974, "grad_norm": 2.4032435417175293, "learning_rate": 9.99609842557188e-06, "loss": 3.0407, "step": 314400 }, { "epoch": 0.30978342252311186, "grad_norm": 2.3730244636535645, "learning_rate": 9.996091989126078e-06, "loss": 2.9775, "step": 314450 }, { "epoch": 0.30983268050093393, "grad_norm": 2.1583681106567383, "learning_rate": 9.996085547377608e-06, "loss": 3.0215, "step": 314500 }, { "epoch": 0.309881938478756, "grad_norm": 2.6782078742980957, "learning_rate": 9.996079100326477e-06, "loss": 3.0175, "step": 314550 }, { "epoch": 0.30993119645657813, "grad_norm": 2.002676010131836, "learning_rate": 9.996072647972692e-06, "loss": 3.0156, "step": 314600 }, { "epoch": 0.3099804544344002, "grad_norm": 2.3512744903564453, "learning_rate": 9.99606619031626e-06, "loss": 3.0528, "step": 314650 }, { "epoch": 0.31002971241222227, "grad_norm": 2.251086473464966, "learning_rate": 9.996059727357189e-06, "loss": 3.0309, "step": 314700 }, { "epoch": 0.3100789703900444, "grad_norm": 2.2585465908050537, 
"learning_rate": 9.996053259095485e-06, "loss": 3.0315, "step": 314750 }, { "epoch": 0.31012822836786647, "grad_norm": 2.2087645530700684, "learning_rate": 9.996046785531157e-06, "loss": 3.0499, "step": 314800 }, { "epoch": 0.31017748634568854, "grad_norm": 2.345472812652588, "learning_rate": 9.996040306664206e-06, "loss": 3.0331, "step": 314850 }, { "epoch": 0.31022674432351066, "grad_norm": 2.2506842613220215, "learning_rate": 9.996033822494644e-06, "loss": 3.0033, "step": 314900 }, { "epoch": 0.31027600230133273, "grad_norm": 2.4905457496643066, "learning_rate": 9.996027333022478e-06, "loss": 2.9611, "step": 314950 }, { "epoch": 0.3103252602791548, "grad_norm": 2.3499624729156494, "learning_rate": 9.996020838247711e-06, "loss": 3.0723, "step": 315000 }, { "epoch": 0.3103745182569769, "grad_norm": 2.2633299827575684, "learning_rate": 9.996014338170354e-06, "loss": 3.0467, "step": 315050 }, { "epoch": 0.310423776234799, "grad_norm": 2.244985818862915, "learning_rate": 9.996007832790413e-06, "loss": 2.9855, "step": 315100 }, { "epoch": 0.31047303421262107, "grad_norm": 2.754910945892334, "learning_rate": 9.996001322107892e-06, "loss": 3.0338, "step": 315150 }, { "epoch": 0.31052229219044314, "grad_norm": 2.358602523803711, "learning_rate": 9.995994806122802e-06, "loss": 2.9972, "step": 315200 }, { "epoch": 0.31057155016826526, "grad_norm": 2.564812183380127, "learning_rate": 9.995988284835146e-06, "loss": 3.0275, "step": 315250 }, { "epoch": 0.31062080814608733, "grad_norm": 2.3292109966278076, "learning_rate": 9.995981758244935e-06, "loss": 2.9447, "step": 315300 }, { "epoch": 0.3106700661239094, "grad_norm": 2.4661121368408203, "learning_rate": 9.995975226352171e-06, "loss": 3.0452, "step": 315350 }, { "epoch": 0.31071932410173153, "grad_norm": 2.216602087020874, "learning_rate": 9.995968689156866e-06, "loss": 2.9841, "step": 315400 }, { "epoch": 0.3107685820795536, "grad_norm": 2.1377151012420654, "learning_rate": 9.995962146659023e-06, "loss": 3.0245, "step": 
315450 }, { "epoch": 0.31081784005737567, "grad_norm": 2.236062526702881, "learning_rate": 9.995955598858652e-06, "loss": 2.9695, "step": 315500 }, { "epoch": 0.3108670980351978, "grad_norm": 2.3402137756347656, "learning_rate": 9.995949045755759e-06, "loss": 3.0175, "step": 315550 }, { "epoch": 0.31091635601301987, "grad_norm": 2.406885862350464, "learning_rate": 9.99594248735035e-06, "loss": 2.9877, "step": 315600 }, { "epoch": 0.31096561399084194, "grad_norm": 2.3934450149536133, "learning_rate": 9.995935923642432e-06, "loss": 2.9834, "step": 315650 }, { "epoch": 0.31101487196866406, "grad_norm": 2.196258306503296, "learning_rate": 9.995929354632013e-06, "loss": 3.0542, "step": 315700 }, { "epoch": 0.31106412994648613, "grad_norm": 2.127100944519043, "learning_rate": 9.9959227803191e-06, "loss": 2.929, "step": 315750 }, { "epoch": 0.3111133879243082, "grad_norm": 2.441338539123535, "learning_rate": 9.995916200703697e-06, "loss": 3.0144, "step": 315800 }, { "epoch": 0.31116264590213033, "grad_norm": 2.4567346572875977, "learning_rate": 9.995909615785814e-06, "loss": 3.0162, "step": 315850 }, { "epoch": 0.3112119038799524, "grad_norm": 2.27217960357666, "learning_rate": 9.995903025565459e-06, "loss": 2.9754, "step": 315900 }, { "epoch": 0.31126116185777447, "grad_norm": 2.2861671447753906, "learning_rate": 9.995896430042637e-06, "loss": 3.0349, "step": 315950 }, { "epoch": 0.3113104198355966, "grad_norm": 2.234182834625244, "learning_rate": 9.995889829217354e-06, "loss": 3.0451, "step": 316000 }, { "epoch": 0.31135967781341867, "grad_norm": 2.1110923290252686, "learning_rate": 9.995883223089619e-06, "loss": 3.0071, "step": 316050 }, { "epoch": 0.31140893579124074, "grad_norm": 2.4154622554779053, "learning_rate": 9.99587661165944e-06, "loss": 3.054, "step": 316100 }, { "epoch": 0.31145819376906286, "grad_norm": 2.633284091949463, "learning_rate": 9.99586999492682e-06, "loss": 3.0022, "step": 316150 }, { "epoch": 0.31150745174688493, "grad_norm": 
2.7270987033843994, "learning_rate": 9.99586337289177e-06, "loss": 2.9218, "step": 316200 }, { "epoch": 0.311556709724707, "grad_norm": 2.2962069511413574, "learning_rate": 9.995856745554294e-06, "loss": 2.9438, "step": 316250 }, { "epoch": 0.3116059677025291, "grad_norm": 2.396324634552002, "learning_rate": 9.995850112914401e-06, "loss": 3.0248, "step": 316300 }, { "epoch": 0.3116552256803512, "grad_norm": 2.4210610389709473, "learning_rate": 9.995843474972098e-06, "loss": 3.0113, "step": 316350 }, { "epoch": 0.31170448365817327, "grad_norm": 2.3294830322265625, "learning_rate": 9.995836831727392e-06, "loss": 3.0268, "step": 316400 }, { "epoch": 0.31175374163599534, "grad_norm": 2.487356424331665, "learning_rate": 9.99583018318029e-06, "loss": 2.9862, "step": 316450 }, { "epoch": 0.31180299961381747, "grad_norm": 2.4091708660125732, "learning_rate": 9.995823529330797e-06, "loss": 3.0267, "step": 316500 }, { "epoch": 0.31185225759163954, "grad_norm": 2.2486772537231445, "learning_rate": 9.995816870178923e-06, "loss": 3.0764, "step": 316550 }, { "epoch": 0.3119015155694616, "grad_norm": 2.421083688735962, "learning_rate": 9.995810205724673e-06, "loss": 3.0502, "step": 316600 }, { "epoch": 0.31195077354728373, "grad_norm": 2.528093099594116, "learning_rate": 9.995803535968054e-06, "loss": 3.0208, "step": 316650 }, { "epoch": 0.3120000315251058, "grad_norm": 2.3876194953918457, "learning_rate": 9.995796860909076e-06, "loss": 3.0479, "step": 316700 }, { "epoch": 0.3120492895029279, "grad_norm": 2.363154411315918, "learning_rate": 9.995790180547742e-06, "loss": 3.0245, "step": 316750 }, { "epoch": 0.31209854748075, "grad_norm": 2.1681394577026367, "learning_rate": 9.995783494884063e-06, "loss": 2.9921, "step": 316800 }, { "epoch": 0.31214780545857207, "grad_norm": 2.349562406539917, "learning_rate": 9.995776803918043e-06, "loss": 3.0644, "step": 316850 }, { "epoch": 0.31219706343639414, "grad_norm": 2.398301839828491, "learning_rate": 9.995770107649691e-06, "loss": 
2.9774, "step": 316900 }, { "epoch": 0.31224632141421627, "grad_norm": 2.3575966358184814, "learning_rate": 9.995763406079013e-06, "loss": 2.993, "step": 316950 }, { "epoch": 0.31229557939203834, "grad_norm": 2.2219886779785156, "learning_rate": 9.995756699206018e-06, "loss": 2.9969, "step": 317000 }, { "epoch": 0.3123448373698604, "grad_norm": 2.3304412364959717, "learning_rate": 9.995749987030711e-06, "loss": 2.9615, "step": 317050 }, { "epoch": 0.31239409534768253, "grad_norm": 2.2850918769836426, "learning_rate": 9.995743269553098e-06, "loss": 3.0284, "step": 317100 }, { "epoch": 0.3124433533255046, "grad_norm": 2.4318318367004395, "learning_rate": 9.99573654677319e-06, "loss": 2.9997, "step": 317150 }, { "epoch": 0.3124926113033267, "grad_norm": 2.356834650039673, "learning_rate": 9.99572981869099e-06, "loss": 3.02, "step": 317200 }, { "epoch": 0.3125418692811488, "grad_norm": 2.3789877891540527, "learning_rate": 9.995723085306509e-06, "loss": 3.0594, "step": 317250 }, { "epoch": 0.31259112725897087, "grad_norm": 2.407999277114868, "learning_rate": 9.995716346619752e-06, "loss": 2.9978, "step": 317300 }, { "epoch": 0.31264038523679294, "grad_norm": 2.4253487586975098, "learning_rate": 9.995709602630724e-06, "loss": 2.9876, "step": 317350 }, { "epoch": 0.31268964321461507, "grad_norm": 2.380905866622925, "learning_rate": 9.995702853339438e-06, "loss": 2.9729, "step": 317400 }, { "epoch": 0.31273890119243714, "grad_norm": 2.5474369525909424, "learning_rate": 9.995696098745897e-06, "loss": 3.0358, "step": 317450 }, { "epoch": 0.3127881591702592, "grad_norm": 2.7295117378234863, "learning_rate": 9.99568933885011e-06, "loss": 2.9899, "step": 317500 }, { "epoch": 0.3128374171480813, "grad_norm": 2.3716928958892822, "learning_rate": 9.995682573652082e-06, "loss": 2.9854, "step": 317550 }, { "epoch": 0.3128866751259034, "grad_norm": 2.4168429374694824, "learning_rate": 9.99567580315182e-06, "loss": 3.0011, "step": 317600 }, { "epoch": 0.31293593310372547, "grad_norm": 
2.353118896484375, "learning_rate": 9.995669027349332e-06, "loss": 3.119, "step": 317650 }, { "epoch": 0.31298519108154754, "grad_norm": 2.571105718612671, "learning_rate": 9.995662246244629e-06, "loss": 2.9504, "step": 317700 }, { "epoch": 0.31303444905936967, "grad_norm": 2.723294734954834, "learning_rate": 9.995655459837714e-06, "loss": 3.0455, "step": 317750 }, { "epoch": 0.31308370703719174, "grad_norm": 2.4551429748535156, "learning_rate": 9.995648668128594e-06, "loss": 2.9658, "step": 317800 }, { "epoch": 0.3131329650150138, "grad_norm": 2.3365747928619385, "learning_rate": 9.995641871117277e-06, "loss": 2.958, "step": 317850 }, { "epoch": 0.31318222299283593, "grad_norm": 2.144131898880005, "learning_rate": 9.995635068803772e-06, "loss": 3.0209, "step": 317900 }, { "epoch": 0.313231480970658, "grad_norm": 2.3323097229003906, "learning_rate": 9.995628261188084e-06, "loss": 3.0051, "step": 317950 }, { "epoch": 0.3132807389484801, "grad_norm": 2.1606509685516357, "learning_rate": 9.99562144827022e-06, "loss": 3.0537, "step": 318000 }, { "epoch": 0.3133299969263022, "grad_norm": 2.247515916824341, "learning_rate": 9.995614630050189e-06, "loss": 2.9701, "step": 318050 }, { "epoch": 0.31337925490412427, "grad_norm": 2.201364755630493, "learning_rate": 9.995607806527998e-06, "loss": 3.05, "step": 318100 }, { "epoch": 0.31342851288194634, "grad_norm": 2.3642578125, "learning_rate": 9.995600977703654e-06, "loss": 2.9976, "step": 318150 }, { "epoch": 0.31347777085976847, "grad_norm": 2.337104558944702, "learning_rate": 9.995594143577163e-06, "loss": 3.0535, "step": 318200 }, { "epoch": 0.31352702883759054, "grad_norm": 2.352006435394287, "learning_rate": 9.995587304148534e-06, "loss": 3.0028, "step": 318250 }, { "epoch": 0.3135762868154126, "grad_norm": 2.178617000579834, "learning_rate": 9.995580459417772e-06, "loss": 2.9971, "step": 318300 }, { "epoch": 0.31362554479323473, "grad_norm": 2.3368468284606934, "learning_rate": 9.995573609384888e-06, "loss": 3.016, 
"step": 318350 }, { "epoch": 0.3136748027710568, "grad_norm": 2.3799073696136475, "learning_rate": 9.995566754049885e-06, "loss": 2.9897, "step": 318400 }, { "epoch": 0.3137240607488789, "grad_norm": 2.2801501750946045, "learning_rate": 9.995559893412774e-06, "loss": 3.0281, "step": 318450 }, { "epoch": 0.313773318726701, "grad_norm": 2.289668083190918, "learning_rate": 9.995553027473559e-06, "loss": 3.0158, "step": 318500 }, { "epoch": 0.31382257670452307, "grad_norm": 2.1270036697387695, "learning_rate": 9.99554615623225e-06, "loss": 2.9879, "step": 318550 }, { "epoch": 0.31387183468234514, "grad_norm": 2.274127960205078, "learning_rate": 9.995539279688852e-06, "loss": 3.0791, "step": 318600 }, { "epoch": 0.31392109266016727, "grad_norm": 2.2569169998168945, "learning_rate": 9.995532397843373e-06, "loss": 2.9817, "step": 318650 }, { "epoch": 0.31397035063798934, "grad_norm": 2.4550509452819824, "learning_rate": 9.995525510695821e-06, "loss": 2.9885, "step": 318700 }, { "epoch": 0.3140196086158114, "grad_norm": 2.195941925048828, "learning_rate": 9.995518618246204e-06, "loss": 2.9544, "step": 318750 }, { "epoch": 0.3140688665936335, "grad_norm": 2.394542932510376, "learning_rate": 9.995511720494529e-06, "loss": 3.0316, "step": 318800 }, { "epoch": 0.3141181245714556, "grad_norm": 2.3514695167541504, "learning_rate": 9.995504817440801e-06, "loss": 3.0383, "step": 318850 }, { "epoch": 0.3141673825492777, "grad_norm": 2.2090342044830322, "learning_rate": 9.99549790908503e-06, "loss": 2.9955, "step": 318900 }, { "epoch": 0.31421664052709974, "grad_norm": 2.5291476249694824, "learning_rate": 9.995490995427221e-06, "loss": 2.9626, "step": 318950 }, { "epoch": 0.31426589850492187, "grad_norm": 2.275815486907959, "learning_rate": 9.995484076467386e-06, "loss": 2.9422, "step": 319000 }, { "epoch": 0.31431515648274394, "grad_norm": 2.208284854888916, "learning_rate": 9.995477152205525e-06, "loss": 3.0076, "step": 319050 }, { "epoch": 0.314364414460566, "grad_norm": 
4.213912010192871, "learning_rate": 9.995470222641652e-06, "loss": 3.0265, "step": 319100 }, { "epoch": 0.31441367243838814, "grad_norm": 2.2220160961151123, "learning_rate": 9.995463287775771e-06, "loss": 3.0089, "step": 319150 }, { "epoch": 0.3144629304162102, "grad_norm": 2.312147378921509, "learning_rate": 9.995456347607889e-06, "loss": 2.9944, "step": 319200 }, { "epoch": 0.3145121883940323, "grad_norm": 2.451925754547119, "learning_rate": 9.995449402138016e-06, "loss": 2.9944, "step": 319250 }, { "epoch": 0.3145614463718544, "grad_norm": 2.4694437980651855, "learning_rate": 9.995442451366157e-06, "loss": 3.0126, "step": 319300 }, { "epoch": 0.3146107043496765, "grad_norm": 2.281668186187744, "learning_rate": 9.995435495292321e-06, "loss": 3.0263, "step": 319350 }, { "epoch": 0.31465996232749854, "grad_norm": 2.482325315475464, "learning_rate": 9.995428533916514e-06, "loss": 3.017, "step": 319400 }, { "epoch": 0.31470922030532067, "grad_norm": 2.202923536300659, "learning_rate": 9.995421567238744e-06, "loss": 3.0441, "step": 319450 }, { "epoch": 0.31475847828314274, "grad_norm": 2.2867085933685303, "learning_rate": 9.995414595259018e-06, "loss": 3.0196, "step": 319500 }, { "epoch": 0.3148077362609648, "grad_norm": 2.4636571407318115, "learning_rate": 9.995407617977344e-06, "loss": 3.0766, "step": 319550 }, { "epoch": 0.31485699423878694, "grad_norm": 2.9409708976745605, "learning_rate": 9.99540063539373e-06, "loss": 3.0228, "step": 319600 }, { "epoch": 0.314906252216609, "grad_norm": 2.1873981952667236, "learning_rate": 9.99539364750818e-06, "loss": 3.0106, "step": 319650 }, { "epoch": 0.3149555101944311, "grad_norm": 2.3958446979522705, "learning_rate": 9.995386654320706e-06, "loss": 3.0242, "step": 319700 }, { "epoch": 0.3150047681722532, "grad_norm": 2.2478067874908447, "learning_rate": 9.995379655831312e-06, "loss": 2.9735, "step": 319750 }, { "epoch": 0.3150540261500753, "grad_norm": 2.314419746398926, "learning_rate": 9.99537265204001e-06, "loss": 3.003, 
"step": 319800 }, { "epoch": 0.31510328412789734, "grad_norm": 2.2649142742156982, "learning_rate": 9.995365642946803e-06, "loss": 3.0239, "step": 319850 }, { "epoch": 0.31515254210571947, "grad_norm": 2.521195411682129, "learning_rate": 9.995358628551699e-06, "loss": 3.0075, "step": 319900 }, { "epoch": 0.31520180008354154, "grad_norm": 2.329045534133911, "learning_rate": 9.995351608854706e-06, "loss": 3.0193, "step": 319950 }, { "epoch": 0.3152510580613636, "grad_norm": 2.2379143238067627, "learning_rate": 9.995344583855832e-06, "loss": 2.9969, "step": 320000 }, { "epoch": 0.3153003160391857, "grad_norm": 2.5926640033721924, "learning_rate": 9.995337553555083e-06, "loss": 2.9851, "step": 320050 }, { "epoch": 0.3153495740170078, "grad_norm": 2.437302350997925, "learning_rate": 9.99533051795247e-06, "loss": 2.9829, "step": 320100 }, { "epoch": 0.3153988319948299, "grad_norm": 2.139824151992798, "learning_rate": 9.995323477047994e-06, "loss": 2.9494, "step": 320150 }, { "epoch": 0.31544808997265195, "grad_norm": 2.424853563308716, "learning_rate": 9.99531643084167e-06, "loss": 3.0112, "step": 320200 }, { "epoch": 0.31549734795047407, "grad_norm": 2.3947649002075195, "learning_rate": 9.9953093793335e-06, "loss": 3.0941, "step": 320250 }, { "epoch": 0.31554660592829614, "grad_norm": 2.379643201828003, "learning_rate": 9.995302322523495e-06, "loss": 3.0199, "step": 320300 }, { "epoch": 0.3155958639061182, "grad_norm": 2.3067097663879395, "learning_rate": 9.99529526041166e-06, "loss": 3.1057, "step": 320350 }, { "epoch": 0.31564512188394034, "grad_norm": 2.2447855472564697, "learning_rate": 9.995288192998005e-06, "loss": 3.0004, "step": 320400 }, { "epoch": 0.3156943798617624, "grad_norm": 2.2591230869293213, "learning_rate": 9.995281120282535e-06, "loss": 2.9962, "step": 320450 }, { "epoch": 0.3157436378395845, "grad_norm": 2.517709493637085, "learning_rate": 9.995274042265256e-06, "loss": 2.9756, "step": 320500 }, { "epoch": 0.3157928958174066, "grad_norm": 
2.4261748790740967, "learning_rate": 9.99526695894618e-06, "loss": 2.9947, "step": 320550 }, { "epoch": 0.3158421537952287, "grad_norm": 2.3525118827819824, "learning_rate": 9.995259870325313e-06, "loss": 2.9925, "step": 320600 }, { "epoch": 0.31589141177305075, "grad_norm": 2.4597692489624023, "learning_rate": 9.995252776402662e-06, "loss": 3.0298, "step": 320650 }, { "epoch": 0.31594066975087287, "grad_norm": 2.3268790245056152, "learning_rate": 9.995245677178234e-06, "loss": 3.0111, "step": 320700 }, { "epoch": 0.31598992772869494, "grad_norm": 2.135472059249878, "learning_rate": 9.995238572652036e-06, "loss": 3.0403, "step": 320750 }, { "epoch": 0.316039185706517, "grad_norm": 2.268904447555542, "learning_rate": 9.995231462824078e-06, "loss": 3.0308, "step": 320800 }, { "epoch": 0.31608844368433914, "grad_norm": 2.2873711585998535, "learning_rate": 9.995224347694365e-06, "loss": 3.0155, "step": 320850 }, { "epoch": 0.3161377016621612, "grad_norm": 2.263529062271118, "learning_rate": 9.995217227262906e-06, "loss": 2.9999, "step": 320900 }, { "epoch": 0.3161869596399833, "grad_norm": 2.3661818504333496, "learning_rate": 9.995210101529709e-06, "loss": 3.0459, "step": 320950 }, { "epoch": 0.3162362176178054, "grad_norm": 2.3859496116638184, "learning_rate": 9.995202970494782e-06, "loss": 2.9802, "step": 321000 }, { "epoch": 0.3162854755956275, "grad_norm": 2.3529341220855713, "learning_rate": 9.995195834158128e-06, "loss": 2.989, "step": 321050 }, { "epoch": 0.31633473357344954, "grad_norm": 2.301541328430176, "learning_rate": 9.99518869251976e-06, "loss": 2.9636, "step": 321100 }, { "epoch": 0.31638399155127167, "grad_norm": 2.297841787338257, "learning_rate": 9.995181545579683e-06, "loss": 2.9542, "step": 321150 }, { "epoch": 0.31643324952909374, "grad_norm": 2.3120968341827393, "learning_rate": 9.995174393337905e-06, "loss": 2.963, "step": 321200 }, { "epoch": 0.3164825075069158, "grad_norm": 2.342954635620117, "learning_rate": 9.995167235794433e-06, "loss": 
2.9842, "step": 321250 }, { "epoch": 0.3165317654847379, "grad_norm": 2.352522611618042, "learning_rate": 9.995160072949277e-06, "loss": 2.9765, "step": 321300 }, { "epoch": 0.31658102346256, "grad_norm": 2.2417845726013184, "learning_rate": 9.995152904802443e-06, "loss": 3.0048, "step": 321350 }, { "epoch": 0.3166302814403821, "grad_norm": 2.1343228816986084, "learning_rate": 9.995145731353937e-06, "loss": 2.9975, "step": 321400 }, { "epoch": 0.31667953941820415, "grad_norm": 2.7334096431732178, "learning_rate": 9.995138552603769e-06, "loss": 2.9545, "step": 321450 }, { "epoch": 0.3167287973960263, "grad_norm": 2.294802188873291, "learning_rate": 9.995131368551947e-06, "loss": 2.9884, "step": 321500 }, { "epoch": 0.31677805537384834, "grad_norm": 2.2811436653137207, "learning_rate": 9.995124179198476e-06, "loss": 2.967, "step": 321550 }, { "epoch": 0.3168273133516704, "grad_norm": 2.640312910079956, "learning_rate": 9.995116984543365e-06, "loss": 3.04, "step": 321600 }, { "epoch": 0.31687657132949254, "grad_norm": 2.3102548122406006, "learning_rate": 9.99510978458662e-06, "loss": 2.9984, "step": 321650 }, { "epoch": 0.3169258293073146, "grad_norm": 2.1529226303100586, "learning_rate": 9.995102579328253e-06, "loss": 2.9703, "step": 321700 }, { "epoch": 0.3169750872851367, "grad_norm": 2.3430707454681396, "learning_rate": 9.995095368768267e-06, "loss": 3.0283, "step": 321750 }, { "epoch": 0.3170243452629588, "grad_norm": 2.2311439514160156, "learning_rate": 9.995088152906673e-06, "loss": 3.0448, "step": 321800 }, { "epoch": 0.3170736032407809, "grad_norm": 2.359165906906128, "learning_rate": 9.995080931743478e-06, "loss": 3.0147, "step": 321850 }, { "epoch": 0.31712286121860295, "grad_norm": 2.383981704711914, "learning_rate": 9.995073705278687e-06, "loss": 2.9731, "step": 321900 }, { "epoch": 0.3171721191964251, "grad_norm": 2.0704076290130615, "learning_rate": 9.99506647351231e-06, "loss": 3.0084, "step": 321950 }, { "epoch": 0.31722137717424714, "grad_norm": 
2.310384750366211, "learning_rate": 9.995059236444353e-06, "loss": 2.9468, "step": 322000 }, { "epoch": 0.3172706351520692, "grad_norm": 2.28782320022583, "learning_rate": 9.995051994074826e-06, "loss": 2.9596, "step": 322050 }, { "epoch": 0.31731989312989134, "grad_norm": 2.2441258430480957, "learning_rate": 9.995044746403738e-06, "loss": 3.0287, "step": 322100 }, { "epoch": 0.3173691511077134, "grad_norm": 2.2825212478637695, "learning_rate": 9.995037493431093e-06, "loss": 3.0142, "step": 322150 }, { "epoch": 0.3174184090855355, "grad_norm": 2.242141008377075, "learning_rate": 9.995030235156897e-06, "loss": 3.0395, "step": 322200 }, { "epoch": 0.3174676670633576, "grad_norm": 2.3794503211975098, "learning_rate": 9.995022971581163e-06, "loss": 2.963, "step": 322250 }, { "epoch": 0.3175169250411797, "grad_norm": 2.3367199897766113, "learning_rate": 9.995015702703898e-06, "loss": 2.9844, "step": 322300 }, { "epoch": 0.31756618301900175, "grad_norm": 2.353916645050049, "learning_rate": 9.995008428525105e-06, "loss": 3.0147, "step": 322350 }, { "epoch": 0.3176154409968239, "grad_norm": 2.3322248458862305, "learning_rate": 9.995001149044798e-06, "loss": 3.018, "step": 322400 }, { "epoch": 0.31766469897464594, "grad_norm": 2.142261505126953, "learning_rate": 9.994993864262979e-06, "loss": 3.0281, "step": 322450 }, { "epoch": 0.317713956952468, "grad_norm": 2.2486965656280518, "learning_rate": 9.994986574179659e-06, "loss": 2.9248, "step": 322500 }, { "epoch": 0.3177632149302901, "grad_norm": 2.243344306945801, "learning_rate": 9.994979278794844e-06, "loss": 3.008, "step": 322550 }, { "epoch": 0.3178124729081122, "grad_norm": 2.05446457862854, "learning_rate": 9.994971978108545e-06, "loss": 2.9652, "step": 322600 }, { "epoch": 0.3178617308859343, "grad_norm": 2.305025100708008, "learning_rate": 9.994964672120767e-06, "loss": 3.0106, "step": 322650 }, { "epoch": 0.31791098886375635, "grad_norm": 2.3543810844421387, "learning_rate": 9.994957360831518e-06, "loss": 3.0716, 
"step": 322700 }, { "epoch": 0.3179602468415785, "grad_norm": 2.408717155456543, "learning_rate": 9.994950044240805e-06, "loss": 3.0546, "step": 322750 }, { "epoch": 0.31800950481940055, "grad_norm": 2.444916248321533, "learning_rate": 9.994942722348638e-06, "loss": 2.9957, "step": 322800 }, { "epoch": 0.3180587627972226, "grad_norm": 2.5337023735046387, "learning_rate": 9.994935395155024e-06, "loss": 3.01, "step": 322850 }, { "epoch": 0.31810802077504474, "grad_norm": 2.414316177368164, "learning_rate": 9.994928062659968e-06, "loss": 2.9439, "step": 322900 }, { "epoch": 0.3181572787528668, "grad_norm": 2.371917247772217, "learning_rate": 9.994920724863484e-06, "loss": 3.006, "step": 322950 }, { "epoch": 0.3182065367306889, "grad_norm": 2.2066924571990967, "learning_rate": 9.994913381765573e-06, "loss": 2.9917, "step": 323000 }, { "epoch": 0.318255794708511, "grad_norm": 2.157283306121826, "learning_rate": 9.994906033366249e-06, "loss": 3.0114, "step": 323050 }, { "epoch": 0.3183050526863331, "grad_norm": 2.162775754928589, "learning_rate": 9.994898679665513e-06, "loss": 2.9932, "step": 323100 }, { "epoch": 0.31835431066415515, "grad_norm": 2.223466634750366, "learning_rate": 9.994891320663377e-06, "loss": 2.9806, "step": 323150 }, { "epoch": 0.3184035686419773, "grad_norm": 2.2770650386810303, "learning_rate": 9.994883956359849e-06, "loss": 2.9578, "step": 323200 }, { "epoch": 0.31845282661979935, "grad_norm": 2.8201406002044678, "learning_rate": 9.994876586754934e-06, "loss": 2.9771, "step": 323250 }, { "epoch": 0.3185020845976214, "grad_norm": 2.286628007888794, "learning_rate": 9.994869211848644e-06, "loss": 3.0722, "step": 323300 }, { "epoch": 0.31855134257544354, "grad_norm": 2.3103508949279785, "learning_rate": 9.994861831640983e-06, "loss": 2.9665, "step": 323350 }, { "epoch": 0.3186006005532656, "grad_norm": 2.4238879680633545, "learning_rate": 9.994854446131963e-06, "loss": 2.9776, "step": 323400 }, { "epoch": 0.3186498585310877, "grad_norm": 
2.266263484954834, "learning_rate": 9.994847055321587e-06, "loss": 2.9405, "step": 323450 }, { "epoch": 0.3186991165089098, "grad_norm": 2.386289596557617, "learning_rate": 9.994839659209867e-06, "loss": 3.0086, "step": 323500 }, { "epoch": 0.3187483744867319, "grad_norm": 2.2134878635406494, "learning_rate": 9.994832257796807e-06, "loss": 3.0269, "step": 323550 }, { "epoch": 0.31879763246455395, "grad_norm": 2.228789806365967, "learning_rate": 9.994824851082419e-06, "loss": 3.0477, "step": 323600 }, { "epoch": 0.3188468904423761, "grad_norm": 2.457951545715332, "learning_rate": 9.994817439066707e-06, "loss": 3.0154, "step": 323650 }, { "epoch": 0.31889614842019814, "grad_norm": 2.3145902156829834, "learning_rate": 9.994810021749679e-06, "loss": 3.0082, "step": 323700 }, { "epoch": 0.3189454063980202, "grad_norm": 2.356865406036377, "learning_rate": 9.994802599131347e-06, "loss": 3.045, "step": 323750 }, { "epoch": 0.3189946643758423, "grad_norm": 2.2982535362243652, "learning_rate": 9.994795171211716e-06, "loss": 3.0441, "step": 323800 }, { "epoch": 0.3190439223536644, "grad_norm": 2.2874183654785156, "learning_rate": 9.994787737990795e-06, "loss": 3.0277, "step": 323850 }, { "epoch": 0.3190931803314865, "grad_norm": 2.1828739643096924, "learning_rate": 9.994780299468588e-06, "loss": 2.9924, "step": 323900 }, { "epoch": 0.31914243830930855, "grad_norm": 2.1970157623291016, "learning_rate": 9.994772855645109e-06, "loss": 3.0073, "step": 323950 }, { "epoch": 0.3191916962871307, "grad_norm": 2.3162572383880615, "learning_rate": 9.994765406520362e-06, "loss": 2.9827, "step": 324000 }, { "epoch": 0.31924095426495275, "grad_norm": 2.37149977684021, "learning_rate": 9.994757952094356e-06, "loss": 3.0302, "step": 324050 }, { "epoch": 0.3192902122427748, "grad_norm": 2.2363533973693848, "learning_rate": 9.994750492367097e-06, "loss": 2.986, "step": 324100 }, { "epoch": 0.31933947022059694, "grad_norm": 2.352264642715454, "learning_rate": 9.994743027338595e-06, "loss": 
2.9581, "step": 324150 }, { "epoch": 0.319388728198419, "grad_norm": 2.160536289215088, "learning_rate": 9.99473555700886e-06, "loss": 3.0578, "step": 324200 }, { "epoch": 0.3194379861762411, "grad_norm": 2.2979745864868164, "learning_rate": 9.994728081377897e-06, "loss": 3.043, "step": 324250 }, { "epoch": 0.3194872441540632, "grad_norm": 2.311696767807007, "learning_rate": 9.994720600445713e-06, "loss": 2.97, "step": 324300 }, { "epoch": 0.3195365021318853, "grad_norm": 2.3789725303649902, "learning_rate": 9.994713114212317e-06, "loss": 2.9619, "step": 324350 }, { "epoch": 0.31958576010970735, "grad_norm": 2.232785701751709, "learning_rate": 9.994705622677717e-06, "loss": 3.0087, "step": 324400 }, { "epoch": 0.3196350180875295, "grad_norm": 2.22043514251709, "learning_rate": 9.994698125841924e-06, "loss": 2.9973, "step": 324450 }, { "epoch": 0.31968427606535155, "grad_norm": 2.3424551486968994, "learning_rate": 9.99469062370494e-06, "loss": 3.0119, "step": 324500 }, { "epoch": 0.3197335340431736, "grad_norm": 2.381495475769043, "learning_rate": 9.994683116266778e-06, "loss": 3.0328, "step": 324550 }, { "epoch": 0.31978279202099574, "grad_norm": 2.4834647178649902, "learning_rate": 9.994675603527444e-06, "loss": 3.0523, "step": 324600 }, { "epoch": 0.3198320499988178, "grad_norm": 2.2913899421691895, "learning_rate": 9.994668085486946e-06, "loss": 2.9658, "step": 324650 }, { "epoch": 0.3198813079766399, "grad_norm": 2.4016404151916504, "learning_rate": 9.994660562145292e-06, "loss": 2.9555, "step": 324700 }, { "epoch": 0.319930565954462, "grad_norm": 2.1837029457092285, "learning_rate": 9.99465303350249e-06, "loss": 2.9312, "step": 324750 }, { "epoch": 0.3199798239322841, "grad_norm": 2.323606252670288, "learning_rate": 9.994645499558547e-06, "loss": 2.9735, "step": 324800 }, { "epoch": 0.32002908191010615, "grad_norm": 2.3567721843719482, "learning_rate": 9.994637960313474e-06, "loss": 3.011, "step": 324850 }, { "epoch": 0.3200783398879283, "grad_norm": 
2.3108835220336914, "learning_rate": 9.994630415767277e-06, "loss": 3.0423, "step": 324900 }, { "epoch": 0.32012759786575035, "grad_norm": 2.3268239498138428, "learning_rate": 9.994622865919962e-06, "loss": 2.997, "step": 324950 }, { "epoch": 0.3201768558435724, "grad_norm": 2.180349826812744, "learning_rate": 9.99461531077154e-06, "loss": 2.973, "step": 325000 }, { "epoch": 0.3202261138213945, "grad_norm": 2.347105026245117, "learning_rate": 9.994607750322019e-06, "loss": 2.9955, "step": 325050 }, { "epoch": 0.3202753717992166, "grad_norm": 2.4370474815368652, "learning_rate": 9.994600184571406e-06, "loss": 3.0519, "step": 325100 }, { "epoch": 0.3203246297770387, "grad_norm": 2.381134033203125, "learning_rate": 9.994592613519708e-06, "loss": 3.0755, "step": 325150 }, { "epoch": 0.32037388775486075, "grad_norm": 2.1476330757141113, "learning_rate": 9.994585037166935e-06, "loss": 2.9089, "step": 325200 }, { "epoch": 0.3204231457326829, "grad_norm": 2.3896710872650146, "learning_rate": 9.994577455513094e-06, "loss": 3.0204, "step": 325250 }, { "epoch": 0.32047240371050495, "grad_norm": 2.370788335800171, "learning_rate": 9.994569868558193e-06, "loss": 2.9453, "step": 325300 }, { "epoch": 0.320521661688327, "grad_norm": 2.2425074577331543, "learning_rate": 9.99456227630224e-06, "loss": 2.9871, "step": 325350 }, { "epoch": 0.32057091966614915, "grad_norm": 2.576815605163574, "learning_rate": 9.994554678745244e-06, "loss": 3.0331, "step": 325400 }, { "epoch": 0.3206201776439712, "grad_norm": 2.1372947692871094, "learning_rate": 9.994547075887214e-06, "loss": 2.9531, "step": 325450 }, { "epoch": 0.3206694356217933, "grad_norm": 2.3413589000701904, "learning_rate": 9.994539467728155e-06, "loss": 2.9451, "step": 325500 }, { "epoch": 0.3207186935996154, "grad_norm": 2.2747602462768555, "learning_rate": 9.994531854268078e-06, "loss": 3.0007, "step": 325550 }, { "epoch": 0.3207679515774375, "grad_norm": 2.32114315032959, "learning_rate": 9.994524235506987e-06, "loss": 2.9767, 
"step": 325600 }, { "epoch": 0.32081720955525955, "grad_norm": 2.346999406814575, "learning_rate": 9.994516611444893e-06, "loss": 3.0036, "step": 325650 }, { "epoch": 0.3208664675330817, "grad_norm": 2.3094329833984375, "learning_rate": 9.994508982081806e-06, "loss": 2.911, "step": 325700 }, { "epoch": 0.32091572551090375, "grad_norm": 2.427722215652466, "learning_rate": 9.99450134741773e-06, "loss": 3.0851, "step": 325750 }, { "epoch": 0.3209649834887258, "grad_norm": 2.242171049118042, "learning_rate": 9.994493707452676e-06, "loss": 3.0294, "step": 325800 }, { "epoch": 0.32101424146654794, "grad_norm": 2.292050361633301, "learning_rate": 9.99448606218665e-06, "loss": 2.9837, "step": 325850 }, { "epoch": 0.32106349944437, "grad_norm": 2.536051034927368, "learning_rate": 9.994478411619664e-06, "loss": 2.9535, "step": 325900 }, { "epoch": 0.3211127574221921, "grad_norm": 2.266040325164795, "learning_rate": 9.994470755751722e-06, "loss": 3.1034, "step": 325950 }, { "epoch": 0.3211620154000142, "grad_norm": 2.4025213718414307, "learning_rate": 9.994463094582834e-06, "loss": 3.0084, "step": 326000 }, { "epoch": 0.3212112733778363, "grad_norm": 2.29390025138855, "learning_rate": 9.994455428113005e-06, "loss": 2.9867, "step": 326050 }, { "epoch": 0.32126053135565835, "grad_norm": 2.29315185546875, "learning_rate": 9.994447756342248e-06, "loss": 2.956, "step": 326100 }, { "epoch": 0.3213097893334805, "grad_norm": 2.208200216293335, "learning_rate": 9.994440079270568e-06, "loss": 3.0173, "step": 326150 }, { "epoch": 0.32135904731130255, "grad_norm": 2.4082369804382324, "learning_rate": 9.994432396897975e-06, "loss": 2.9984, "step": 326200 }, { "epoch": 0.3214083052891246, "grad_norm": 2.4058754444122314, "learning_rate": 9.994424709224477e-06, "loss": 2.9854, "step": 326250 }, { "epoch": 0.3214575632669467, "grad_norm": 2.2343225479125977, "learning_rate": 9.99441701625008e-06, "loss": 3.0, "step": 326300 }, { "epoch": 0.3215068212447688, "grad_norm": 2.204411029815674, 
"learning_rate": 9.994409317974795e-06, "loss": 3.0461, "step": 326350 }, { "epoch": 0.3215560792225909, "grad_norm": 2.4737579822540283, "learning_rate": 9.994401614398626e-06, "loss": 2.9469, "step": 326400 }, { "epoch": 0.32160533720041296, "grad_norm": 2.3459532260894775, "learning_rate": 9.994393905521587e-06, "loss": 3.0485, "step": 326450 }, { "epoch": 0.3216545951782351, "grad_norm": 2.244621992111206, "learning_rate": 9.994386191343681e-06, "loss": 2.9854, "step": 326500 }, { "epoch": 0.32170385315605715, "grad_norm": 2.1877098083496094, "learning_rate": 9.99437847186492e-06, "loss": 3.0816, "step": 326550 }, { "epoch": 0.3217531111338792, "grad_norm": 2.503509759902954, "learning_rate": 9.994370747085307e-06, "loss": 2.9971, "step": 326600 }, { "epoch": 0.32180236911170135, "grad_norm": 2.2942562103271484, "learning_rate": 9.994363017004858e-06, "loss": 2.9995, "step": 326650 }, { "epoch": 0.3218516270895234, "grad_norm": 2.210972309112549, "learning_rate": 9.994355281623573e-06, "loss": 3.0095, "step": 326700 }, { "epoch": 0.3219008850673455, "grad_norm": 2.2744314670562744, "learning_rate": 9.994347540941468e-06, "loss": 2.975, "step": 326750 }, { "epoch": 0.3219501430451676, "grad_norm": 2.2309682369232178, "learning_rate": 9.994339794958544e-06, "loss": 2.9718, "step": 326800 }, { "epoch": 0.3219994010229897, "grad_norm": 2.408513069152832, "learning_rate": 9.994332043674816e-06, "loss": 3.0469, "step": 326850 }, { "epoch": 0.32204865900081175, "grad_norm": 2.2341365814208984, "learning_rate": 9.994324287090285e-06, "loss": 2.967, "step": 326900 }, { "epoch": 0.3220979169786339, "grad_norm": 2.3177425861358643, "learning_rate": 9.994316525204965e-06, "loss": 3.0808, "step": 326950 }, { "epoch": 0.32214717495645595, "grad_norm": 2.3534317016601562, "learning_rate": 9.994308758018862e-06, "loss": 2.9999, "step": 327000 }, { "epoch": 0.322196432934278, "grad_norm": 2.245447874069214, "learning_rate": 9.994300985531984e-06, "loss": 3.0175, "step": 327050 
}, { "epoch": 0.32224569091210015, "grad_norm": 2.216843843460083, "learning_rate": 9.994293207744338e-06, "loss": 2.9541, "step": 327100 }, { "epoch": 0.3222949488899222, "grad_norm": 2.202080249786377, "learning_rate": 9.994285424655939e-06, "loss": 2.9572, "step": 327150 }, { "epoch": 0.3223442068677443, "grad_norm": 2.3912792205810547, "learning_rate": 9.994277636266785e-06, "loss": 2.9878, "step": 327200 }, { "epoch": 0.3223934648455664, "grad_norm": 2.3706064224243164, "learning_rate": 9.994269842576892e-06, "loss": 2.9844, "step": 327250 }, { "epoch": 0.3224427228233885, "grad_norm": 2.697843313217163, "learning_rate": 9.994262043586267e-06, "loss": 3.0056, "step": 327300 }, { "epoch": 0.32249198080121055, "grad_norm": 2.0959384441375732, "learning_rate": 9.994254239294915e-06, "loss": 3.0166, "step": 327350 }, { "epoch": 0.3225412387790327, "grad_norm": 2.400178909301758, "learning_rate": 9.994246429702846e-06, "loss": 2.8883, "step": 327400 }, { "epoch": 0.32259049675685475, "grad_norm": 2.2338175773620605, "learning_rate": 9.99423861481007e-06, "loss": 3.0509, "step": 327450 }, { "epoch": 0.3226397547346768, "grad_norm": 2.32521653175354, "learning_rate": 9.994230794616592e-06, "loss": 2.9591, "step": 327500 }, { "epoch": 0.3226890127124989, "grad_norm": 2.2035601139068604, "learning_rate": 9.994222969122425e-06, "loss": 3.0502, "step": 327550 }, { "epoch": 0.322738270690321, "grad_norm": 2.258697032928467, "learning_rate": 9.994215138327573e-06, "loss": 2.9917, "step": 327600 }, { "epoch": 0.3227875286681431, "grad_norm": 2.2851362228393555, "learning_rate": 9.994207302232044e-06, "loss": 3.0219, "step": 327650 }, { "epoch": 0.32283678664596516, "grad_norm": 2.271369457244873, "learning_rate": 9.994199460835851e-06, "loss": 2.9621, "step": 327700 }, { "epoch": 0.3228860446237873, "grad_norm": 2.2642946243286133, "learning_rate": 9.994191614139e-06, "loss": 3.0231, "step": 327750 }, { "epoch": 0.32293530260160935, "grad_norm": 2.332162618637085, 
"learning_rate": 9.994183762141496e-06, "loss": 3.0123, "step": 327800 }, { "epoch": 0.3229845605794314, "grad_norm": 2.2626237869262695, "learning_rate": 9.99417590484335e-06, "loss": 3.011, "step": 327850 }, { "epoch": 0.32303381855725355, "grad_norm": 2.1220316886901855, "learning_rate": 9.994168042244573e-06, "loss": 3.0021, "step": 327900 }, { "epoch": 0.3230830765350756, "grad_norm": 2.387272596359253, "learning_rate": 9.99416017434517e-06, "loss": 3.0009, "step": 327950 }, { "epoch": 0.3231323345128977, "grad_norm": 2.328430652618408, "learning_rate": 9.99415230114515e-06, "loss": 2.9337, "step": 328000 }, { "epoch": 0.3231815924907198, "grad_norm": 2.342526435852051, "learning_rate": 9.994144422644521e-06, "loss": 3.0253, "step": 328050 }, { "epoch": 0.3232308504685419, "grad_norm": 2.1986277103424072, "learning_rate": 9.994136538843292e-06, "loss": 2.9378, "step": 328100 }, { "epoch": 0.32328010844636396, "grad_norm": 2.2318062782287598, "learning_rate": 9.994128649741472e-06, "loss": 3.0628, "step": 328150 }, { "epoch": 0.3233293664241861, "grad_norm": 2.562345504760742, "learning_rate": 9.994120755339067e-06, "loss": 3.0457, "step": 328200 }, { "epoch": 0.32337862440200815, "grad_norm": 2.267371416091919, "learning_rate": 9.994112855636089e-06, "loss": 2.9136, "step": 328250 }, { "epoch": 0.3234278823798302, "grad_norm": 2.4698474407196045, "learning_rate": 9.994104950632543e-06, "loss": 2.9654, "step": 328300 }, { "epoch": 0.32347714035765235, "grad_norm": 2.4881293773651123, "learning_rate": 9.99409704032844e-06, "loss": 3.0255, "step": 328350 }, { "epoch": 0.3235263983354744, "grad_norm": 2.2579379081726074, "learning_rate": 9.994089124723787e-06, "loss": 2.9906, "step": 328400 }, { "epoch": 0.3235756563132965, "grad_norm": 2.533039093017578, "learning_rate": 9.994081203818592e-06, "loss": 3.0264, "step": 328450 }, { "epoch": 0.3236249142911186, "grad_norm": 2.626873731613159, "learning_rate": 9.994073277612862e-06, "loss": 3.0112, "step": 328500 }, { 
"epoch": 0.3236741722689407, "grad_norm": 2.190354824066162, "learning_rate": 9.994065346106611e-06, "loss": 2.9717, "step": 328550 }, { "epoch": 0.32372343024676276, "grad_norm": 2.3194122314453125, "learning_rate": 9.994057409299843e-06, "loss": 3.009, "step": 328600 }, { "epoch": 0.3237726882245849, "grad_norm": 2.4748988151550293, "learning_rate": 9.994049467192566e-06, "loss": 3.0041, "step": 328650 }, { "epoch": 0.32382194620240695, "grad_norm": 2.3168745040893555, "learning_rate": 9.994041519784789e-06, "loss": 2.9442, "step": 328700 }, { "epoch": 0.323871204180229, "grad_norm": 2.9261860847473145, "learning_rate": 9.994033567076522e-06, "loss": 3.0194, "step": 328750 }, { "epoch": 0.3239204621580511, "grad_norm": 2.301459312438965, "learning_rate": 9.994025609067773e-06, "loss": 3.0219, "step": 328800 }, { "epoch": 0.3239697201358732, "grad_norm": 2.138746500015259, "learning_rate": 9.99401764575855e-06, "loss": 3.0181, "step": 328850 }, { "epoch": 0.3240189781136953, "grad_norm": 2.104782819747925, "learning_rate": 9.99400967714886e-06, "loss": 3.0074, "step": 328900 }, { "epoch": 0.32406823609151736, "grad_norm": 2.301400899887085, "learning_rate": 9.994001703238715e-06, "loss": 2.9688, "step": 328950 }, { "epoch": 0.3241174940693395, "grad_norm": 2.5314807891845703, "learning_rate": 9.99399372402812e-06, "loss": 2.9774, "step": 329000 }, { "epoch": 0.32416675204716155, "grad_norm": 2.420837163925171, "learning_rate": 9.993985739517087e-06, "loss": 2.9931, "step": 329050 }, { "epoch": 0.3242160100249836, "grad_norm": 2.3666465282440186, "learning_rate": 9.993977749705619e-06, "loss": 2.9487, "step": 329100 }, { "epoch": 0.32426526800280575, "grad_norm": 2.3122732639312744, "learning_rate": 9.99396975459373e-06, "loss": 2.9995, "step": 329150 }, { "epoch": 0.3243145259806278, "grad_norm": 2.2921929359436035, "learning_rate": 9.993961754181425e-06, "loss": 3.0405, "step": 329200 }, { "epoch": 0.3243637839584499, "grad_norm": 2.3132262229919434, 
"learning_rate": 9.993953748468715e-06, "loss": 3.0025, "step": 329250 }, { "epoch": 0.324413041936272, "grad_norm": 2.2428135871887207, "learning_rate": 9.993945737455608e-06, "loss": 3.0351, "step": 329300 }, { "epoch": 0.3244622999140941, "grad_norm": 2.5213520526885986, "learning_rate": 9.99393772114211e-06, "loss": 2.99, "step": 329350 }, { "epoch": 0.32451155789191616, "grad_norm": 2.218778371810913, "learning_rate": 9.993929699528231e-06, "loss": 2.956, "step": 329400 }, { "epoch": 0.3245608158697383, "grad_norm": 2.276928186416626, "learning_rate": 9.99392167261398e-06, "loss": 2.9863, "step": 329450 }, { "epoch": 0.32461007384756035, "grad_norm": 2.3644790649414062, "learning_rate": 9.993913640399365e-06, "loss": 3.0067, "step": 329500 }, { "epoch": 0.3246593318253824, "grad_norm": 2.350954294204712, "learning_rate": 9.993905602884397e-06, "loss": 2.943, "step": 329550 }, { "epoch": 0.32470858980320455, "grad_norm": 2.3388078212738037, "learning_rate": 9.99389756006908e-06, "loss": 3.0474, "step": 329600 }, { "epoch": 0.3247578477810266, "grad_norm": 2.451012372970581, "learning_rate": 9.993889511953426e-06, "loss": 2.9841, "step": 329650 }, { "epoch": 0.3248071057588487, "grad_norm": 2.3123908042907715, "learning_rate": 9.993881458537443e-06, "loss": 2.9834, "step": 329700 }, { "epoch": 0.3248563637366708, "grad_norm": 2.227527141571045, "learning_rate": 9.993873399821137e-06, "loss": 3.0144, "step": 329750 }, { "epoch": 0.3249056217144929, "grad_norm": 2.359307289123535, "learning_rate": 9.993865335804522e-06, "loss": 3.0508, "step": 329800 }, { "epoch": 0.32495487969231496, "grad_norm": 2.3206188678741455, "learning_rate": 9.993857266487599e-06, "loss": 3.0415, "step": 329850 }, { "epoch": 0.325004137670137, "grad_norm": 2.269857406616211, "learning_rate": 9.993849191870382e-06, "loss": 2.9873, "step": 329900 }, { "epoch": 0.32505339564795915, "grad_norm": 2.2724006175994873, "learning_rate": 9.99384111195288e-06, "loss": 3.0562, "step": 329950 }, { 
"epoch": 0.3251026536257812, "grad_norm": 2.170011043548584, "learning_rate": 9.993833026735098e-06, "loss": 2.985, "step": 330000 }, { "epoch": 0.3251519116036033, "grad_norm": 2.2785377502441406, "learning_rate": 9.993824936217048e-06, "loss": 2.9524, "step": 330050 }, { "epoch": 0.3252011695814254, "grad_norm": 2.688688039779663, "learning_rate": 9.993816840398736e-06, "loss": 3.0086, "step": 330100 }, { "epoch": 0.3252504275592475, "grad_norm": 2.4050838947296143, "learning_rate": 9.993808739280171e-06, "loss": 2.9078, "step": 330150 }, { "epoch": 0.32529968553706956, "grad_norm": 2.536747455596924, "learning_rate": 9.993800632861364e-06, "loss": 2.9702, "step": 330200 }, { "epoch": 0.3253489435148917, "grad_norm": 2.2469213008880615, "learning_rate": 9.993792521142319e-06, "loss": 3.0417, "step": 330250 }, { "epoch": 0.32539820149271376, "grad_norm": 2.398123264312744, "learning_rate": 9.993784404123048e-06, "loss": 3.0302, "step": 330300 }, { "epoch": 0.3254474594705358, "grad_norm": 2.3504843711853027, "learning_rate": 9.993776281803559e-06, "loss": 2.9986, "step": 330350 }, { "epoch": 0.32549671744835795, "grad_norm": 2.286226987838745, "learning_rate": 9.993768154183864e-06, "loss": 2.9825, "step": 330400 }, { "epoch": 0.32554597542618, "grad_norm": 2.2099859714508057, "learning_rate": 9.993760021263964e-06, "loss": 2.9895, "step": 330450 }, { "epoch": 0.3255952334040021, "grad_norm": 2.3747777938842773, "learning_rate": 9.993751883043874e-06, "loss": 3.043, "step": 330500 }, { "epoch": 0.3256444913818242, "grad_norm": 2.2541325092315674, "learning_rate": 9.9937437395236e-06, "loss": 2.9894, "step": 330550 }, { "epoch": 0.3256937493596463, "grad_norm": 2.2578492164611816, "learning_rate": 9.993735590703152e-06, "loss": 2.9132, "step": 330600 }, { "epoch": 0.32574300733746836, "grad_norm": 2.53883695602417, "learning_rate": 9.993727436582536e-06, "loss": 3.005, "step": 330650 }, { "epoch": 0.3257922653152905, "grad_norm": 2.362668514251709, "learning_rate": 
9.993719277161763e-06, "loss": 3.0294, "step": 330700 }, { "epoch": 0.32584152329311256, "grad_norm": 2.2556464672088623, "learning_rate": 9.993711112440843e-06, "loss": 3.0243, "step": 330750 }, { "epoch": 0.3258907812709346, "grad_norm": 2.4334092140197754, "learning_rate": 9.99370294241978e-06, "loss": 3.0062, "step": 330800 }, { "epoch": 0.32594003924875675, "grad_norm": 2.1844818592071533, "learning_rate": 9.993694767098588e-06, "loss": 2.9643, "step": 330850 }, { "epoch": 0.3259892972265788, "grad_norm": 2.3118185997009277, "learning_rate": 9.993686586477272e-06, "loss": 2.9467, "step": 330900 }, { "epoch": 0.3260385552044009, "grad_norm": 2.7873620986938477, "learning_rate": 9.993678400555842e-06, "loss": 2.9624, "step": 330950 }, { "epoch": 0.326087813182223, "grad_norm": 2.245900869369507, "learning_rate": 9.993670209334305e-06, "loss": 2.915, "step": 331000 }, { "epoch": 0.3261370711600451, "grad_norm": 2.3242104053497314, "learning_rate": 9.993662012812674e-06, "loss": 3.0562, "step": 331050 }, { "epoch": 0.32618632913786716, "grad_norm": 2.4527854919433594, "learning_rate": 9.993653810990952e-06, "loss": 3.0276, "step": 331100 }, { "epoch": 0.32623558711568923, "grad_norm": 2.298565149307251, "learning_rate": 9.993645603869151e-06, "loss": 3.0112, "step": 331150 }, { "epoch": 0.32628484509351136, "grad_norm": 2.499424934387207, "learning_rate": 9.993637391447281e-06, "loss": 2.954, "step": 331200 }, { "epoch": 0.3263341030713334, "grad_norm": 2.1946020126342773, "learning_rate": 9.993629173725348e-06, "loss": 3.0103, "step": 331250 }, { "epoch": 0.3263833610491555, "grad_norm": 2.367494583129883, "learning_rate": 9.993620950703363e-06, "loss": 3.0019, "step": 331300 }, { "epoch": 0.3264326190269776, "grad_norm": 2.3858416080474854, "learning_rate": 9.993612722381332e-06, "loss": 3.0011, "step": 331350 }, { "epoch": 0.3264818770047997, "grad_norm": 2.383852005004883, "learning_rate": 9.993604488759266e-06, "loss": 2.9786, "step": 331400 }, { "epoch": 
0.32653113498262176, "grad_norm": 2.4190244674682617, "learning_rate": 9.993596249837173e-06, "loss": 2.9784, "step": 331450 }, { "epoch": 0.3265803929604439, "grad_norm": 2.48496675491333, "learning_rate": 9.993588005615062e-06, "loss": 3.0069, "step": 331500 }, { "epoch": 0.32662965093826596, "grad_norm": 2.1755332946777344, "learning_rate": 9.99357975609294e-06, "loss": 3.0153, "step": 331550 }, { "epoch": 0.32667890891608803, "grad_norm": 2.2785990238189697, "learning_rate": 9.993571501270818e-06, "loss": 2.9896, "step": 331600 }, { "epoch": 0.32672816689391015, "grad_norm": 2.293120861053467, "learning_rate": 9.993563241148704e-06, "loss": 3.0331, "step": 331650 }, { "epoch": 0.3267774248717322, "grad_norm": 2.425532102584839, "learning_rate": 9.993554975726608e-06, "loss": 3.0257, "step": 331700 }, { "epoch": 0.3268266828495543, "grad_norm": 2.5319900512695312, "learning_rate": 9.993546705004535e-06, "loss": 2.9868, "step": 331750 }, { "epoch": 0.3268759408273764, "grad_norm": 2.21345853805542, "learning_rate": 9.993538428982497e-06, "loss": 2.9728, "step": 331800 }, { "epoch": 0.3269251988051985, "grad_norm": 2.329190731048584, "learning_rate": 9.993530147660503e-06, "loss": 2.9663, "step": 331850 }, { "epoch": 0.32697445678302056, "grad_norm": 2.462146520614624, "learning_rate": 9.99352186103856e-06, "loss": 3.0214, "step": 331900 }, { "epoch": 0.3270237147608427, "grad_norm": 2.3075993061065674, "learning_rate": 9.993513569116678e-06, "loss": 2.9472, "step": 331950 }, { "epoch": 0.32707297273866476, "grad_norm": 2.2807559967041016, "learning_rate": 9.993505271894864e-06, "loss": 3.0052, "step": 332000 }, { "epoch": 0.32712223071648683, "grad_norm": 2.3136658668518066, "learning_rate": 9.993496969373132e-06, "loss": 3.055, "step": 332050 }, { "epoch": 0.32717148869430895, "grad_norm": 2.2581095695495605, "learning_rate": 9.993488661551484e-06, "loss": 2.9854, "step": 332100 }, { "epoch": 0.327220746672131, "grad_norm": 2.321136951446533, "learning_rate": 
9.993480348429932e-06, "loss": 2.9882, "step": 332150 }, { "epoch": 0.3272700046499531, "grad_norm": 2.277092695236206, "learning_rate": 9.993472030008487e-06, "loss": 2.9918, "step": 332200 }, { "epoch": 0.3273192626277752, "grad_norm": 2.2012381553649902, "learning_rate": 9.993463706287153e-06, "loss": 2.9087, "step": 332250 }, { "epoch": 0.3273685206055973, "grad_norm": 2.537729501724243, "learning_rate": 9.993455377265942e-06, "loss": 2.9835, "step": 332300 }, { "epoch": 0.32741777858341936, "grad_norm": 2.158907413482666, "learning_rate": 9.993447042944864e-06, "loss": 2.9736, "step": 332350 }, { "epoch": 0.32746703656124143, "grad_norm": 4.529309272766113, "learning_rate": 9.993438703323925e-06, "loss": 2.9802, "step": 332400 }, { "epoch": 0.32751629453906356, "grad_norm": 2.182084798812866, "learning_rate": 9.993430358403134e-06, "loss": 2.9914, "step": 332450 }, { "epoch": 0.3275655525168856, "grad_norm": 2.4692330360412598, "learning_rate": 9.993422008182501e-06, "loss": 2.8843, "step": 332500 }, { "epoch": 0.3276148104947077, "grad_norm": 2.5865678787231445, "learning_rate": 9.993413652662034e-06, "loss": 2.9429, "step": 332550 }, { "epoch": 0.3276640684725298, "grad_norm": 2.6665000915527344, "learning_rate": 9.993405291841743e-06, "loss": 2.9283, "step": 332600 }, { "epoch": 0.3277133264503519, "grad_norm": 2.3242952823638916, "learning_rate": 9.993396925721638e-06, "loss": 3.0044, "step": 332650 }, { "epoch": 0.32776258442817396, "grad_norm": 2.2576026916503906, "learning_rate": 9.993388554301726e-06, "loss": 3.0406, "step": 332700 }, { "epoch": 0.3278118424059961, "grad_norm": 2.295717716217041, "learning_rate": 9.993380177582014e-06, "loss": 3.0268, "step": 332750 }, { "epoch": 0.32786110038381816, "grad_norm": 2.3236746788024902, "learning_rate": 9.993371795562515e-06, "loss": 2.9486, "step": 332800 }, { "epoch": 0.32791035836164023, "grad_norm": 2.175859212875366, "learning_rate": 9.993363408243235e-06, "loss": 3.0295, "step": 332850 }, { "epoch": 
0.32795961633946236, "grad_norm": 2.1589910984039307, "learning_rate": 9.993355015624184e-06, "loss": 3.0078, "step": 332900 }, { "epoch": 0.3280088743172844, "grad_norm": 2.3910140991210938, "learning_rate": 9.99334661770537e-06, "loss": 2.9863, "step": 332950 }, { "epoch": 0.3280581322951065, "grad_norm": 2.3442163467407227, "learning_rate": 9.993338214486803e-06, "loss": 2.9962, "step": 333000 }, { "epoch": 0.3281073902729286, "grad_norm": 2.379298686981201, "learning_rate": 9.993329805968493e-06, "loss": 2.9801, "step": 333050 }, { "epoch": 0.3281566482507507, "grad_norm": 2.2941434383392334, "learning_rate": 9.993321392150444e-06, "loss": 2.9357, "step": 333100 }, { "epoch": 0.32820590622857276, "grad_norm": 2.4453113079071045, "learning_rate": 9.993312973032672e-06, "loss": 3.0427, "step": 333150 }, { "epoch": 0.3282551642063949, "grad_norm": 2.39605975151062, "learning_rate": 9.993304548615182e-06, "loss": 3.0097, "step": 333200 }, { "epoch": 0.32830442218421696, "grad_norm": 2.7117645740509033, "learning_rate": 9.993296118897981e-06, "loss": 3.0343, "step": 333250 }, { "epoch": 0.32835368016203903, "grad_norm": 2.379652976989746, "learning_rate": 9.993287683881082e-06, "loss": 2.9877, "step": 333300 }, { "epoch": 0.32840293813986116, "grad_norm": 2.298321008682251, "learning_rate": 9.993279243564492e-06, "loss": 3.0388, "step": 333350 }, { "epoch": 0.3284521961176832, "grad_norm": 2.3403446674346924, "learning_rate": 9.99327079794822e-06, "loss": 2.9447, "step": 333400 }, { "epoch": 0.3285014540955053, "grad_norm": 2.15140700340271, "learning_rate": 9.993262347032276e-06, "loss": 2.937, "step": 333450 }, { "epoch": 0.3285507120733274, "grad_norm": 2.1999704837799072, "learning_rate": 9.993253890816666e-06, "loss": 3.0101, "step": 333500 }, { "epoch": 0.3285999700511495, "grad_norm": 2.4294421672821045, "learning_rate": 9.993245429301403e-06, "loss": 2.9623, "step": 333550 }, { "epoch": 0.32864922802897156, "grad_norm": 2.753049373626709, "learning_rate": 
9.993236962486494e-06, "loss": 2.9858, "step": 333600 }, { "epoch": 0.32869848600679363, "grad_norm": 2.5858705043792725, "learning_rate": 9.993228490371947e-06, "loss": 2.9057, "step": 333650 }, { "epoch": 0.32874774398461576, "grad_norm": 2.4165945053100586, "learning_rate": 9.993220012957772e-06, "loss": 3.0272, "step": 333700 }, { "epoch": 0.32879700196243783, "grad_norm": 2.4367964267730713, "learning_rate": 9.993211530243978e-06, "loss": 2.9823, "step": 333750 }, { "epoch": 0.3288462599402599, "grad_norm": 2.544750213623047, "learning_rate": 9.993203042230575e-06, "loss": 3.0554, "step": 333800 }, { "epoch": 0.328895517918082, "grad_norm": 2.3746838569641113, "learning_rate": 9.99319454891757e-06, "loss": 3.0241, "step": 333850 }, { "epoch": 0.3289447758959041, "grad_norm": 2.3621301651000977, "learning_rate": 9.993186050304975e-06, "loss": 2.9568, "step": 333900 }, { "epoch": 0.32899403387372617, "grad_norm": 2.2982468605041504, "learning_rate": 9.993177546392795e-06, "loss": 2.951, "step": 333950 }, { "epoch": 0.3290432918515483, "grad_norm": 2.4338607788085938, "learning_rate": 9.993169037181041e-06, "loss": 3.0279, "step": 334000 }, { "epoch": 0.32909254982937036, "grad_norm": 2.1829757690429688, "learning_rate": 9.993160522669724e-06, "loss": 2.9909, "step": 334050 }, { "epoch": 0.32914180780719243, "grad_norm": 2.2956037521362305, "learning_rate": 9.993152002858848e-06, "loss": 2.9786, "step": 334100 }, { "epoch": 0.32919106578501456, "grad_norm": 2.537494421005249, "learning_rate": 9.993143477748427e-06, "loss": 3.0373, "step": 334150 }, { "epoch": 0.32924032376283663, "grad_norm": 2.2307798862457275, "learning_rate": 9.99313494733847e-06, "loss": 2.9364, "step": 334200 }, { "epoch": 0.3292895817406587, "grad_norm": 2.4112865924835205, "learning_rate": 9.993126411628983e-06, "loss": 3.0184, "step": 334250 }, { "epoch": 0.3293388397184808, "grad_norm": 2.1872291564941406, "learning_rate": 9.993117870619975e-06, "loss": 3.0146, "step": 334300 }, { 
"epoch": 0.3293880976963029, "grad_norm": 2.160231351852417, "learning_rate": 9.993109324311457e-06, "loss": 2.9874, "step": 334350 }, { "epoch": 0.32943735567412497, "grad_norm": 2.2773516178131104, "learning_rate": 9.993100772703439e-06, "loss": 2.9393, "step": 334400 }, { "epoch": 0.3294866136519471, "grad_norm": 2.1903815269470215, "learning_rate": 9.993092215795928e-06, "loss": 2.9746, "step": 334450 }, { "epoch": 0.32953587162976916, "grad_norm": 2.2268595695495605, "learning_rate": 9.993083653588933e-06, "loss": 3.0054, "step": 334500 }, { "epoch": 0.32958512960759123, "grad_norm": 2.305537223815918, "learning_rate": 9.993075086082463e-06, "loss": 3.0616, "step": 334550 }, { "epoch": 0.32963438758541336, "grad_norm": 2.455787181854248, "learning_rate": 9.99306651327653e-06, "loss": 3.0117, "step": 334600 }, { "epoch": 0.32968364556323543, "grad_norm": 2.1642446517944336, "learning_rate": 9.99305793517114e-06, "loss": 3.0309, "step": 334650 }, { "epoch": 0.3297329035410575, "grad_norm": 2.3401710987091064, "learning_rate": 9.993049351766303e-06, "loss": 2.9131, "step": 334700 }, { "epoch": 0.3297821615188796, "grad_norm": 2.343400239944458, "learning_rate": 9.993040763062028e-06, "loss": 3.0278, "step": 334750 }, { "epoch": 0.3298314194967017, "grad_norm": 2.202497959136963, "learning_rate": 9.993032169058325e-06, "loss": 2.9572, "step": 334800 }, { "epoch": 0.32988067747452376, "grad_norm": 2.4046151638031006, "learning_rate": 9.9930235697552e-06, "loss": 2.9749, "step": 334850 }, { "epoch": 0.32992993545234583, "grad_norm": 2.2465648651123047, "learning_rate": 9.993014965152667e-06, "loss": 2.9361, "step": 334900 }, { "epoch": 0.32997919343016796, "grad_norm": 2.385225296020508, "learning_rate": 9.993006355250731e-06, "loss": 3.0206, "step": 334950 }, { "epoch": 0.33002845140799003, "grad_norm": 2.2685365676879883, "learning_rate": 9.992997740049404e-06, "loss": 2.9295, "step": 335000 }, { "epoch": 0.3300777093858121, "grad_norm": 2.42248797416687, 
"learning_rate": 9.992989119548693e-06, "loss": 2.9187, "step": 335050 }, { "epoch": 0.3301269673636342, "grad_norm": 2.1209042072296143, "learning_rate": 9.99298049374861e-06, "loss": 2.9867, "step": 335100 }, { "epoch": 0.3301762253414563, "grad_norm": 2.285369634628296, "learning_rate": 9.99297186264916e-06, "loss": 2.9813, "step": 335150 }, { "epoch": 0.33022548331927837, "grad_norm": 2.674556016921997, "learning_rate": 9.992963226250355e-06, "loss": 2.9998, "step": 335200 }, { "epoch": 0.3302747412971005, "grad_norm": 2.219677209854126, "learning_rate": 9.992954584552204e-06, "loss": 2.964, "step": 335250 }, { "epoch": 0.33032399927492256, "grad_norm": 2.4326205253601074, "learning_rate": 9.992945937554715e-06, "loss": 2.9898, "step": 335300 }, { "epoch": 0.33037325725274463, "grad_norm": 2.233905792236328, "learning_rate": 9.9929372852579e-06, "loss": 2.9781, "step": 335350 }, { "epoch": 0.33042251523056676, "grad_norm": 2.197079658508301, "learning_rate": 9.992928627661763e-06, "loss": 3.0171, "step": 335400 }, { "epoch": 0.33047177320838883, "grad_norm": 2.5884103775024414, "learning_rate": 9.992919964766318e-06, "loss": 2.9444, "step": 335450 }, { "epoch": 0.3305210311862109, "grad_norm": 2.2713305950164795, "learning_rate": 9.992911296571572e-06, "loss": 3.0155, "step": 335500 }, { "epoch": 0.330570289164033, "grad_norm": 2.3652918338775635, "learning_rate": 9.992902623077535e-06, "loss": 3.0359, "step": 335550 }, { "epoch": 0.3306195471418551, "grad_norm": 2.208735704421997, "learning_rate": 9.992893944284217e-06, "loss": 2.9373, "step": 335600 }, { "epoch": 0.33066880511967717, "grad_norm": 2.757791519165039, "learning_rate": 9.992885260191624e-06, "loss": 2.9903, "step": 335650 }, { "epoch": 0.3307180630974993, "grad_norm": 2.39674973487854, "learning_rate": 9.992876570799769e-06, "loss": 3.0387, "step": 335700 }, { "epoch": 0.33076732107532136, "grad_norm": 2.2821710109710693, "learning_rate": 9.99286787610866e-06, "loss": 2.9802, "step": 335750 }, { 
"epoch": 0.33081657905314343, "grad_norm": 2.258312463760376, "learning_rate": 9.992859176118304e-06, "loss": 3.0403, "step": 335800 }, { "epoch": 0.33086583703096556, "grad_norm": 2.3026468753814697, "learning_rate": 9.992850470828712e-06, "loss": 2.9715, "step": 335850 }, { "epoch": 0.33091509500878763, "grad_norm": 2.3452036380767822, "learning_rate": 9.992841760239894e-06, "loss": 2.983, "step": 335900 }, { "epoch": 0.3309643529866097, "grad_norm": 2.1054975986480713, "learning_rate": 9.992833044351859e-06, "loss": 2.9429, "step": 335950 }, { "epoch": 0.3310136109644318, "grad_norm": 2.4448728561401367, "learning_rate": 9.992824323164615e-06, "loss": 2.9888, "step": 336000 }, { "epoch": 0.3310628689422539, "grad_norm": 2.36635422706604, "learning_rate": 9.992815596678172e-06, "loss": 3.0021, "step": 336050 }, { "epoch": 0.33111212692007597, "grad_norm": 2.2863500118255615, "learning_rate": 9.992806864892539e-06, "loss": 2.9728, "step": 336100 }, { "epoch": 0.33116138489789804, "grad_norm": 2.253915309906006, "learning_rate": 9.992798127807727e-06, "loss": 3.01, "step": 336150 }, { "epoch": 0.33121064287572016, "grad_norm": 2.2907164096832275, "learning_rate": 9.99278938542374e-06, "loss": 3.0121, "step": 336200 }, { "epoch": 0.33125990085354223, "grad_norm": 2.4763290882110596, "learning_rate": 9.992780637740595e-06, "loss": 2.9711, "step": 336250 }, { "epoch": 0.3313091588313643, "grad_norm": 2.458893060684204, "learning_rate": 9.992771884758296e-06, "loss": 2.9681, "step": 336300 }, { "epoch": 0.33135841680918643, "grad_norm": 2.313654899597168, "learning_rate": 9.992763126476855e-06, "loss": 3.0046, "step": 336350 }, { "epoch": 0.3314076747870085, "grad_norm": 2.5787501335144043, "learning_rate": 9.992754362896278e-06, "loss": 3.0679, "step": 336400 }, { "epoch": 0.33145693276483057, "grad_norm": 2.196828842163086, "learning_rate": 9.992745594016577e-06, "loss": 2.9476, "step": 336450 }, { "epoch": 0.3315061907426527, "grad_norm": 2.227865219116211, 
"learning_rate": 9.992736819837759e-06, "loss": 2.9764, "step": 336500 }, { "epoch": 0.33155544872047477, "grad_norm": 2.3986427783966064, "learning_rate": 9.992728040359838e-06, "loss": 3.0366, "step": 336550 }, { "epoch": 0.33160470669829684, "grad_norm": 2.258951187133789, "learning_rate": 9.992719255582817e-06, "loss": 2.9206, "step": 336600 }, { "epoch": 0.33165396467611896, "grad_norm": 2.221583843231201, "learning_rate": 9.992710465506711e-06, "loss": 2.9891, "step": 336650 }, { "epoch": 0.33170322265394103, "grad_norm": 2.284433126449585, "learning_rate": 9.992701670131525e-06, "loss": 3.0005, "step": 336700 }, { "epoch": 0.3317524806317631, "grad_norm": 2.5204062461853027, "learning_rate": 9.992692869457272e-06, "loss": 2.9252, "step": 336750 }, { "epoch": 0.33180173860958523, "grad_norm": 2.286022663116455, "learning_rate": 9.992684063483959e-06, "loss": 3.0085, "step": 336800 }, { "epoch": 0.3318509965874073, "grad_norm": 2.324477434158325, "learning_rate": 9.992675252211595e-06, "loss": 2.9513, "step": 336850 }, { "epoch": 0.33190025456522937, "grad_norm": 2.9260926246643066, "learning_rate": 9.99266643564019e-06, "loss": 3.0207, "step": 336900 }, { "epoch": 0.3319495125430515, "grad_norm": 2.2520790100097656, "learning_rate": 9.992657613769756e-06, "loss": 2.9518, "step": 336950 }, { "epoch": 0.33199877052087357, "grad_norm": 2.1295948028564453, "learning_rate": 9.992648786600297e-06, "loss": 3.0661, "step": 337000 }, { "epoch": 0.33204802849869564, "grad_norm": 2.4285457134246826, "learning_rate": 9.992639954131827e-06, "loss": 3.046, "step": 337050 }, { "epoch": 0.33209728647651776, "grad_norm": 2.2319424152374268, "learning_rate": 9.992631116364353e-06, "loss": 2.9649, "step": 337100 }, { "epoch": 0.33214654445433983, "grad_norm": 2.3819668292999268, "learning_rate": 9.992622273297886e-06, "loss": 2.9942, "step": 337150 }, { "epoch": 0.3321958024321619, "grad_norm": 2.414590835571289, "learning_rate": 9.992613424932432e-06, "loss": 2.9575, "step": 
337200 }, { "epoch": 0.332245060409984, "grad_norm": 2.330918312072754, "learning_rate": 9.992604571268004e-06, "loss": 2.9352, "step": 337250 }, { "epoch": 0.3322943183878061, "grad_norm": 2.272796154022217, "learning_rate": 9.992595712304611e-06, "loss": 3.0256, "step": 337300 }, { "epoch": 0.33234357636562817, "grad_norm": 2.175579071044922, "learning_rate": 9.992586848042261e-06, "loss": 3.0496, "step": 337350 }, { "epoch": 0.33239283434345024, "grad_norm": 2.2883782386779785, "learning_rate": 9.992577978480965e-06, "loss": 2.9649, "step": 337400 }, { "epoch": 0.33244209232127236, "grad_norm": 2.390902042388916, "learning_rate": 9.992569103620732e-06, "loss": 3.0285, "step": 337450 }, { "epoch": 0.33249135029909443, "grad_norm": 2.321103811264038, "learning_rate": 9.992560223461568e-06, "loss": 3.0092, "step": 337500 }, { "epoch": 0.3325406082769165, "grad_norm": 2.2887067794799805, "learning_rate": 9.992551338003487e-06, "loss": 2.9408, "step": 337550 }, { "epoch": 0.33258986625473863, "grad_norm": 2.3040037155151367, "learning_rate": 9.992542447246498e-06, "loss": 2.921, "step": 337600 }, { "epoch": 0.3326391242325607, "grad_norm": 2.2887332439422607, "learning_rate": 9.992533551190607e-06, "loss": 3.0094, "step": 337650 }, { "epoch": 0.33268838221038277, "grad_norm": 2.275895357131958, "learning_rate": 9.992524649835827e-06, "loss": 3.0255, "step": 337700 }, { "epoch": 0.3327376401882049, "grad_norm": 2.317335605621338, "learning_rate": 9.992515743182167e-06, "loss": 3.0139, "step": 337750 }, { "epoch": 0.33278689816602697, "grad_norm": 2.305271625518799, "learning_rate": 9.992506831229633e-06, "loss": 2.9861, "step": 337800 }, { "epoch": 0.33283615614384904, "grad_norm": 2.385145664215088, "learning_rate": 9.992497913978238e-06, "loss": 2.9699, "step": 337850 }, { "epoch": 0.33288541412167116, "grad_norm": 2.1932711601257324, "learning_rate": 9.992488991427989e-06, "loss": 2.9783, "step": 337900 }, { "epoch": 0.33293467209949323, "grad_norm": 
2.3467745780944824, "learning_rate": 9.992480063578898e-06, "loss": 2.9499, "step": 337950 }, { "epoch": 0.3329839300773153, "grad_norm": 2.316917896270752, "learning_rate": 9.992471130430975e-06, "loss": 3.0035, "step": 338000 }, { "epoch": 0.33303318805513743, "grad_norm": 2.439969062805176, "learning_rate": 9.992462191984226e-06, "loss": 2.9711, "step": 338050 }, { "epoch": 0.3330824460329595, "grad_norm": 2.2748117446899414, "learning_rate": 9.992453248238663e-06, "loss": 2.9137, "step": 338100 }, { "epoch": 0.33313170401078157, "grad_norm": 2.2765555381774902, "learning_rate": 9.992444299194295e-06, "loss": 2.9763, "step": 338150 }, { "epoch": 0.3331809619886037, "grad_norm": 2.1811654567718506, "learning_rate": 9.992435344851131e-06, "loss": 3.0055, "step": 338200 }, { "epoch": 0.33323021996642577, "grad_norm": 2.1943769454956055, "learning_rate": 9.99242638520918e-06, "loss": 2.9652, "step": 338250 }, { "epoch": 0.33327947794424784, "grad_norm": 2.296527862548828, "learning_rate": 9.992417420268454e-06, "loss": 3.0374, "step": 338300 }, { "epoch": 0.33332873592206996, "grad_norm": 2.221212387084961, "learning_rate": 9.99240845002896e-06, "loss": 2.9582, "step": 338350 }, { "epoch": 0.33337799389989203, "grad_norm": 2.460963249206543, "learning_rate": 9.99239947449071e-06, "loss": 3.0013, "step": 338400 }, { "epoch": 0.3334272518777141, "grad_norm": 2.313725233078003, "learning_rate": 9.992390493653709e-06, "loss": 2.9468, "step": 338450 }, { "epoch": 0.33347650985553623, "grad_norm": 2.2442595958709717, "learning_rate": 9.992381507517972e-06, "loss": 3.017, "step": 338500 }, { "epoch": 0.3335257678333583, "grad_norm": 2.3330509662628174, "learning_rate": 9.992372516083505e-06, "loss": 3.0032, "step": 338550 }, { "epoch": 0.33357502581118037, "grad_norm": 2.2215068340301514, "learning_rate": 9.992363519350318e-06, "loss": 2.9773, "step": 338600 }, { "epoch": 0.33362428378900244, "grad_norm": 2.300535202026367, "learning_rate": 9.992354517318422e-06, "loss": 
2.9411, "step": 338650 }, { "epoch": 0.33367354176682457, "grad_norm": 2.1731655597686768, "learning_rate": 9.992345509987825e-06, "loss": 3.0383, "step": 338700 }, { "epoch": 0.33372279974464664, "grad_norm": 2.399803400039673, "learning_rate": 9.992336497358537e-06, "loss": 2.9773, "step": 338750 }, { "epoch": 0.3337720577224687, "grad_norm": 2.5427157878875732, "learning_rate": 9.992327479430567e-06, "loss": 2.959, "step": 338800 }, { "epoch": 0.33382131570029083, "grad_norm": 2.1830692291259766, "learning_rate": 9.992318456203927e-06, "loss": 3.0222, "step": 338850 }, { "epoch": 0.3338705736781129, "grad_norm": 2.279541015625, "learning_rate": 9.992309427678625e-06, "loss": 3.0038, "step": 338900 }, { "epoch": 0.333919831655935, "grad_norm": 2.3429744243621826, "learning_rate": 9.99230039385467e-06, "loss": 3.0199, "step": 338950 }, { "epoch": 0.3339690896337571, "grad_norm": 2.258209705352783, "learning_rate": 9.992291354732072e-06, "loss": 2.9547, "step": 339000 }, { "epoch": 0.33401834761157917, "grad_norm": 2.2841849327087402, "learning_rate": 9.99228231031084e-06, "loss": 2.9849, "step": 339050 }, { "epoch": 0.33406760558940124, "grad_norm": 2.160092830657959, "learning_rate": 9.992273260590984e-06, "loss": 3.0307, "step": 339100 }, { "epoch": 0.33411686356722337, "grad_norm": 2.0771827697753906, "learning_rate": 9.992264205572515e-06, "loss": 3.0561, "step": 339150 }, { "epoch": 0.33416612154504544, "grad_norm": 2.414069414138794, "learning_rate": 9.992255145255442e-06, "loss": 2.9715, "step": 339200 }, { "epoch": 0.3342153795228675, "grad_norm": 2.228687047958374, "learning_rate": 9.992246079639772e-06, "loss": 2.9653, "step": 339250 }, { "epoch": 0.33426463750068963, "grad_norm": 2.508601188659668, "learning_rate": 9.992237008725517e-06, "loss": 2.9967, "step": 339300 }, { "epoch": 0.3343138954785117, "grad_norm": 2.3654191493988037, "learning_rate": 9.992227932512688e-06, "loss": 2.9933, "step": 339350 }, { "epoch": 0.3343631534563338, "grad_norm": 
2.3782734870910645, "learning_rate": 9.992218851001293e-06, "loss": 3.0212, "step": 339400 }, { "epoch": 0.3344124114341559, "grad_norm": 2.282581090927124, "learning_rate": 9.992209764191341e-06, "loss": 3.0301, "step": 339450 }, { "epoch": 0.33446166941197797, "grad_norm": 2.1989800930023193, "learning_rate": 9.992200672082841e-06, "loss": 3.0548, "step": 339500 }, { "epoch": 0.33451092738980004, "grad_norm": 2.2526304721832275, "learning_rate": 9.992191574675806e-06, "loss": 2.9066, "step": 339550 }, { "epoch": 0.33456018536762216, "grad_norm": 2.2149620056152344, "learning_rate": 9.992182471970243e-06, "loss": 2.9809, "step": 339600 }, { "epoch": 0.33460944334544424, "grad_norm": 2.231394052505493, "learning_rate": 9.992173363966161e-06, "loss": 2.9904, "step": 339650 }, { "epoch": 0.3346587013232663, "grad_norm": 2.4639899730682373, "learning_rate": 9.992164250663574e-06, "loss": 2.9557, "step": 339700 }, { "epoch": 0.33470795930108843, "grad_norm": 2.4430172443389893, "learning_rate": 9.992155132062485e-06, "loss": 2.9861, "step": 339750 }, { "epoch": 0.3347572172789105, "grad_norm": 2.40785813331604, "learning_rate": 9.992146008162909e-06, "loss": 2.9839, "step": 339800 }, { "epoch": 0.33480647525673257, "grad_norm": 2.2311995029449463, "learning_rate": 9.992136878964853e-06, "loss": 2.9751, "step": 339850 }, { "epoch": 0.33485573323455464, "grad_norm": 2.3457515239715576, "learning_rate": 9.992127744468329e-06, "loss": 3.0134, "step": 339900 }, { "epoch": 0.33490499121237677, "grad_norm": 2.1657660007476807, "learning_rate": 9.992118604673343e-06, "loss": 3.028, "step": 339950 }, { "epoch": 0.33495424919019884, "grad_norm": 2.1902859210968018, "learning_rate": 9.99210945957991e-06, "loss": 3.0152, "step": 340000 }, { "epoch": 0.3350035071680209, "grad_norm": 2.5864670276641846, "learning_rate": 9.992100309188034e-06, "loss": 2.9547, "step": 340050 }, { "epoch": 0.33505276514584303, "grad_norm": 2.47171688079834, "learning_rate": 9.99209115349773e-06, 
"loss": 2.9517, "step": 340100 }, { "epoch": 0.3351020231236651, "grad_norm": 2.3506343364715576, "learning_rate": 9.992081992509003e-06, "loss": 3.0074, "step": 340150 }, { "epoch": 0.3351512811014872, "grad_norm": 2.48714280128479, "learning_rate": 9.992072826221865e-06, "loss": 3.0341, "step": 340200 }, { "epoch": 0.3352005390793093, "grad_norm": 2.6056833267211914, "learning_rate": 9.992063654636328e-06, "loss": 3.0036, "step": 340250 }, { "epoch": 0.33524979705713137, "grad_norm": 2.325521469116211, "learning_rate": 9.992054477752396e-06, "loss": 2.8992, "step": 340300 }, { "epoch": 0.33529905503495344, "grad_norm": 2.306194305419922, "learning_rate": 9.992045295570084e-06, "loss": 2.9489, "step": 340350 }, { "epoch": 0.33534831301277557, "grad_norm": 2.2836101055145264, "learning_rate": 9.992036108089399e-06, "loss": 2.9678, "step": 340400 }, { "epoch": 0.33539757099059764, "grad_norm": 2.2939021587371826, "learning_rate": 9.992026915310353e-06, "loss": 3.0101, "step": 340450 }, { "epoch": 0.3354468289684197, "grad_norm": 2.3505148887634277, "learning_rate": 9.992017717232953e-06, "loss": 2.9949, "step": 340500 }, { "epoch": 0.33549608694624183, "grad_norm": 2.2477991580963135, "learning_rate": 9.99200851385721e-06, "loss": 3.0493, "step": 340550 }, { "epoch": 0.3355453449240639, "grad_norm": 2.4496474266052246, "learning_rate": 9.991999305183136e-06, "loss": 2.9898, "step": 340600 }, { "epoch": 0.335594602901886, "grad_norm": 2.573664665222168, "learning_rate": 9.991990091210737e-06, "loss": 2.9632, "step": 340650 }, { "epoch": 0.3356438608797081, "grad_norm": 2.534057855606079, "learning_rate": 9.991980871940024e-06, "loss": 3.0137, "step": 340700 }, { "epoch": 0.33569311885753017, "grad_norm": 2.2583706378936768, "learning_rate": 9.991971647371008e-06, "loss": 2.9227, "step": 340750 }, { "epoch": 0.33574237683535224, "grad_norm": 2.1019530296325684, "learning_rate": 9.991962417503698e-06, "loss": 3.0158, "step": 340800 }, { "epoch": 0.33579163481317437, 
"grad_norm": 2.041778564453125, "learning_rate": 9.991953182338103e-06, "loss": 2.9953, "step": 340850 }, { "epoch": 0.33584089279099644, "grad_norm": 2.1527388095855713, "learning_rate": 9.991943941874235e-06, "loss": 2.9835, "step": 340900 }, { "epoch": 0.3358901507688185, "grad_norm": 2.728191614151001, "learning_rate": 9.9919346961121e-06, "loss": 3.038, "step": 340950 }, { "epoch": 0.33593940874664063, "grad_norm": 2.285217523574829, "learning_rate": 9.991925445051712e-06, "loss": 3.0423, "step": 341000 }, { "epoch": 0.3359886667244627, "grad_norm": 2.379448652267456, "learning_rate": 9.991916188693078e-06, "loss": 2.938, "step": 341050 }, { "epoch": 0.3360379247022848, "grad_norm": 2.3929026126861572, "learning_rate": 9.99190692703621e-06, "loss": 2.9996, "step": 341100 }, { "epoch": 0.33608718268010684, "grad_norm": 2.437197685241699, "learning_rate": 9.991897660081117e-06, "loss": 2.9674, "step": 341150 }, { "epoch": 0.33613644065792897, "grad_norm": 2.381213426589966, "learning_rate": 9.991888387827808e-06, "loss": 2.9947, "step": 341200 }, { "epoch": 0.33618569863575104, "grad_norm": 2.200547933578491, "learning_rate": 9.991879110276293e-06, "loss": 2.9466, "step": 341250 }, { "epoch": 0.3362349566135731, "grad_norm": 2.6876845359802246, "learning_rate": 9.991869827426583e-06, "loss": 2.9949, "step": 341300 }, { "epoch": 0.33628421459139524, "grad_norm": 2.347085952758789, "learning_rate": 9.991860539278685e-06, "loss": 2.9573, "step": 341350 }, { "epoch": 0.3363334725692173, "grad_norm": 2.265566349029541, "learning_rate": 9.991851245832613e-06, "loss": 2.9714, "step": 341400 }, { "epoch": 0.3363827305470394, "grad_norm": 2.13972544670105, "learning_rate": 9.991841947088372e-06, "loss": 2.896, "step": 341450 }, { "epoch": 0.3364319885248615, "grad_norm": 2.360980749130249, "learning_rate": 9.991832643045978e-06, "loss": 2.9817, "step": 341500 }, { "epoch": 0.3364812465026836, "grad_norm": 2.1735458374023438, "learning_rate": 9.991823333705437e-06, 
"loss": 2.907, "step": 341550 }, { "epoch": 0.33653050448050564, "grad_norm": 2.348052740097046, "learning_rate": 9.991814019066759e-06, "loss": 3.0406, "step": 341600 }, { "epoch": 0.33657976245832777, "grad_norm": 2.6593024730682373, "learning_rate": 9.991804699129953e-06, "loss": 3.0218, "step": 341650 }, { "epoch": 0.33662902043614984, "grad_norm": 2.3228237628936768, "learning_rate": 9.991795373895032e-06, "loss": 3.0031, "step": 341700 }, { "epoch": 0.3366782784139719, "grad_norm": 2.2855095863342285, "learning_rate": 9.991786043362003e-06, "loss": 2.9777, "step": 341750 }, { "epoch": 0.33672753639179404, "grad_norm": 2.4082634449005127, "learning_rate": 9.991776707530877e-06, "loss": 2.94, "step": 341800 }, { "epoch": 0.3367767943696161, "grad_norm": 2.3405725955963135, "learning_rate": 9.991767366401665e-06, "loss": 2.9253, "step": 341850 }, { "epoch": 0.3368260523474382, "grad_norm": 2.2966246604919434, "learning_rate": 9.991758019974375e-06, "loss": 3.0294, "step": 341900 }, { "epoch": 0.3368753103252603, "grad_norm": 2.2345314025878906, "learning_rate": 9.991748668249016e-06, "loss": 2.9859, "step": 341950 }, { "epoch": 0.3369245683030824, "grad_norm": 2.359309196472168, "learning_rate": 9.991739311225602e-06, "loss": 3.0152, "step": 342000 }, { "epoch": 0.33697382628090444, "grad_norm": 2.2795088291168213, "learning_rate": 9.99172994890414e-06, "loss": 3.0463, "step": 342050 }, { "epoch": 0.33702308425872657, "grad_norm": 2.2686636447906494, "learning_rate": 9.99172058128464e-06, "loss": 3.0019, "step": 342100 }, { "epoch": 0.33707234223654864, "grad_norm": 2.2810771465301514, "learning_rate": 9.991711208367113e-06, "loss": 2.9185, "step": 342150 }, { "epoch": 0.3371216002143707, "grad_norm": 2.1937739849090576, "learning_rate": 9.99170183015157e-06, "loss": 2.9691, "step": 342200 }, { "epoch": 0.33717085819219283, "grad_norm": 2.1052417755126953, "learning_rate": 9.991692446638016e-06, "loss": 2.9574, "step": 342250 }, { "epoch": 0.3372201161700149, 
"grad_norm": 2.460070848464966, "learning_rate": 9.991683057826466e-06, "loss": 2.984, "step": 342300 }, { "epoch": 0.337269374147837, "grad_norm": 2.656853675842285, "learning_rate": 9.991673663716929e-06, "loss": 2.9973, "step": 342350 }, { "epoch": 0.33731863212565905, "grad_norm": 2.3854920864105225, "learning_rate": 9.991664264309414e-06, "loss": 3.05, "step": 342400 }, { "epoch": 0.33736789010348117, "grad_norm": 2.235658884048462, "learning_rate": 9.991654859603931e-06, "loss": 2.9794, "step": 342450 }, { "epoch": 0.33741714808130324, "grad_norm": 2.260119676589966, "learning_rate": 9.99164544960049e-06, "loss": 3.028, "step": 342500 }, { "epoch": 0.3374664060591253, "grad_norm": 2.274001121520996, "learning_rate": 9.991636034299101e-06, "loss": 3.0098, "step": 342550 }, { "epoch": 0.33751566403694744, "grad_norm": 2.380709171295166, "learning_rate": 9.991626613699774e-06, "loss": 2.9799, "step": 342600 }, { "epoch": 0.3375649220147695, "grad_norm": 2.1756722927093506, "learning_rate": 9.991617187802521e-06, "loss": 3.0065, "step": 342650 }, { "epoch": 0.3376141799925916, "grad_norm": 2.6460258960723877, "learning_rate": 9.991607756607349e-06, "loss": 2.9689, "step": 342700 }, { "epoch": 0.3376634379704137, "grad_norm": 2.381195068359375, "learning_rate": 9.991598320114268e-06, "loss": 3.0193, "step": 342750 }, { "epoch": 0.3377126959482358, "grad_norm": 2.544562339782715, "learning_rate": 9.991588878323291e-06, "loss": 3.0257, "step": 342800 }, { "epoch": 0.33776195392605785, "grad_norm": 2.401815414428711, "learning_rate": 9.991579431234426e-06, "loss": 2.9748, "step": 342850 }, { "epoch": 0.33781121190387997, "grad_norm": 2.247227907180786, "learning_rate": 9.991569978847682e-06, "loss": 2.9835, "step": 342900 }, { "epoch": 0.33786046988170204, "grad_norm": 2.6301238536834717, "learning_rate": 9.991560521163072e-06, "loss": 3.0296, "step": 342950 }, { "epoch": 0.3379097278595241, "grad_norm": 2.393620729446411, "learning_rate": 9.991551058180603e-06, 
"loss": 2.9969, "step": 343000 }, { "epoch": 0.33795898583734624, "grad_norm": 2.2301530838012695, "learning_rate": 9.991541589900287e-06, "loss": 3.0286, "step": 343050 }, { "epoch": 0.3380082438151683, "grad_norm": 2.2680397033691406, "learning_rate": 9.991532116322133e-06, "loss": 2.9995, "step": 343100 }, { "epoch": 0.3380575017929904, "grad_norm": 2.36830472946167, "learning_rate": 9.991522637446152e-06, "loss": 2.9533, "step": 343150 }, { "epoch": 0.3381067597708125, "grad_norm": 2.1988606452941895, "learning_rate": 9.991513153272355e-06, "loss": 3.0933, "step": 343200 }, { "epoch": 0.3381560177486346, "grad_norm": 2.226529836654663, "learning_rate": 9.991503663800748e-06, "loss": 3.0916, "step": 343250 }, { "epoch": 0.33820527572645664, "grad_norm": 2.073350191116333, "learning_rate": 9.991494169031345e-06, "loss": 2.9887, "step": 343300 }, { "epoch": 0.33825453370427877, "grad_norm": 2.2130415439605713, "learning_rate": 9.991484668964154e-06, "loss": 2.972, "step": 343350 }, { "epoch": 0.33830379168210084, "grad_norm": 2.3737876415252686, "learning_rate": 9.991475163599185e-06, "loss": 2.9696, "step": 343400 }, { "epoch": 0.3383530496599229, "grad_norm": 2.4101626873016357, "learning_rate": 9.991465652936452e-06, "loss": 2.986, "step": 343450 }, { "epoch": 0.33840230763774504, "grad_norm": 2.3866686820983887, "learning_rate": 9.991456136975962e-06, "loss": 2.9786, "step": 343500 }, { "epoch": 0.3384515656155671, "grad_norm": 2.165193557739258, "learning_rate": 9.99144661571772e-06, "loss": 3.0427, "step": 343550 }, { "epoch": 0.3385008235933892, "grad_norm": 2.1200273036956787, "learning_rate": 9.991437089161746e-06, "loss": 2.9087, "step": 343600 }, { "epoch": 0.33855008157121125, "grad_norm": 2.2995500564575195, "learning_rate": 9.991427557308044e-06, "loss": 3.0072, "step": 343650 }, { "epoch": 0.3385993395490334, "grad_norm": 2.128190517425537, "learning_rate": 9.991418020156626e-06, "loss": 2.9251, "step": 343700 }, { "epoch": 0.33864859752685544, 
"grad_norm": 2.2819156646728516, "learning_rate": 9.991408477707501e-06, "loss": 2.9678, "step": 343750 }, { "epoch": 0.3386978555046775, "grad_norm": 2.254160165786743, "learning_rate": 9.99139892996068e-06, "loss": 2.9411, "step": 343800 }, { "epoch": 0.33874711348249964, "grad_norm": 2.727442741394043, "learning_rate": 9.991389376916174e-06, "loss": 3.0159, "step": 343850 }, { "epoch": 0.3387963714603217, "grad_norm": 2.2122743129730225, "learning_rate": 9.99137981857399e-06, "loss": 2.9543, "step": 343900 }, { "epoch": 0.3388456294381438, "grad_norm": 2.5268361568450928, "learning_rate": 9.99137025493414e-06, "loss": 2.9789, "step": 343950 }, { "epoch": 0.3388948874159659, "grad_norm": 2.298715829849243, "learning_rate": 9.991360685996635e-06, "loss": 2.9972, "step": 344000 }, { "epoch": 0.338944145393788, "grad_norm": 3.052751302719116, "learning_rate": 9.991351111761486e-06, "loss": 2.9869, "step": 344050 }, { "epoch": 0.33899340337161005, "grad_norm": 2.4139404296875, "learning_rate": 9.991341532228701e-06, "loss": 2.9462, "step": 344100 }, { "epoch": 0.3390426613494322, "grad_norm": 2.348548412322998, "learning_rate": 9.99133194739829e-06, "loss": 3.0253, "step": 344150 }, { "epoch": 0.33909191932725424, "grad_norm": 2.281170606613159, "learning_rate": 9.991322357270263e-06, "loss": 2.9611, "step": 344200 }, { "epoch": 0.3391411773050763, "grad_norm": 2.292881727218628, "learning_rate": 9.991312761844633e-06, "loss": 3.021, "step": 344250 }, { "epoch": 0.33919043528289844, "grad_norm": 2.2033727169036865, "learning_rate": 9.991303161121406e-06, "loss": 2.9998, "step": 344300 }, { "epoch": 0.3392396932607205, "grad_norm": 2.2920420169830322, "learning_rate": 9.991293555100597e-06, "loss": 2.9707, "step": 344350 }, { "epoch": 0.3392889512385426, "grad_norm": 2.1283485889434814, "learning_rate": 9.991283943782213e-06, "loss": 2.9976, "step": 344400 }, { "epoch": 0.3393382092163647, "grad_norm": 2.3616037368774414, "learning_rate": 9.991274327166263e-06, 
"loss": 3.0106, "step": 344450 }, { "epoch": 0.3393874671941868, "grad_norm": 2.487152338027954, "learning_rate": 9.991264705252761e-06, "loss": 2.9713, "step": 344500 }, { "epoch": 0.33943672517200885, "grad_norm": 2.0717523097991943, "learning_rate": 9.991255078041715e-06, "loss": 2.9369, "step": 344550 }, { "epoch": 0.33948598314983097, "grad_norm": 2.4102656841278076, "learning_rate": 9.991245445533137e-06, "loss": 3.0063, "step": 344600 }, { "epoch": 0.33953524112765304, "grad_norm": 2.2053208351135254, "learning_rate": 9.991235807727033e-06, "loss": 3.012, "step": 344650 }, { "epoch": 0.3395844991054751, "grad_norm": 2.2586183547973633, "learning_rate": 9.991226164623418e-06, "loss": 2.9085, "step": 344700 }, { "epoch": 0.33963375708329724, "grad_norm": 2.250734567642212, "learning_rate": 9.991216516222299e-06, "loss": 3.0182, "step": 344750 }, { "epoch": 0.3396830150611193, "grad_norm": 2.422473430633545, "learning_rate": 9.991206862523687e-06, "loss": 3.0291, "step": 344800 }, { "epoch": 0.3397322730389414, "grad_norm": 2.2623679637908936, "learning_rate": 9.991197203527594e-06, "loss": 2.9889, "step": 344850 }, { "epoch": 0.33978153101676345, "grad_norm": 2.159167528152466, "learning_rate": 9.99118753923403e-06, "loss": 3.0104, "step": 344900 }, { "epoch": 0.3398307889945856, "grad_norm": 2.2729225158691406, "learning_rate": 9.991177869643002e-06, "loss": 2.9349, "step": 344950 }, { "epoch": 0.33988004697240765, "grad_norm": 2.531769037246704, "learning_rate": 9.991168194754522e-06, "loss": 3.0387, "step": 345000 }, { "epoch": 0.3399293049502297, "grad_norm": 2.2669620513916016, "learning_rate": 9.991158514568604e-06, "loss": 2.9609, "step": 345050 }, { "epoch": 0.33997856292805184, "grad_norm": 2.2080652713775635, "learning_rate": 9.991148829085252e-06, "loss": 2.8926, "step": 345100 }, { "epoch": 0.3400278209058739, "grad_norm": 2.1584978103637695, "learning_rate": 9.99113913830448e-06, "loss": 2.9824, "step": 345150 }, { "epoch": 0.340077078883696, 
"grad_norm": 2.4344892501831055, "learning_rate": 9.991129442226299e-06, "loss": 2.924, "step": 345200 }, { "epoch": 0.3401263368615181, "grad_norm": 2.0717360973358154, "learning_rate": 9.991119740850717e-06, "loss": 2.9767, "step": 345250 }, { "epoch": 0.3401755948393402, "grad_norm": 2.165419340133667, "learning_rate": 9.991110034177744e-06, "loss": 3.0201, "step": 345300 }, { "epoch": 0.34022485281716225, "grad_norm": 2.2159440517425537, "learning_rate": 9.991100322207393e-06, "loss": 2.9541, "step": 345350 }, { "epoch": 0.3402741107949844, "grad_norm": 2.2055206298828125, "learning_rate": 9.991090604939673e-06, "loss": 2.9292, "step": 345400 }, { "epoch": 0.34032336877280644, "grad_norm": 2.1135871410369873, "learning_rate": 9.991080882374594e-06, "loss": 2.9486, "step": 345450 }, { "epoch": 0.3403726267506285, "grad_norm": 2.3916714191436768, "learning_rate": 9.991071154512165e-06, "loss": 2.9154, "step": 345500 }, { "epoch": 0.34042188472845064, "grad_norm": 2.391805410385132, "learning_rate": 9.9910614213524e-06, "loss": 3.0383, "step": 345550 }, { "epoch": 0.3404711427062727, "grad_norm": 2.3449625968933105, "learning_rate": 9.991051682895303e-06, "loss": 3.0472, "step": 345600 }, { "epoch": 0.3405204006840948, "grad_norm": 2.4201056957244873, "learning_rate": 9.991041939140892e-06, "loss": 2.9282, "step": 345650 }, { "epoch": 0.3405696586619169, "grad_norm": 2.3704781532287598, "learning_rate": 9.991032190089173e-06, "loss": 2.9662, "step": 345700 }, { "epoch": 0.340618916639739, "grad_norm": 2.3098537921905518, "learning_rate": 9.991022435740158e-06, "loss": 2.9859, "step": 345750 }, { "epoch": 0.34066817461756105, "grad_norm": 2.3515007495880127, "learning_rate": 9.991012676093854e-06, "loss": 2.9755, "step": 345800 }, { "epoch": 0.3407174325953832, "grad_norm": 2.5803990364074707, "learning_rate": 9.991002911150277e-06, "loss": 2.9669, "step": 345850 }, { "epoch": 0.34076669057320524, "grad_norm": 2.27885365486145, "learning_rate": 
9.990993140909432e-06, "loss": 2.9829, "step": 345900 }, { "epoch": 0.3408159485510273, "grad_norm": 2.536454677581787, "learning_rate": 9.990983365371331e-06, "loss": 3.0492, "step": 345950 }, { "epoch": 0.3408652065288494, "grad_norm": 2.3045566082000732, "learning_rate": 9.990973584535988e-06, "loss": 2.9135, "step": 346000 }, { "epoch": 0.3409144645066715, "grad_norm": 2.2855710983276367, "learning_rate": 9.990963798403407e-06, "loss": 2.9932, "step": 346050 }, { "epoch": 0.3409637224844936, "grad_norm": 2.2748939990997314, "learning_rate": 9.990954006973602e-06, "loss": 3.0366, "step": 346100 }, { "epoch": 0.34101298046231565, "grad_norm": 2.356858968734741, "learning_rate": 9.990944210246584e-06, "loss": 2.9417, "step": 346150 }, { "epoch": 0.3410622384401378, "grad_norm": 2.4118850231170654, "learning_rate": 9.990934408222363e-06, "loss": 2.9645, "step": 346200 }, { "epoch": 0.34111149641795985, "grad_norm": 2.181513786315918, "learning_rate": 9.990924600900948e-06, "loss": 2.9969, "step": 346250 }, { "epoch": 0.3411607543957819, "grad_norm": 2.583486318588257, "learning_rate": 9.99091478828235e-06, "loss": 2.9521, "step": 346300 }, { "epoch": 0.34121001237360404, "grad_norm": 2.0625128746032715, "learning_rate": 9.990904970366581e-06, "loss": 2.9324, "step": 346350 }, { "epoch": 0.3412592703514261, "grad_norm": 2.168313980102539, "learning_rate": 9.99089514715365e-06, "loss": 2.9682, "step": 346400 }, { "epoch": 0.3413085283292482, "grad_norm": 2.3183281421661377, "learning_rate": 9.990885318643568e-06, "loss": 2.9616, "step": 346450 }, { "epoch": 0.3413577863070703, "grad_norm": 2.278223991394043, "learning_rate": 9.990875484836342e-06, "loss": 2.9376, "step": 346500 }, { "epoch": 0.3414070442848924, "grad_norm": 2.4263968467712402, "learning_rate": 9.990865645731988e-06, "loss": 2.9985, "step": 346550 }, { "epoch": 0.34145630226271445, "grad_norm": 2.1062047481536865, "learning_rate": 9.990855801330514e-06, "loss": 2.9926, "step": 346600 }, { "epoch": 
0.3415055602405366, "grad_norm": 2.567962169647217, "learning_rate": 9.99084595163193e-06, "loss": 2.917, "step": 346650 }, { "epoch": 0.34155481821835865, "grad_norm": 2.2755391597747803, "learning_rate": 9.990836096636247e-06, "loss": 2.9384, "step": 346700 }, { "epoch": 0.3416040761961807, "grad_norm": 2.205108404159546, "learning_rate": 9.990826236343474e-06, "loss": 2.9385, "step": 346750 }, { "epoch": 0.34165333417400284, "grad_norm": 2.5039007663726807, "learning_rate": 9.990816370753622e-06, "loss": 2.975, "step": 346800 }, { "epoch": 0.3417025921518249, "grad_norm": 2.2418606281280518, "learning_rate": 9.990806499866705e-06, "loss": 2.9599, "step": 346850 }, { "epoch": 0.341751850129647, "grad_norm": 2.300130605697632, "learning_rate": 9.990796623682729e-06, "loss": 2.9688, "step": 346900 }, { "epoch": 0.3418011081074691, "grad_norm": 2.713865041732788, "learning_rate": 9.990786742201706e-06, "loss": 2.9462, "step": 346950 }, { "epoch": 0.3418503660852912, "grad_norm": 2.27046275138855, "learning_rate": 9.990776855423646e-06, "loss": 2.9429, "step": 347000 }, { "epoch": 0.34189962406311325, "grad_norm": 2.3259341716766357, "learning_rate": 9.990766963348562e-06, "loss": 2.996, "step": 347050 }, { "epoch": 0.3419488820409354, "grad_norm": 2.387178897857666, "learning_rate": 9.990757065976462e-06, "loss": 2.9538, "step": 347100 }, { "epoch": 0.34199814001875745, "grad_norm": 2.416694164276123, "learning_rate": 9.990747163307355e-06, "loss": 2.9741, "step": 347150 }, { "epoch": 0.3420473979965795, "grad_norm": 2.373018980026245, "learning_rate": 9.990737255341257e-06, "loss": 2.969, "step": 347200 }, { "epoch": 0.3420966559744016, "grad_norm": 2.6071646213531494, "learning_rate": 9.990727342078174e-06, "loss": 2.9791, "step": 347250 }, { "epoch": 0.3421459139522237, "grad_norm": 2.2662038803100586, "learning_rate": 9.990717423518116e-06, "loss": 2.9652, "step": 347300 }, { "epoch": 0.3421951719300458, "grad_norm": 2.372051954269409, "learning_rate": 
9.990707499661097e-06, "loss": 2.9571, "step": 347350 }, { "epoch": 0.34224442990786785, "grad_norm": 2.258258104324341, "learning_rate": 9.990697570507125e-06, "loss": 2.9576, "step": 347400 }, { "epoch": 0.34229368788569, "grad_norm": 2.3307583332061768, "learning_rate": 9.99068763605621e-06, "loss": 2.9759, "step": 347450 }, { "epoch": 0.34234294586351205, "grad_norm": 2.2210052013397217, "learning_rate": 9.990677696308365e-06, "loss": 2.9892, "step": 347500 }, { "epoch": 0.3423922038413341, "grad_norm": 2.3302032947540283, "learning_rate": 9.990667751263601e-06, "loss": 2.9552, "step": 347550 }, { "epoch": 0.34244146181915625, "grad_norm": 2.4955410957336426, "learning_rate": 9.990657800921925e-06, "loss": 3.0297, "step": 347600 }, { "epoch": 0.3424907197969783, "grad_norm": 2.330674409866333, "learning_rate": 9.990647845283349e-06, "loss": 2.9302, "step": 347650 }, { "epoch": 0.3425399777748004, "grad_norm": 2.3072350025177, "learning_rate": 9.990637884347886e-06, "loss": 2.9392, "step": 347700 }, { "epoch": 0.3425892357526225, "grad_norm": 2.4104738235473633, "learning_rate": 9.990627918115543e-06, "loss": 2.9468, "step": 347750 }, { "epoch": 0.3426384937304446, "grad_norm": 2.2761361598968506, "learning_rate": 9.990617946586333e-06, "loss": 2.9145, "step": 347800 }, { "epoch": 0.34268775170826665, "grad_norm": 2.0653271675109863, "learning_rate": 9.990607969760266e-06, "loss": 3.0178, "step": 347850 }, { "epoch": 0.3427370096860888, "grad_norm": 2.2013192176818848, "learning_rate": 9.990597987637351e-06, "loss": 2.9825, "step": 347900 }, { "epoch": 0.34278626766391085, "grad_norm": 2.375685691833496, "learning_rate": 9.9905880002176e-06, "loss": 2.9935, "step": 347950 }, { "epoch": 0.3428355256417329, "grad_norm": 2.183912515640259, "learning_rate": 9.990578007501026e-06, "loss": 2.9071, "step": 348000 }, { "epoch": 0.34288478361955504, "grad_norm": 2.5142245292663574, "learning_rate": 9.990568009487634e-06, "loss": 2.9355, "step": 348050 }, { "epoch": 
0.3429340415973771, "grad_norm": 2.176879405975342, "learning_rate": 9.990558006177439e-06, "loss": 3.0129, "step": 348100 }, { "epoch": 0.3429832995751992, "grad_norm": 2.2777137756347656, "learning_rate": 9.99054799757045e-06, "loss": 2.9364, "step": 348150 }, { "epoch": 0.3430325575530213, "grad_norm": 2.2690072059631348, "learning_rate": 9.99053798366668e-06, "loss": 2.922, "step": 348200 }, { "epoch": 0.3430818155308434, "grad_norm": 2.2960798740386963, "learning_rate": 9.990527964466136e-06, "loss": 2.9948, "step": 348250 }, { "epoch": 0.34313107350866545, "grad_norm": 2.2886600494384766, "learning_rate": 9.99051793996883e-06, "loss": 2.9709, "step": 348300 }, { "epoch": 0.3431803314864876, "grad_norm": 2.4524452686309814, "learning_rate": 9.990507910174774e-06, "loss": 3.0488, "step": 348350 }, { "epoch": 0.34322958946430965, "grad_norm": 2.2020339965820312, "learning_rate": 9.990497875083977e-06, "loss": 2.9439, "step": 348400 }, { "epoch": 0.3432788474421317, "grad_norm": 2.388333797454834, "learning_rate": 9.990487834696449e-06, "loss": 2.9929, "step": 348450 }, { "epoch": 0.3433281054199538, "grad_norm": 2.8616886138916016, "learning_rate": 9.990477789012204e-06, "loss": 2.9613, "step": 348500 }, { "epoch": 0.3433773633977759, "grad_norm": 2.3279268741607666, "learning_rate": 9.990467738031249e-06, "loss": 2.9514, "step": 348550 }, { "epoch": 0.343426621375598, "grad_norm": 2.2998714447021484, "learning_rate": 9.990457681753598e-06, "loss": 2.947, "step": 348600 }, { "epoch": 0.34347587935342005, "grad_norm": 2.3863558769226074, "learning_rate": 9.990447620179259e-06, "loss": 2.9124, "step": 348650 }, { "epoch": 0.3435251373312422, "grad_norm": 2.3414969444274902, "learning_rate": 9.990437553308241e-06, "loss": 2.9197, "step": 348700 }, { "epoch": 0.34357439530906425, "grad_norm": 2.135282039642334, "learning_rate": 9.990427481140561e-06, "loss": 2.947, "step": 348750 }, { "epoch": 0.3436236532868863, "grad_norm": 2.227498769760132, "learning_rate": 
9.990417403676224e-06, "loss": 2.9427, "step": 348800 }, { "epoch": 0.34367291126470845, "grad_norm": 2.262646436691284, "learning_rate": 9.990407320915243e-06, "loss": 2.9491, "step": 348850 }, { "epoch": 0.3437221692425305, "grad_norm": 2.4980483055114746, "learning_rate": 9.990397232857628e-06, "loss": 2.9484, "step": 348900 }, { "epoch": 0.3437714272203526, "grad_norm": 2.377901315689087, "learning_rate": 9.99038713950339e-06, "loss": 2.9863, "step": 348950 }, { "epoch": 0.3438206851981747, "grad_norm": 2.206224203109741, "learning_rate": 9.99037704085254e-06, "loss": 2.971, "step": 349000 }, { "epoch": 0.3438699431759968, "grad_norm": 2.311054229736328, "learning_rate": 9.99036693690509e-06, "loss": 2.9825, "step": 349050 }, { "epoch": 0.34391920115381885, "grad_norm": 2.1996545791625977, "learning_rate": 9.990356827661046e-06, "loss": 2.9799, "step": 349100 }, { "epoch": 0.343968459131641, "grad_norm": 2.248378276824951, "learning_rate": 9.990346713120425e-06, "loss": 3.0003, "step": 349150 }, { "epoch": 0.34401771710946305, "grad_norm": 2.123173475265503, "learning_rate": 9.990336593283233e-06, "loss": 2.9852, "step": 349200 }, { "epoch": 0.3440669750872851, "grad_norm": 2.492102861404419, "learning_rate": 9.990326468149483e-06, "loss": 3.0217, "step": 349250 }, { "epoch": 0.34411623306510725, "grad_norm": 2.264329671859741, "learning_rate": 9.990316337719184e-06, "loss": 2.999, "step": 349300 }, { "epoch": 0.3441654910429293, "grad_norm": 2.2310173511505127, "learning_rate": 9.990306201992348e-06, "loss": 2.9608, "step": 349350 }, { "epoch": 0.3442147490207514, "grad_norm": 2.173473596572876, "learning_rate": 9.990296060968988e-06, "loss": 3.0081, "step": 349400 }, { "epoch": 0.3442640069985735, "grad_norm": 2.2602334022521973, "learning_rate": 9.99028591464911e-06, "loss": 2.9541, "step": 349450 }, { "epoch": 0.3443132649763956, "grad_norm": 2.287379741668701, "learning_rate": 9.990275763032727e-06, "loss": 2.9274, "step": 349500 }, { "epoch": 
0.34436252295421765, "grad_norm": 2.1116716861724854, "learning_rate": 9.99026560611985e-06, "loss": 2.9572, "step": 349550 }, { "epoch": 0.3444117809320398, "grad_norm": 2.2709805965423584, "learning_rate": 9.990255443910493e-06, "loss": 2.9859, "step": 349600 }, { "epoch": 0.34446103890986185, "grad_norm": 2.3558762073516846, "learning_rate": 9.99024527640466e-06, "loss": 2.9577, "step": 349650 }, { "epoch": 0.3445102968876839, "grad_norm": 2.6220345497131348, "learning_rate": 9.990235103602367e-06, "loss": 3.0072, "step": 349700 }, { "epoch": 0.344559554865506, "grad_norm": 2.1461992263793945, "learning_rate": 9.990224925503623e-06, "loss": 2.8849, "step": 349750 }, { "epoch": 0.3446088128433281, "grad_norm": 2.4202895164489746, "learning_rate": 9.990214742108438e-06, "loss": 3.01, "step": 349800 }, { "epoch": 0.3446580708211502, "grad_norm": 2.3817427158355713, "learning_rate": 9.990204553416824e-06, "loss": 2.9602, "step": 349850 }, { "epoch": 0.34470732879897226, "grad_norm": 2.3389711380004883, "learning_rate": 9.990194359428792e-06, "loss": 2.9574, "step": 349900 }, { "epoch": 0.3447565867767944, "grad_norm": 2.449887990951538, "learning_rate": 9.990184160144351e-06, "loss": 2.8807, "step": 349950 }, { "epoch": 0.34480584475461645, "grad_norm": 2.046319007873535, "learning_rate": 9.990173955563515e-06, "loss": 2.9828, "step": 350000 }, { "epoch": 0.3448551027324385, "grad_norm": 2.2042977809906006, "learning_rate": 9.990163745686292e-06, "loss": 2.9986, "step": 350050 }, { "epoch": 0.34490436071026065, "grad_norm": 2.2430944442749023, "learning_rate": 9.990153530512694e-06, "loss": 3.0311, "step": 350100 }, { "epoch": 0.3449536186880827, "grad_norm": 2.3518428802490234, "learning_rate": 9.990143310042731e-06, "loss": 2.9379, "step": 350150 }, { "epoch": 0.3450028766659048, "grad_norm": 2.335559606552124, "learning_rate": 9.990133084276415e-06, "loss": 2.9948, "step": 350200 }, { "epoch": 0.3450521346437269, "grad_norm": 2.346264600753784, "learning_rate": 
9.990122853213758e-06, "loss": 2.9984, "step": 350250 }, { "epoch": 0.345101392621549, "grad_norm": 2.236358880996704, "learning_rate": 9.990112616854767e-06, "loss": 2.9606, "step": 350300 }, { "epoch": 0.34515065059937106, "grad_norm": 2.4653396606445312, "learning_rate": 9.990102375199456e-06, "loss": 2.9754, "step": 350350 }, { "epoch": 0.3451999085771932, "grad_norm": 2.4637696743011475, "learning_rate": 9.990092128247836e-06, "loss": 3.0131, "step": 350400 }, { "epoch": 0.34524916655501525, "grad_norm": 2.2879128456115723, "learning_rate": 9.990081875999915e-06, "loss": 2.9637, "step": 350450 }, { "epoch": 0.3452984245328373, "grad_norm": 2.128631114959717, "learning_rate": 9.990071618455708e-06, "loss": 2.994, "step": 350500 }, { "epoch": 0.34534768251065945, "grad_norm": 2.239274501800537, "learning_rate": 9.99006135561522e-06, "loss": 3.0033, "step": 350550 }, { "epoch": 0.3453969404884815, "grad_norm": 2.1282689571380615, "learning_rate": 9.990051087478468e-06, "loss": 3.0316, "step": 350600 }, { "epoch": 0.3454461984663036, "grad_norm": 2.2384164333343506, "learning_rate": 9.99004081404546e-06, "loss": 2.9616, "step": 350650 }, { "epoch": 0.3454954564441257, "grad_norm": 2.3246188163757324, "learning_rate": 9.990030535316208e-06, "loss": 2.98, "step": 350700 }, { "epoch": 0.3455447144219478, "grad_norm": 2.3451755046844482, "learning_rate": 9.990020251290722e-06, "loss": 2.9336, "step": 350750 }, { "epoch": 0.34559397239976986, "grad_norm": 2.212925910949707, "learning_rate": 9.990009961969013e-06, "loss": 2.9396, "step": 350800 }, { "epoch": 0.345643230377592, "grad_norm": 2.2294952869415283, "learning_rate": 9.98999966735109e-06, "loss": 2.9371, "step": 350850 }, { "epoch": 0.34569248835541405, "grad_norm": 2.126357316970825, "learning_rate": 9.98998936743697e-06, "loss": 2.9553, "step": 350900 }, { "epoch": 0.3457417463332361, "grad_norm": 2.472942590713501, "learning_rate": 9.989979062226657e-06, "loss": 2.9545, "step": 350950 }, { "epoch": 
0.3457910043110582, "grad_norm": 2.397287607192993, "learning_rate": 9.989968751720165e-06, "loss": 3.0573, "step": 351000 }, { "epoch": 0.3458402622888803, "grad_norm": 2.3889689445495605, "learning_rate": 9.989958435917506e-06, "loss": 2.9325, "step": 351050 }, { "epoch": 0.3458895202667024, "grad_norm": 2.3354578018188477, "learning_rate": 9.989948114818688e-06, "loss": 2.9935, "step": 351100 }, { "epoch": 0.34593877824452446, "grad_norm": 2.4032914638519287, "learning_rate": 9.989937788423726e-06, "loss": 3.0069, "step": 351150 }, { "epoch": 0.3459880362223466, "grad_norm": 2.4838826656341553, "learning_rate": 9.989927456732628e-06, "loss": 2.9375, "step": 351200 }, { "epoch": 0.34603729420016865, "grad_norm": 2.5062577724456787, "learning_rate": 9.989917119745404e-06, "loss": 2.9435, "step": 351250 }, { "epoch": 0.3460865521779907, "grad_norm": 3.2255961894989014, "learning_rate": 9.989906777462068e-06, "loss": 2.8883, "step": 351300 }, { "epoch": 0.34613581015581285, "grad_norm": 2.119917154312134, "learning_rate": 9.98989642988263e-06, "loss": 2.9855, "step": 351350 }, { "epoch": 0.3461850681336349, "grad_norm": 2.2643604278564453, "learning_rate": 9.9898860770071e-06, "loss": 3.0105, "step": 351400 }, { "epoch": 0.346234326111457, "grad_norm": 2.2403066158294678, "learning_rate": 9.989875718835489e-06, "loss": 2.9812, "step": 351450 }, { "epoch": 0.3462835840892791, "grad_norm": 2.1916775703430176, "learning_rate": 9.98986535536781e-06, "loss": 3.0031, "step": 351500 }, { "epoch": 0.3463328420671012, "grad_norm": 2.522244930267334, "learning_rate": 9.98985498660407e-06, "loss": 2.9959, "step": 351550 }, { "epoch": 0.34638210004492326, "grad_norm": 2.2052228450775146, "learning_rate": 9.989844612544284e-06, "loss": 2.9928, "step": 351600 }, { "epoch": 0.3464313580227454, "grad_norm": 2.498222827911377, "learning_rate": 9.989834233188462e-06, "loss": 2.9789, "step": 351650 }, { "epoch": 0.34648061600056745, "grad_norm": 2.411092758178711, "learning_rate": 
9.989823848536615e-06, "loss": 2.9887, "step": 351700 }, { "epoch": 0.3465298739783895, "grad_norm": 2.2537975311279297, "learning_rate": 9.989813458588753e-06, "loss": 2.9318, "step": 351750 }, { "epoch": 0.34657913195621165, "grad_norm": 2.305896282196045, "learning_rate": 9.989803063344887e-06, "loss": 3.0033, "step": 351800 }, { "epoch": 0.3466283899340337, "grad_norm": 2.3288862705230713, "learning_rate": 9.98979266280503e-06, "loss": 2.9588, "step": 351850 }, { "epoch": 0.3466776479118558, "grad_norm": 2.464510679244995, "learning_rate": 9.989782256969189e-06, "loss": 3.028, "step": 351900 }, { "epoch": 0.3467269058896779, "grad_norm": 2.442789316177368, "learning_rate": 9.98977184583738e-06, "loss": 3.0335, "step": 351950 }, { "epoch": 0.3467761638675, "grad_norm": 2.246234893798828, "learning_rate": 9.989761429409613e-06, "loss": 2.9772, "step": 352000 }, { "epoch": 0.34682542184532206, "grad_norm": 2.212339401245117, "learning_rate": 9.989751007685895e-06, "loss": 2.9783, "step": 352050 }, { "epoch": 0.3468746798231442, "grad_norm": 2.2476232051849365, "learning_rate": 9.989740580666242e-06, "loss": 2.9176, "step": 352100 }, { "epoch": 0.34692393780096625, "grad_norm": 2.1859371662139893, "learning_rate": 9.98973014835066e-06, "loss": 3.0212, "step": 352150 }, { "epoch": 0.3469731957787883, "grad_norm": 2.4295313358306885, "learning_rate": 9.989719710739167e-06, "loss": 2.9607, "step": 352200 }, { "epoch": 0.3470224537566104, "grad_norm": 2.2728710174560547, "learning_rate": 9.989709267831767e-06, "loss": 2.9783, "step": 352250 }, { "epoch": 0.3470717117344325, "grad_norm": 2.8162448406219482, "learning_rate": 9.989698819628477e-06, "loss": 2.9824, "step": 352300 }, { "epoch": 0.3471209697122546, "grad_norm": 2.3433547019958496, "learning_rate": 9.989688366129303e-06, "loss": 2.9871, "step": 352350 }, { "epoch": 0.34717022769007666, "grad_norm": 2.3773014545440674, "learning_rate": 9.98967790733426e-06, "loss": 2.9912, "step": 352400 }, { "epoch": 
0.3472194856678988, "grad_norm": 2.2639942169189453, "learning_rate": 9.989667443243356e-06, "loss": 2.9114, "step": 352450 }, { "epoch": 0.34726874364572086, "grad_norm": 2.3164124488830566, "learning_rate": 9.989656973856603e-06, "loss": 2.941, "step": 352500 }, { "epoch": 0.3473180016235429, "grad_norm": 2.096545696258545, "learning_rate": 9.989646499174015e-06, "loss": 2.9609, "step": 352550 }, { "epoch": 0.34736725960136505, "grad_norm": 2.305152177810669, "learning_rate": 9.9896360191956e-06, "loss": 2.9586, "step": 352600 }, { "epoch": 0.3474165175791871, "grad_norm": 2.227459669113159, "learning_rate": 9.98962553392137e-06, "loss": 2.9828, "step": 352650 }, { "epoch": 0.3474657755570092, "grad_norm": 2.441070556640625, "learning_rate": 9.989615043351335e-06, "loss": 2.9343, "step": 352700 }, { "epoch": 0.3475150335348313, "grad_norm": 2.367798089981079, "learning_rate": 9.989604547485508e-06, "loss": 3.0377, "step": 352750 }, { "epoch": 0.3475642915126534, "grad_norm": 2.2145838737487793, "learning_rate": 9.9895940463239e-06, "loss": 2.9151, "step": 352800 }, { "epoch": 0.34761354949047546, "grad_norm": 2.217722177505493, "learning_rate": 9.98958353986652e-06, "loss": 2.9814, "step": 352850 }, { "epoch": 0.3476628074682976, "grad_norm": 2.535430669784546, "learning_rate": 9.989573028113382e-06, "loss": 3.0156, "step": 352900 }, { "epoch": 0.34771206544611966, "grad_norm": 2.456815719604492, "learning_rate": 9.989562511064496e-06, "loss": 3.0341, "step": 352950 }, { "epoch": 0.3477613234239417, "grad_norm": 2.612229585647583, "learning_rate": 9.989551988719873e-06, "loss": 2.9221, "step": 353000 }, { "epoch": 0.34781058140176385, "grad_norm": 2.1536998748779297, "learning_rate": 9.989541461079522e-06, "loss": 3.0046, "step": 353050 }, { "epoch": 0.3478598393795859, "grad_norm": 2.5180563926696777, "learning_rate": 9.989530928143457e-06, "loss": 2.9562, "step": 353100 }, { "epoch": 0.347909097357408, "grad_norm": 2.20945405960083, "learning_rate": 
9.989520389911691e-06, "loss": 3.0499, "step": 353150 }, { "epoch": 0.3479583553352301, "grad_norm": 2.201202392578125, "learning_rate": 9.989509846384231e-06, "loss": 2.9096, "step": 353200 }, { "epoch": 0.3480076133130522, "grad_norm": 2.3982272148132324, "learning_rate": 9.989499297561089e-06, "loss": 2.9474, "step": 353250 }, { "epoch": 0.34805687129087426, "grad_norm": 2.297053098678589, "learning_rate": 9.989488743442277e-06, "loss": 2.9797, "step": 353300 }, { "epoch": 0.3481061292686964, "grad_norm": 2.201094627380371, "learning_rate": 9.98947818402781e-06, "loss": 2.9741, "step": 353350 }, { "epoch": 0.34815538724651846, "grad_norm": 2.219196081161499, "learning_rate": 9.989467619317692e-06, "loss": 2.9874, "step": 353400 }, { "epoch": 0.3482046452243405, "grad_norm": 2.271169424057007, "learning_rate": 9.989457049311938e-06, "loss": 2.9629, "step": 353450 }, { "epoch": 0.3482539032021626, "grad_norm": 2.208867073059082, "learning_rate": 9.98944647401056e-06, "loss": 2.9172, "step": 353500 }, { "epoch": 0.3483031611799847, "grad_norm": 2.2258501052856445, "learning_rate": 9.989435893413565e-06, "loss": 2.9294, "step": 353550 }, { "epoch": 0.3483524191578068, "grad_norm": 2.352414608001709, "learning_rate": 9.989425307520971e-06, "loss": 3.03, "step": 353600 }, { "epoch": 0.34840167713562886, "grad_norm": 2.137986660003662, "learning_rate": 9.989414716332783e-06, "loss": 2.9929, "step": 353650 }, { "epoch": 0.348450935113451, "grad_norm": 2.227226495742798, "learning_rate": 9.989404119849018e-06, "loss": 3.0565, "step": 353700 }, { "epoch": 0.34850019309127306, "grad_norm": 2.1841230392456055, "learning_rate": 9.989393518069683e-06, "loss": 2.9638, "step": 353750 }, { "epoch": 0.34854945106909513, "grad_norm": 2.903904676437378, "learning_rate": 9.989382910994789e-06, "loss": 3.0011, "step": 353800 }, { "epoch": 0.34859870904691725, "grad_norm": 2.297473907470703, "learning_rate": 9.989372298624349e-06, "loss": 2.902, "step": 353850 }, { "epoch": 
0.3486479670247393, "grad_norm": 2.259995460510254, "learning_rate": 9.989361680958373e-06, "loss": 2.9633, "step": 353900 }, { "epoch": 0.3486972250025614, "grad_norm": 2.30344557762146, "learning_rate": 9.989351057996875e-06, "loss": 2.9547, "step": 353950 }, { "epoch": 0.3487464829803835, "grad_norm": 2.433990716934204, "learning_rate": 9.989340429739864e-06, "loss": 2.8804, "step": 354000 }, { "epoch": 0.3487957409582056, "grad_norm": 2.1678948402404785, "learning_rate": 9.98932979618735e-06, "loss": 2.967, "step": 354050 }, { "epoch": 0.34884499893602766, "grad_norm": 2.4016711711883545, "learning_rate": 9.989319157339348e-06, "loss": 2.944, "step": 354100 }, { "epoch": 0.3488942569138498, "grad_norm": 2.3355093002319336, "learning_rate": 9.989308513195866e-06, "loss": 3.0276, "step": 354150 }, { "epoch": 0.34894351489167186, "grad_norm": 2.378403425216675, "learning_rate": 9.989297863756916e-06, "loss": 2.9824, "step": 354200 }, { "epoch": 0.34899277286949393, "grad_norm": 2.3439412117004395, "learning_rate": 9.989287209022512e-06, "loss": 2.9846, "step": 354250 }, { "epoch": 0.34904203084731605, "grad_norm": 2.409548282623291, "learning_rate": 9.989276548992662e-06, "loss": 2.9595, "step": 354300 }, { "epoch": 0.3490912888251381, "grad_norm": 2.2143380641937256, "learning_rate": 9.989265883667378e-06, "loss": 2.9634, "step": 354350 }, { "epoch": 0.3491405468029602, "grad_norm": 2.348940849304199, "learning_rate": 9.989255213046673e-06, "loss": 2.9346, "step": 354400 }, { "epoch": 0.3491898047807823, "grad_norm": 2.1382863521575928, "learning_rate": 9.989244537130558e-06, "loss": 2.9854, "step": 354450 }, { "epoch": 0.3492390627586044, "grad_norm": 2.2843260765075684, "learning_rate": 9.98923385591904e-06, "loss": 3.0107, "step": 354500 }, { "epoch": 0.34928832073642646, "grad_norm": 2.2672765254974365, "learning_rate": 9.989223169412136e-06, "loss": 2.9257, "step": 354550 }, { "epoch": 0.3493375787142486, "grad_norm": 2.379582166671753, "learning_rate": 
9.989212477609855e-06, "loss": 2.9586, "step": 354600 }, { "epoch": 0.34938683669207066, "grad_norm": 2.3744139671325684, "learning_rate": 9.98920178051221e-06, "loss": 2.9649, "step": 354650 }, { "epoch": 0.3494360946698927, "grad_norm": 2.415107250213623, "learning_rate": 9.989191078119209e-06, "loss": 2.929, "step": 354700 }, { "epoch": 0.3494853526477148, "grad_norm": 2.24250864982605, "learning_rate": 9.989180370430863e-06, "loss": 2.9899, "step": 354750 }, { "epoch": 0.3495346106255369, "grad_norm": 2.2148826122283936, "learning_rate": 9.98916965744719e-06, "loss": 2.9633, "step": 354800 }, { "epoch": 0.349583868603359, "grad_norm": 2.344635009765625, "learning_rate": 9.989158939168195e-06, "loss": 2.9389, "step": 354850 }, { "epoch": 0.34963312658118106, "grad_norm": 2.3568108081817627, "learning_rate": 9.98914821559389e-06, "loss": 2.9867, "step": 354900 }, { "epoch": 0.3496823845590032, "grad_norm": 2.244147300720215, "learning_rate": 9.989137486724288e-06, "loss": 3.0213, "step": 354950 }, { "epoch": 0.34973164253682526, "grad_norm": 2.2120232582092285, "learning_rate": 9.989126752559404e-06, "loss": 2.9431, "step": 355000 }, { "epoch": 0.34978090051464733, "grad_norm": 2.498013973236084, "learning_rate": 9.98911601309924e-06, "loss": 2.9326, "step": 355050 }, { "epoch": 0.34983015849246946, "grad_norm": 2.6632590293884277, "learning_rate": 9.989105268343816e-06, "loss": 2.9297, "step": 355100 }, { "epoch": 0.3498794164702915, "grad_norm": 2.4480175971984863, "learning_rate": 9.989094518293139e-06, "loss": 2.9617, "step": 355150 }, { "epoch": 0.3499286744481136, "grad_norm": 2.4922454357147217, "learning_rate": 9.989083762947223e-06, "loss": 2.969, "step": 355200 }, { "epoch": 0.3499779324259357, "grad_norm": 2.4732539653778076, "learning_rate": 9.989073002306078e-06, "loss": 2.9374, "step": 355250 }, { "epoch": 0.3500271904037578, "grad_norm": 2.362722396850586, "learning_rate": 9.989062236369714e-06, "loss": 2.9908, "step": 355300 }, { "epoch": 
0.35007644838157986, "grad_norm": 2.4490747451782227, "learning_rate": 9.989051465138145e-06, "loss": 2.9625, "step": 355350 }, { "epoch": 0.350125706359402, "grad_norm": 2.2461891174316406, "learning_rate": 9.989040688611382e-06, "loss": 2.9439, "step": 355400 }, { "epoch": 0.35017496433722406, "grad_norm": 2.2784056663513184, "learning_rate": 9.989029906789434e-06, "loss": 2.9879, "step": 355450 }, { "epoch": 0.35022422231504613, "grad_norm": 2.343109607696533, "learning_rate": 9.989019119672315e-06, "loss": 2.9904, "step": 355500 }, { "epoch": 0.35027348029286826, "grad_norm": 2.2368667125701904, "learning_rate": 9.989008327260036e-06, "loss": 2.9599, "step": 355550 }, { "epoch": 0.3503227382706903, "grad_norm": 2.131196975708008, "learning_rate": 9.988997529552609e-06, "loss": 2.9368, "step": 355600 }, { "epoch": 0.3503719962485124, "grad_norm": 2.273326873779297, "learning_rate": 9.988986726550044e-06, "loss": 2.9755, "step": 355650 }, { "epoch": 0.3504212542263345, "grad_norm": 2.1986782550811768, "learning_rate": 9.988975918252354e-06, "loss": 2.9604, "step": 355700 }, { "epoch": 0.3504705122041566, "grad_norm": 2.2057065963745117, "learning_rate": 9.988965104659548e-06, "loss": 2.8619, "step": 355750 }, { "epoch": 0.35051977018197866, "grad_norm": 2.3890700340270996, "learning_rate": 9.988954285771638e-06, "loss": 2.8533, "step": 355800 }, { "epoch": 0.3505690281598008, "grad_norm": 2.234123468399048, "learning_rate": 9.988943461588639e-06, "loss": 3.046, "step": 355850 }, { "epoch": 0.35061828613762286, "grad_norm": 2.371110200881958, "learning_rate": 9.988932632110559e-06, "loss": 2.9889, "step": 355900 }, { "epoch": 0.35066754411544493, "grad_norm": 2.7943058013916016, "learning_rate": 9.988921797337412e-06, "loss": 2.9495, "step": 355950 }, { "epoch": 0.350716802093267, "grad_norm": 2.357105016708374, "learning_rate": 9.988910957269206e-06, "loss": 2.9976, "step": 356000 }, { "epoch": 0.3507660600710891, "grad_norm": 2.2993555068969727, "learning_rate": 
9.988900111905955e-06, "loss": 3.0133, "step": 356050 }, { "epoch": 0.3508153180489112, "grad_norm": 2.297830820083618, "learning_rate": 9.988889261247668e-06, "loss": 3.0257, "step": 356100 }, { "epoch": 0.35086457602673327, "grad_norm": 2.292912721633911, "learning_rate": 9.988878405294362e-06, "loss": 2.9236, "step": 356150 }, { "epoch": 0.3509138340045554, "grad_norm": 2.2850868701934814, "learning_rate": 9.988867544046043e-06, "loss": 3.0578, "step": 356200 }, { "epoch": 0.35096309198237746, "grad_norm": 2.3444504737854004, "learning_rate": 9.988856677502726e-06, "loss": 2.8876, "step": 356250 }, { "epoch": 0.35101234996019953, "grad_norm": 2.772386312484741, "learning_rate": 9.988845805664422e-06, "loss": 2.9507, "step": 356300 }, { "epoch": 0.35106160793802166, "grad_norm": 2.4222049713134766, "learning_rate": 9.988834928531138e-06, "loss": 2.9493, "step": 356350 }, { "epoch": 0.35111086591584373, "grad_norm": 2.3395941257476807, "learning_rate": 9.988824046102893e-06, "loss": 2.9923, "step": 356400 }, { "epoch": 0.3511601238936658, "grad_norm": 2.233515739440918, "learning_rate": 9.988813158379691e-06, "loss": 2.9678, "step": 356450 }, { "epoch": 0.3512093818714879, "grad_norm": 2.153210401535034, "learning_rate": 9.98880226536155e-06, "loss": 2.9764, "step": 356500 }, { "epoch": 0.35125863984931, "grad_norm": 2.2453973293304443, "learning_rate": 9.988791367048477e-06, "loss": 2.9744, "step": 356550 }, { "epoch": 0.35130789782713207, "grad_norm": 2.2487072944641113, "learning_rate": 9.98878046344049e-06, "loss": 2.954, "step": 356600 }, { "epoch": 0.3513571558049542, "grad_norm": 2.2866883277893066, "learning_rate": 9.988769554537592e-06, "loss": 2.9819, "step": 356650 }, { "epoch": 0.35140641378277626, "grad_norm": 2.303689956665039, "learning_rate": 9.988758640339798e-06, "loss": 3.0144, "step": 356700 }, { "epoch": 0.35145567176059833, "grad_norm": 2.1545472145080566, "learning_rate": 9.98874772084712e-06, "loss": 2.9374, "step": 356750 }, { "epoch": 
0.35150492973842046, "grad_norm": 2.2671215534210205, "learning_rate": 9.988736796059573e-06, "loss": 2.9738, "step": 356800 }, { "epoch": 0.3515541877162425, "grad_norm": 2.3107852935791016, "learning_rate": 9.988725865977164e-06, "loss": 2.9539, "step": 356850 }, { "epoch": 0.3516034456940646, "grad_norm": 2.239840507507324, "learning_rate": 9.988714930599906e-06, "loss": 2.9699, "step": 356900 }, { "epoch": 0.3516527036718867, "grad_norm": 2.3705084323883057, "learning_rate": 9.988703989927808e-06, "loss": 3.0575, "step": 356950 }, { "epoch": 0.3517019616497088, "grad_norm": 2.132145881652832, "learning_rate": 9.988693043960889e-06, "loss": 2.9612, "step": 357000 }, { "epoch": 0.35175121962753086, "grad_norm": 2.151716947555542, "learning_rate": 9.98868209269915e-06, "loss": 3.0733, "step": 357050 }, { "epoch": 0.351800477605353, "grad_norm": 2.3170366287231445, "learning_rate": 9.988671136142612e-06, "loss": 3.0176, "step": 357100 }, { "epoch": 0.35184973558317506, "grad_norm": 2.314544677734375, "learning_rate": 9.988660174291285e-06, "loss": 2.9312, "step": 357150 }, { "epoch": 0.35189899356099713, "grad_norm": 2.4334261417388916, "learning_rate": 9.988649207145174e-06, "loss": 2.9695, "step": 357200 }, { "epoch": 0.3519482515388192, "grad_norm": 2.3540995121002197, "learning_rate": 9.988638234704299e-06, "loss": 2.9904, "step": 357250 }, { "epoch": 0.3519975095166413, "grad_norm": 2.376408100128174, "learning_rate": 9.988627256968664e-06, "loss": 3.0294, "step": 357300 }, { "epoch": 0.3520467674944634, "grad_norm": 2.3114094734191895, "learning_rate": 9.988616273938288e-06, "loss": 2.9612, "step": 357350 }, { "epoch": 0.35209602547228547, "grad_norm": 2.1653671264648438, "learning_rate": 9.988605285613179e-06, "loss": 2.965, "step": 357400 }, { "epoch": 0.3521452834501076, "grad_norm": 2.359914541244507, "learning_rate": 9.988594291993348e-06, "loss": 2.9944, "step": 357450 }, { "epoch": 0.35219454142792966, "grad_norm": 2.2301807403564453, "learning_rate": 
9.988583293078809e-06, "loss": 3.0185, "step": 357500 }, { "epoch": 0.35224379940575173, "grad_norm": 2.339677333831787, "learning_rate": 9.98857228886957e-06, "loss": 2.9666, "step": 357550 }, { "epoch": 0.35229305738357386, "grad_norm": 2.388258457183838, "learning_rate": 9.988561279365647e-06, "loss": 2.9407, "step": 357600 }, { "epoch": 0.35234231536139593, "grad_norm": 2.3422458171844482, "learning_rate": 9.988550264567049e-06, "loss": 2.937, "step": 357650 }, { "epoch": 0.352391573339218, "grad_norm": 2.264251708984375, "learning_rate": 9.988539244473788e-06, "loss": 2.9296, "step": 357700 }, { "epoch": 0.3524408313170401, "grad_norm": 2.200772762298584, "learning_rate": 9.988528219085876e-06, "loss": 2.9293, "step": 357750 }, { "epoch": 0.3524900892948622, "grad_norm": 2.2455132007598877, "learning_rate": 9.988517188403325e-06, "loss": 2.9781, "step": 357800 }, { "epoch": 0.35253934727268427, "grad_norm": 2.0888431072235107, "learning_rate": 9.988506152426148e-06, "loss": 2.9484, "step": 357850 }, { "epoch": 0.3525886052505064, "grad_norm": 2.261477470397949, "learning_rate": 9.988495111154352e-06, "loss": 2.9471, "step": 357900 }, { "epoch": 0.35263786322832846, "grad_norm": 2.394035577774048, "learning_rate": 9.988484064587953e-06, "loss": 2.9385, "step": 357950 }, { "epoch": 0.35268712120615053, "grad_norm": 2.3250510692596436, "learning_rate": 9.988473012726964e-06, "loss": 2.9749, "step": 358000 }, { "epoch": 0.35273637918397266, "grad_norm": 2.2200255393981934, "learning_rate": 9.988461955571391e-06, "loss": 2.9754, "step": 358050 }, { "epoch": 0.35278563716179473, "grad_norm": 2.394873857498169, "learning_rate": 9.988450893121251e-06, "loss": 3.0553, "step": 358100 }, { "epoch": 0.3528348951396168, "grad_norm": 2.2738566398620605, "learning_rate": 9.988439825376552e-06, "loss": 3.0531, "step": 358150 }, { "epoch": 0.3528841531174389, "grad_norm": 2.412632703781128, "learning_rate": 9.988428752337309e-06, "loss": 2.9567, "step": 358200 }, { "epoch": 
0.352933411095261, "grad_norm": 2.41582989692688, "learning_rate": 9.988417674003532e-06, "loss": 2.966, "step": 358250 }, { "epoch": 0.35298266907308307, "grad_norm": 2.322500705718994, "learning_rate": 9.988406590375234e-06, "loss": 2.9727, "step": 358300 }, { "epoch": 0.3530319270509052, "grad_norm": 2.422396659851074, "learning_rate": 9.988395501452424e-06, "loss": 2.9461, "step": 358350 }, { "epoch": 0.35308118502872726, "grad_norm": 2.250699996948242, "learning_rate": 9.988384407235117e-06, "loss": 2.9876, "step": 358400 }, { "epoch": 0.35313044300654933, "grad_norm": 2.2414398193359375, "learning_rate": 9.988373307723324e-06, "loss": 2.9498, "step": 358450 }, { "epoch": 0.3531797009843714, "grad_norm": 2.470309257507324, "learning_rate": 9.988362202917054e-06, "loss": 2.9702, "step": 358500 }, { "epoch": 0.35322895896219353, "grad_norm": 2.403179168701172, "learning_rate": 9.988351092816324e-06, "loss": 2.9003, "step": 358550 }, { "epoch": 0.3532782169400156, "grad_norm": 2.502044677734375, "learning_rate": 9.988339977421139e-06, "loss": 2.9964, "step": 358600 }, { "epoch": 0.35332747491783767, "grad_norm": 2.3590304851531982, "learning_rate": 9.988328856731517e-06, "loss": 2.9218, "step": 358650 }, { "epoch": 0.3533767328956598, "grad_norm": 2.409125328063965, "learning_rate": 9.988317730747467e-06, "loss": 2.9444, "step": 358700 }, { "epoch": 0.35342599087348187, "grad_norm": 2.5448644161224365, "learning_rate": 9.988306599469e-06, "loss": 2.984, "step": 358750 }, { "epoch": 0.35347524885130394, "grad_norm": 2.252824068069458, "learning_rate": 9.98829546289613e-06, "loss": 2.9029, "step": 358800 }, { "epoch": 0.35352450682912606, "grad_norm": 2.3183107376098633, "learning_rate": 9.988284321028868e-06, "loss": 2.9424, "step": 358850 }, { "epoch": 0.35357376480694813, "grad_norm": 2.322129964828491, "learning_rate": 9.988273173867224e-06, "loss": 2.9279, "step": 358900 }, { "epoch": 0.3536230227847702, "grad_norm": 2.255025863647461, "learning_rate": 
9.988262021411212e-06, "loss": 2.9145, "step": 358950 }, { "epoch": 0.35367228076259233, "grad_norm": 2.1860194206237793, "learning_rate": 9.988250863660845e-06, "loss": 2.9387, "step": 359000 }, { "epoch": 0.3537215387404144, "grad_norm": 2.0853891372680664, "learning_rate": 9.988239700616132e-06, "loss": 2.9281, "step": 359050 }, { "epoch": 0.35377079671823647, "grad_norm": 2.384831428527832, "learning_rate": 9.988228532277087e-06, "loss": 2.9531, "step": 359100 }, { "epoch": 0.3538200546960586, "grad_norm": 2.2465662956237793, "learning_rate": 9.988217358643718e-06, "loss": 2.9637, "step": 359150 }, { "epoch": 0.35386931267388066, "grad_norm": 2.1904618740081787, "learning_rate": 9.988206179716043e-06, "loss": 2.9842, "step": 359200 }, { "epoch": 0.35391857065170274, "grad_norm": 2.2384650707244873, "learning_rate": 9.98819499549407e-06, "loss": 2.9332, "step": 359250 }, { "epoch": 0.35396782862952486, "grad_norm": 2.1186344623565674, "learning_rate": 9.98818380597781e-06, "loss": 2.9131, "step": 359300 }, { "epoch": 0.35401708660734693, "grad_norm": 2.356910228729248, "learning_rate": 9.988172611167274e-06, "loss": 3.0061, "step": 359350 }, { "epoch": 0.354066344585169, "grad_norm": 2.4279441833496094, "learning_rate": 9.988161411062479e-06, "loss": 2.938, "step": 359400 }, { "epoch": 0.3541156025629911, "grad_norm": 2.365097761154175, "learning_rate": 9.988150205663434e-06, "loss": 2.9729, "step": 359450 }, { "epoch": 0.3541648605408132, "grad_norm": 2.3526840209960938, "learning_rate": 9.98813899497015e-06, "loss": 2.9465, "step": 359500 }, { "epoch": 0.35421411851863527, "grad_norm": 2.284461498260498, "learning_rate": 9.988127778982643e-06, "loss": 3.0423, "step": 359550 }, { "epoch": 0.3542633764964574, "grad_norm": 2.309664487838745, "learning_rate": 9.988116557700918e-06, "loss": 3.0386, "step": 359600 }, { "epoch": 0.35431263447427946, "grad_norm": 2.3674206733703613, "learning_rate": 9.988105331124993e-06, "loss": 2.9665, "step": 359650 }, { "epoch": 
0.35436189245210153, "grad_norm": 2.3047034740448, "learning_rate": 9.988094099254877e-06, "loss": 2.979, "step": 359700 }, { "epoch": 0.3544111504299236, "grad_norm": 2.397012948989868, "learning_rate": 9.988082862090582e-06, "loss": 2.9424, "step": 359750 }, { "epoch": 0.35446040840774573, "grad_norm": 2.3555734157562256, "learning_rate": 9.98807161963212e-06, "loss": 2.9627, "step": 359800 }, { "epoch": 0.3545096663855678, "grad_norm": 2.236656904220581, "learning_rate": 9.988060371879504e-06, "loss": 2.9614, "step": 359850 }, { "epoch": 0.35455892436338987, "grad_norm": 2.461535930633545, "learning_rate": 9.988049118832745e-06, "loss": 2.8731, "step": 359900 }, { "epoch": 0.354608182341212, "grad_norm": 2.2792508602142334, "learning_rate": 9.988037860491855e-06, "loss": 3.0164, "step": 359950 }, { "epoch": 0.35465744031903407, "grad_norm": 2.491628646850586, "learning_rate": 9.988026596856848e-06, "loss": 2.9098, "step": 360000 }, { "epoch": 0.35470669829685614, "grad_norm": 2.393871307373047, "learning_rate": 9.988015327927732e-06, "loss": 2.9714, "step": 360050 }, { "epoch": 0.35475595627467826, "grad_norm": 2.2619409561157227, "learning_rate": 9.98800405370452e-06, "loss": 2.8985, "step": 360100 }, { "epoch": 0.35480521425250033, "grad_norm": 2.492304801940918, "learning_rate": 9.98799277418723e-06, "loss": 2.9599, "step": 360150 }, { "epoch": 0.3548544722303224, "grad_norm": 2.1492764949798584, "learning_rate": 9.987981489375865e-06, "loss": 2.9358, "step": 360200 }, { "epoch": 0.35490373020814453, "grad_norm": 2.3312129974365234, "learning_rate": 9.987970199270441e-06, "loss": 2.969, "step": 360250 }, { "epoch": 0.3549529881859666, "grad_norm": 2.2014849185943604, "learning_rate": 9.987958903870972e-06, "loss": 2.9999, "step": 360300 }, { "epoch": 0.35500224616378867, "grad_norm": 2.3313918113708496, "learning_rate": 9.987947603177466e-06, "loss": 2.9053, "step": 360350 }, { "epoch": 0.3550515041416108, "grad_norm": 2.3025901317596436, "learning_rate": 
9.987936297189939e-06, "loss": 2.9352, "step": 360400 }, { "epoch": 0.35510076211943287, "grad_norm": 2.3685007095336914, "learning_rate": 9.987924985908399e-06, "loss": 2.9073, "step": 360450 }, { "epoch": 0.35515002009725494, "grad_norm": 2.3598215579986572, "learning_rate": 9.987913669332862e-06, "loss": 2.9633, "step": 360500 }, { "epoch": 0.35519927807507706, "grad_norm": 2.3687245845794678, "learning_rate": 9.987902347463337e-06, "loss": 2.9756, "step": 360550 }, { "epoch": 0.35524853605289913, "grad_norm": 2.204298734664917, "learning_rate": 9.987891020299836e-06, "loss": 2.9631, "step": 360600 }, { "epoch": 0.3552977940307212, "grad_norm": 2.3189995288848877, "learning_rate": 9.987879687842374e-06, "loss": 2.9543, "step": 360650 }, { "epoch": 0.35534705200854333, "grad_norm": 2.2477996349334717, "learning_rate": 9.987868350090959e-06, "loss": 3.0321, "step": 360700 }, { "epoch": 0.3553963099863654, "grad_norm": 2.292478322982788, "learning_rate": 9.987857007045607e-06, "loss": 2.9709, "step": 360750 }, { "epoch": 0.35544556796418747, "grad_norm": 2.332951545715332, "learning_rate": 9.987845658706326e-06, "loss": 2.9043, "step": 360800 }, { "epoch": 0.35549482594200954, "grad_norm": 2.3388519287109375, "learning_rate": 9.987834305073133e-06, "loss": 2.9777, "step": 360850 }, { "epoch": 0.35554408391983167, "grad_norm": 2.2666866779327393, "learning_rate": 9.987822946146036e-06, "loss": 2.9716, "step": 360900 }, { "epoch": 0.35559334189765374, "grad_norm": 2.3690900802612305, "learning_rate": 9.987811581925048e-06, "loss": 2.9342, "step": 360950 }, { "epoch": 0.3556425998754758, "grad_norm": 2.228529214859009, "learning_rate": 9.98780021241018e-06, "loss": 3.0428, "step": 361000 }, { "epoch": 0.35569185785329793, "grad_norm": 2.387155771255493, "learning_rate": 9.987788837601447e-06, "loss": 3.023, "step": 361050 }, { "epoch": 0.35574111583112, "grad_norm": 2.4653706550598145, "learning_rate": 9.98777745749886e-06, "loss": 2.9577, "step": 361100 }, { "epoch": 
0.3557903738089421, "grad_norm": 2.3315913677215576, "learning_rate": 9.98776607210243e-06, "loss": 2.9488, "step": 361150 }, { "epoch": 0.3558396317867642, "grad_norm": 2.3207244873046875, "learning_rate": 9.987754681412168e-06, "loss": 2.9376, "step": 361200 }, { "epoch": 0.35588888976458627, "grad_norm": 2.247079849243164, "learning_rate": 9.987743285428089e-06, "loss": 3.038, "step": 361250 }, { "epoch": 0.35593814774240834, "grad_norm": 2.353811025619507, "learning_rate": 9.987731884150204e-06, "loss": 2.9644, "step": 361300 }, { "epoch": 0.35598740572023047, "grad_norm": 2.222411632537842, "learning_rate": 9.987720477578527e-06, "loss": 2.9213, "step": 361350 }, { "epoch": 0.35603666369805254, "grad_norm": 2.2937471866607666, "learning_rate": 9.987709065713066e-06, "loss": 3.0056, "step": 361400 }, { "epoch": 0.3560859216758746, "grad_norm": 2.420436143875122, "learning_rate": 9.987697648553835e-06, "loss": 2.9928, "step": 361450 }, { "epoch": 0.35613517965369673, "grad_norm": 2.3408195972442627, "learning_rate": 9.987686226100847e-06, "loss": 2.9229, "step": 361500 }, { "epoch": 0.3561844376315188, "grad_norm": 2.26583194732666, "learning_rate": 9.987674798354113e-06, "loss": 2.984, "step": 361550 }, { "epoch": 0.3562336956093409, "grad_norm": 2.393211841583252, "learning_rate": 9.987663365313645e-06, "loss": 2.8888, "step": 361600 }, { "epoch": 0.356282953587163, "grad_norm": 2.5020511150360107, "learning_rate": 9.987651926979456e-06, "loss": 2.9762, "step": 361650 }, { "epoch": 0.35633221156498507, "grad_norm": 2.3906404972076416, "learning_rate": 9.987640483351557e-06, "loss": 2.9536, "step": 361700 }, { "epoch": 0.35638146954280714, "grad_norm": 2.4819223880767822, "learning_rate": 9.987629034429962e-06, "loss": 2.9712, "step": 361750 }, { "epoch": 0.35643072752062926, "grad_norm": 2.050848960876465, "learning_rate": 9.987617580214683e-06, "loss": 2.9817, "step": 361800 }, { "epoch": 0.35647998549845133, "grad_norm": 2.185004711151123, "learning_rate": 
9.98760612070573e-06, "loss": 2.9406, "step": 361850 }, { "epoch": 0.3565292434762734, "grad_norm": 2.222330093383789, "learning_rate": 9.987594655903116e-06, "loss": 2.9584, "step": 361900 }, { "epoch": 0.35657850145409553, "grad_norm": 2.568225860595703, "learning_rate": 9.987583185806854e-06, "loss": 2.9547, "step": 361950 }, { "epoch": 0.3566277594319176, "grad_norm": 2.1780872344970703, "learning_rate": 9.987571710416956e-06, "loss": 2.9979, "step": 362000 }, { "epoch": 0.35667701740973967, "grad_norm": 2.386396884918213, "learning_rate": 9.987560229733434e-06, "loss": 2.9594, "step": 362050 }, { "epoch": 0.35672627538756174, "grad_norm": 2.1966021060943604, "learning_rate": 9.9875487437563e-06, "loss": 2.9675, "step": 362100 }, { "epoch": 0.35677553336538387, "grad_norm": 2.2054474353790283, "learning_rate": 9.987537252485566e-06, "loss": 2.9671, "step": 362150 }, { "epoch": 0.35682479134320594, "grad_norm": 2.214151382446289, "learning_rate": 9.987525755921246e-06, "loss": 3.0101, "step": 362200 }, { "epoch": 0.356874049321028, "grad_norm": 2.341277599334717, "learning_rate": 9.987514254063349e-06, "loss": 2.9643, "step": 362250 }, { "epoch": 0.35692330729885013, "grad_norm": 2.472186326980591, "learning_rate": 9.98750274691189e-06, "loss": 2.9857, "step": 362300 }, { "epoch": 0.3569725652766722, "grad_norm": 2.230236768722534, "learning_rate": 9.98749123446688e-06, "loss": 2.959, "step": 362350 }, { "epoch": 0.3570218232544943, "grad_norm": 2.503843069076538, "learning_rate": 9.98747971672833e-06, "loss": 3.0442, "step": 362400 }, { "epoch": 0.3570710812323164, "grad_norm": 2.2867438793182373, "learning_rate": 9.987468193696255e-06, "loss": 3.0416, "step": 362450 }, { "epoch": 0.35712033921013847, "grad_norm": 2.368241786956787, "learning_rate": 9.987456665370665e-06, "loss": 2.9049, "step": 362500 }, { "epoch": 0.35716959718796054, "grad_norm": 2.510207176208496, "learning_rate": 9.987445131751574e-06, "loss": 2.9384, "step": 362550 }, { "epoch": 
0.35721885516578267, "grad_norm": 2.4941651821136475, "learning_rate": 9.987433592838992e-06, "loss": 2.9454, "step": 362600 }, { "epoch": 0.35726811314360474, "grad_norm": 2.286174774169922, "learning_rate": 9.987422048632934e-06, "loss": 2.9053, "step": 362650 }, { "epoch": 0.3573173711214268, "grad_norm": 2.355470657348633, "learning_rate": 9.98741049913341e-06, "loss": 2.9402, "step": 362700 }, { "epoch": 0.35736662909924893, "grad_norm": 3.3021585941314697, "learning_rate": 9.987398944340433e-06, "loss": 2.908, "step": 362750 }, { "epoch": 0.357415887077071, "grad_norm": 2.586327314376831, "learning_rate": 9.987387384254016e-06, "loss": 2.9315, "step": 362800 }, { "epoch": 0.3574651450548931, "grad_norm": 2.2139227390289307, "learning_rate": 9.987375818874169e-06, "loss": 3.0591, "step": 362850 }, { "epoch": 0.3575144030327152, "grad_norm": 2.154409885406494, "learning_rate": 9.987364248200908e-06, "loss": 3.0009, "step": 362900 }, { "epoch": 0.35756366101053727, "grad_norm": 2.3353519439697266, "learning_rate": 9.987352672234242e-06, "loss": 3.0115, "step": 362950 }, { "epoch": 0.35761291898835934, "grad_norm": 2.2507901191711426, "learning_rate": 9.987341090974183e-06, "loss": 3.0133, "step": 363000 }, { "epoch": 0.35766217696618147, "grad_norm": 2.3147101402282715, "learning_rate": 9.987329504420747e-06, "loss": 3.0013, "step": 363050 }, { "epoch": 0.35771143494400354, "grad_norm": 2.372318744659424, "learning_rate": 9.987317912573944e-06, "loss": 2.9555, "step": 363100 }, { "epoch": 0.3577606929218256, "grad_norm": 2.5550198554992676, "learning_rate": 9.987306315433785e-06, "loss": 3.0199, "step": 363150 }, { "epoch": 0.35780995089964773, "grad_norm": 2.389742136001587, "learning_rate": 9.987294713000284e-06, "loss": 2.9651, "step": 363200 }, { "epoch": 0.3578592088774698, "grad_norm": 2.2752418518066406, "learning_rate": 9.987283105273453e-06, "loss": 2.9912, "step": 363250 }, { "epoch": 0.3579084668552919, "grad_norm": 2.1332128047943115, 
"learning_rate": 9.987271492253305e-06, "loss": 3.0228, "step": 363300 }, { "epoch": 0.35795772483311394, "grad_norm": 2.503787040710449, "learning_rate": 9.987259873939851e-06, "loss": 2.8871, "step": 363350 }, { "epoch": 0.35800698281093607, "grad_norm": 2.158021926879883, "learning_rate": 9.987248250333103e-06, "loss": 2.9359, "step": 363400 }, { "epoch": 0.35805624078875814, "grad_norm": 2.12276029586792, "learning_rate": 9.987236621433076e-06, "loss": 3.0293, "step": 363450 }, { "epoch": 0.3581054987665802, "grad_norm": 2.621997833251953, "learning_rate": 9.987224987239779e-06, "loss": 2.9296, "step": 363500 }, { "epoch": 0.35815475674440234, "grad_norm": 2.2211179733276367, "learning_rate": 9.987213347753226e-06, "loss": 2.8561, "step": 363550 }, { "epoch": 0.3582040147222244, "grad_norm": 2.472642660140991, "learning_rate": 9.987201702973432e-06, "loss": 2.9218, "step": 363600 }, { "epoch": 0.3582532727000465, "grad_norm": 2.535191059112549, "learning_rate": 9.987190052900403e-06, "loss": 2.9592, "step": 363650 }, { "epoch": 0.3583025306778686, "grad_norm": 2.1980667114257812, "learning_rate": 9.987178397534156e-06, "loss": 2.9872, "step": 363700 }, { "epoch": 0.3583517886556907, "grad_norm": 2.2821850776672363, "learning_rate": 9.987166736874703e-06, "loss": 2.9709, "step": 363750 }, { "epoch": 0.35840104663351274, "grad_norm": 2.5591869354248047, "learning_rate": 9.987155070922055e-06, "loss": 2.9498, "step": 363800 }, { "epoch": 0.35845030461133487, "grad_norm": 1.9769644737243652, "learning_rate": 9.987143399676227e-06, "loss": 2.9963, "step": 363850 }, { "epoch": 0.35849956258915694, "grad_norm": 2.4309816360473633, "learning_rate": 9.987131723137229e-06, "loss": 2.9249, "step": 363900 }, { "epoch": 0.358548820566979, "grad_norm": 2.3071231842041016, "learning_rate": 9.987120041305073e-06, "loss": 2.9601, "step": 363950 }, { "epoch": 0.35859807854480114, "grad_norm": 2.614656686782837, "learning_rate": 9.987108354179773e-06, "loss": 3.0029, "step": 
364000 }, { "epoch": 0.3586473365226232, "grad_norm": 2.2864067554473877, "learning_rate": 9.987096661761341e-06, "loss": 2.9195, "step": 364050 }, { "epoch": 0.3586965945004453, "grad_norm": 2.4196524620056152, "learning_rate": 9.987084964049787e-06, "loss": 2.9755, "step": 364100 }, { "epoch": 0.3587458524782674, "grad_norm": 2.383248805999756, "learning_rate": 9.987073261045128e-06, "loss": 2.9514, "step": 364150 }, { "epoch": 0.35879511045608947, "grad_norm": 2.295335054397583, "learning_rate": 9.987061552747374e-06, "loss": 2.8639, "step": 364200 }, { "epoch": 0.35884436843391154, "grad_norm": 2.1179449558258057, "learning_rate": 9.987049839156535e-06, "loss": 2.8941, "step": 364250 }, { "epoch": 0.35889362641173367, "grad_norm": 2.1878271102905273, "learning_rate": 9.987038120272627e-06, "loss": 2.8847, "step": 364300 }, { "epoch": 0.35894288438955574, "grad_norm": 2.256791591644287, "learning_rate": 9.987026396095663e-06, "loss": 3.0081, "step": 364350 }, { "epoch": 0.3589921423673778, "grad_norm": 2.2335755825042725, "learning_rate": 9.987014666625652e-06, "loss": 2.9417, "step": 364400 }, { "epoch": 0.35904140034519993, "grad_norm": 2.2680416107177734, "learning_rate": 9.987002931862609e-06, "loss": 2.9282, "step": 364450 }, { "epoch": 0.359090658323022, "grad_norm": 2.2982795238494873, "learning_rate": 9.986991191806545e-06, "loss": 2.9667, "step": 364500 }, { "epoch": 0.3591399163008441, "grad_norm": 2.4332163333892822, "learning_rate": 9.986979446457474e-06, "loss": 3.01, "step": 364550 }, { "epoch": 0.35918917427866615, "grad_norm": 2.370990037918091, "learning_rate": 9.986967695815405e-06, "loss": 2.9812, "step": 364600 }, { "epoch": 0.35923843225648827, "grad_norm": 2.5364742279052734, "learning_rate": 9.986955939880355e-06, "loss": 2.945, "step": 364650 }, { "epoch": 0.35928769023431034, "grad_norm": 2.8644158840179443, "learning_rate": 9.986944178652336e-06, "loss": 2.9634, "step": 364700 }, { "epoch": 0.3593369482121324, "grad_norm": 
2.2953004837036133, "learning_rate": 9.986932412131356e-06, "loss": 2.8936, "step": 364750 }, { "epoch": 0.35938620618995454, "grad_norm": 2.4109456539154053, "learning_rate": 9.986920640317432e-06, "loss": 2.8999, "step": 364800 }, { "epoch": 0.3594354641677766, "grad_norm": 2.4547135829925537, "learning_rate": 9.986908863210576e-06, "loss": 2.8984, "step": 364850 }, { "epoch": 0.3594847221455987, "grad_norm": 2.2411201000213623, "learning_rate": 9.986897080810796e-06, "loss": 3.0263, "step": 364900 }, { "epoch": 0.3595339801234208, "grad_norm": 2.384784460067749, "learning_rate": 9.986885293118111e-06, "loss": 2.9623, "step": 364950 }, { "epoch": 0.3595832381012429, "grad_norm": 2.492511034011841, "learning_rate": 9.98687350013253e-06, "loss": 2.9564, "step": 365000 }, { "epoch": 0.35963249607906494, "grad_norm": 2.6095516681671143, "learning_rate": 9.986861701854066e-06, "loss": 2.9609, "step": 365050 }, { "epoch": 0.35968175405688707, "grad_norm": 2.2681164741516113, "learning_rate": 9.986849898282731e-06, "loss": 2.9927, "step": 365100 }, { "epoch": 0.35973101203470914, "grad_norm": 2.4563984870910645, "learning_rate": 9.986838089418539e-06, "loss": 2.9934, "step": 365150 }, { "epoch": 0.3597802700125312, "grad_norm": 2.186204671859741, "learning_rate": 9.9868262752615e-06, "loss": 2.9799, "step": 365200 }, { "epoch": 0.35982952799035334, "grad_norm": 2.279505729675293, "learning_rate": 9.98681445581163e-06, "loss": 2.9062, "step": 365250 }, { "epoch": 0.3598787859681754, "grad_norm": 2.197453260421753, "learning_rate": 9.986802631068937e-06, "loss": 2.93, "step": 365300 }, { "epoch": 0.3599280439459975, "grad_norm": 2.3760247230529785, "learning_rate": 9.986790801033439e-06, "loss": 2.9809, "step": 365350 }, { "epoch": 0.3599773019238196, "grad_norm": 2.4089417457580566, "learning_rate": 9.986778965705145e-06, "loss": 2.9856, "step": 365400 }, { "epoch": 0.3600265599016417, "grad_norm": 2.1843948364257812, "learning_rate": 9.986767125084068e-06, "loss": 
2.9609, "step": 365450 }, { "epoch": 0.36007581787946374, "grad_norm": 2.1791772842407227, "learning_rate": 9.98675527917022e-06, "loss": 3.0356, "step": 365500 }, { "epoch": 0.36012507585728587, "grad_norm": 2.2925949096679688, "learning_rate": 9.986743427963617e-06, "loss": 2.9506, "step": 365550 }, { "epoch": 0.36017433383510794, "grad_norm": 2.3661959171295166, "learning_rate": 9.986731571464266e-06, "loss": 2.9346, "step": 365600 }, { "epoch": 0.36022359181293, "grad_norm": 2.3750007152557373, "learning_rate": 9.986719709672183e-06, "loss": 2.9629, "step": 365650 }, { "epoch": 0.36027284979075214, "grad_norm": 2.163908004760742, "learning_rate": 9.986707842587383e-06, "loss": 2.9306, "step": 365700 }, { "epoch": 0.3603221077685742, "grad_norm": 2.4724814891815186, "learning_rate": 9.986695970209873e-06, "loss": 2.9844, "step": 365750 }, { "epoch": 0.3603713657463963, "grad_norm": 2.3490304946899414, "learning_rate": 9.98668409253967e-06, "loss": 3.0254, "step": 365800 }, { "epoch": 0.36042062372421835, "grad_norm": 2.229485273361206, "learning_rate": 9.986672209576784e-06, "loss": 2.9513, "step": 365850 }, { "epoch": 0.3604698817020405, "grad_norm": 2.306624412536621, "learning_rate": 9.98666032132123e-06, "loss": 3.0177, "step": 365900 }, { "epoch": 0.36051913967986254, "grad_norm": 2.2640554904937744, "learning_rate": 9.986648427773019e-06, "loss": 2.9233, "step": 365950 }, { "epoch": 0.3605683976576846, "grad_norm": 2.309554100036621, "learning_rate": 9.986636528932162e-06, "loss": 2.8838, "step": 366000 }, { "epoch": 0.36061765563550674, "grad_norm": 2.2442033290863037, "learning_rate": 9.986624624798676e-06, "loss": 2.9352, "step": 366050 }, { "epoch": 0.3606669136133288, "grad_norm": 2.1522607803344727, "learning_rate": 9.986612715372568e-06, "loss": 2.9641, "step": 366100 }, { "epoch": 0.3607161715911509, "grad_norm": 2.265014886856079, "learning_rate": 9.986600800653857e-06, "loss": 2.8896, "step": 366150 }, { "epoch": 0.360765429568973, "grad_norm": 
2.417192220687866, "learning_rate": 9.986588880642551e-06, "loss": 2.97, "step": 366200 }, { "epoch": 0.3608146875467951, "grad_norm": 2.3026037216186523, "learning_rate": 9.986576955338666e-06, "loss": 2.9248, "step": 366250 }, { "epoch": 0.36086394552461715, "grad_norm": 2.2389063835144043, "learning_rate": 9.98656502474221e-06, "loss": 3.057, "step": 366300 }, { "epoch": 0.3609132035024393, "grad_norm": 2.346858263015747, "learning_rate": 9.986553088853199e-06, "loss": 3.041, "step": 366350 }, { "epoch": 0.36096246148026134, "grad_norm": 2.509387493133545, "learning_rate": 9.986541147671647e-06, "loss": 2.9944, "step": 366400 }, { "epoch": 0.3610117194580834, "grad_norm": 2.3504912853240967, "learning_rate": 9.986529201197563e-06, "loss": 2.971, "step": 366450 }, { "epoch": 0.36106097743590554, "grad_norm": 2.295961380004883, "learning_rate": 9.98651724943096e-06, "loss": 2.8939, "step": 366500 }, { "epoch": 0.3611102354137276, "grad_norm": 2.0386202335357666, "learning_rate": 9.986505292371855e-06, "loss": 2.925, "step": 366550 }, { "epoch": 0.3611594933915497, "grad_norm": 2.1347155570983887, "learning_rate": 9.986493330020256e-06, "loss": 2.9795, "step": 366600 }, { "epoch": 0.3612087513693718, "grad_norm": 2.1900064945220947, "learning_rate": 9.986481362376178e-06, "loss": 3.007, "step": 366650 }, { "epoch": 0.3612580093471939, "grad_norm": 2.499217987060547, "learning_rate": 9.986469389439634e-06, "loss": 2.9398, "step": 366700 }, { "epoch": 0.36130726732501595, "grad_norm": 2.3247182369232178, "learning_rate": 9.986457411210636e-06, "loss": 2.9411, "step": 366750 }, { "epoch": 0.36135652530283807, "grad_norm": 2.3710410594940186, "learning_rate": 9.986445427689195e-06, "loss": 2.9323, "step": 366800 }, { "epoch": 0.36140578328066014, "grad_norm": 2.4857873916625977, "learning_rate": 9.986433438875326e-06, "loss": 2.9854, "step": 366850 }, { "epoch": 0.3614550412584822, "grad_norm": 2.5338656902313232, "learning_rate": 9.986421444769042e-06, "loss": 2.932, 
"step": 366900 }, { "epoch": 0.36150429923630434, "grad_norm": 2.296097755432129, "learning_rate": 9.986409445370355e-06, "loss": 2.9773, "step": 366950 }, { "epoch": 0.3615535572141264, "grad_norm": 2.2456724643707275, "learning_rate": 9.986397440679275e-06, "loss": 2.9337, "step": 367000 }, { "epoch": 0.3616028151919485, "grad_norm": 2.162834405899048, "learning_rate": 9.986385430695819e-06, "loss": 2.9935, "step": 367050 }, { "epoch": 0.36165207316977055, "grad_norm": 2.3892998695373535, "learning_rate": 9.986373415419998e-06, "loss": 2.9845, "step": 367100 }, { "epoch": 0.3617013311475927, "grad_norm": 2.4545092582702637, "learning_rate": 9.986361394851825e-06, "loss": 2.9134, "step": 367150 }, { "epoch": 0.36175058912541475, "grad_norm": 2.2731893062591553, "learning_rate": 9.986349368991313e-06, "loss": 3.0407, "step": 367200 }, { "epoch": 0.3617998471032368, "grad_norm": 2.473620891571045, "learning_rate": 9.986337337838473e-06, "loss": 3.0378, "step": 367250 }, { "epoch": 0.36184910508105894, "grad_norm": 2.3677704334259033, "learning_rate": 9.98632530139332e-06, "loss": 2.9873, "step": 367300 }, { "epoch": 0.361898363058881, "grad_norm": 2.3013105392456055, "learning_rate": 9.986313259655865e-06, "loss": 2.967, "step": 367350 }, { "epoch": 0.3619476210367031, "grad_norm": 2.243058443069458, "learning_rate": 9.986301212626123e-06, "loss": 2.9289, "step": 367400 }, { "epoch": 0.3619968790145252, "grad_norm": 2.2675092220306396, "learning_rate": 9.986289160304104e-06, "loss": 2.9242, "step": 367450 }, { "epoch": 0.3620461369923473, "grad_norm": 2.5041918754577637, "learning_rate": 9.986277102689823e-06, "loss": 2.999, "step": 367500 }, { "epoch": 0.36209539497016935, "grad_norm": 2.413787603378296, "learning_rate": 9.986265039783292e-06, "loss": 2.9146, "step": 367550 }, { "epoch": 0.3621446529479915, "grad_norm": 2.516357660293579, "learning_rate": 9.986252971584522e-06, "loss": 3.0032, "step": 367600 }, { "epoch": 0.36219391092581354, "grad_norm": 
2.228656768798828, "learning_rate": 9.986240898093529e-06, "loss": 2.8708, "step": 367650 }, { "epoch": 0.3622431689036356, "grad_norm": 2.2819879055023193, "learning_rate": 9.986228819310326e-06, "loss": 2.9564, "step": 367700 }, { "epoch": 0.36229242688145774, "grad_norm": 2.2887861728668213, "learning_rate": 9.986216735234922e-06, "loss": 2.9807, "step": 367750 }, { "epoch": 0.3623416848592798, "grad_norm": 2.261397123336792, "learning_rate": 9.986204645867332e-06, "loss": 2.9571, "step": 367800 }, { "epoch": 0.3623909428371019, "grad_norm": 2.3557450771331787, "learning_rate": 9.98619255120757e-06, "loss": 2.9528, "step": 367850 }, { "epoch": 0.362440200814924, "grad_norm": 2.3293778896331787, "learning_rate": 9.986180451255649e-06, "loss": 2.9899, "step": 367900 }, { "epoch": 0.3624894587927461, "grad_norm": 2.6079020500183105, "learning_rate": 9.986168346011577e-06, "loss": 2.9645, "step": 367950 }, { "epoch": 0.36253871677056815, "grad_norm": 2.277489185333252, "learning_rate": 9.986156235475372e-06, "loss": 2.9775, "step": 368000 }, { "epoch": 0.3625879747483903, "grad_norm": 2.3319947719573975, "learning_rate": 9.986144119647047e-06, "loss": 2.9454, "step": 368050 }, { "epoch": 0.36263723272621234, "grad_norm": 2.243565320968628, "learning_rate": 9.986131998526612e-06, "loss": 2.9363, "step": 368100 }, { "epoch": 0.3626864907040344, "grad_norm": 2.294163465499878, "learning_rate": 9.98611987211408e-06, "loss": 2.9562, "step": 368150 }, { "epoch": 0.36273574868185654, "grad_norm": 2.332155227661133, "learning_rate": 9.986107740409466e-06, "loss": 2.9653, "step": 368200 }, { "epoch": 0.3627850066596786, "grad_norm": 2.35566782951355, "learning_rate": 9.986095603412782e-06, "loss": 2.9554, "step": 368250 }, { "epoch": 0.3628342646375007, "grad_norm": 2.2926464080810547, "learning_rate": 9.98608346112404e-06, "loss": 2.9571, "step": 368300 }, { "epoch": 0.36288352261532275, "grad_norm": 2.4502904415130615, "learning_rate": 9.986071313543253e-06, "loss": 
3.0489, "step": 368350 }, { "epoch": 0.3629327805931449, "grad_norm": 2.2514894008636475, "learning_rate": 9.986059160670436e-06, "loss": 2.9877, "step": 368400 }, { "epoch": 0.36298203857096695, "grad_norm": 2.3496105670928955, "learning_rate": 9.986047002505599e-06, "loss": 2.978, "step": 368450 }, { "epoch": 0.363031296548789, "grad_norm": 2.636286735534668, "learning_rate": 9.986034839048757e-06, "loss": 2.9124, "step": 368500 }, { "epoch": 0.36308055452661114, "grad_norm": 2.268375873565674, "learning_rate": 9.986022670299922e-06, "loss": 2.9387, "step": 368550 }, { "epoch": 0.3631298125044332, "grad_norm": 2.1338846683502197, "learning_rate": 9.986010496259107e-06, "loss": 2.9319, "step": 368600 }, { "epoch": 0.3631790704822553, "grad_norm": 2.427481174468994, "learning_rate": 9.985998316926324e-06, "loss": 2.9621, "step": 368650 }, { "epoch": 0.3632283284600774, "grad_norm": 2.7243635654449463, "learning_rate": 9.985986132301588e-06, "loss": 2.9761, "step": 368700 }, { "epoch": 0.3632775864378995, "grad_norm": 2.272566080093384, "learning_rate": 9.98597394238491e-06, "loss": 2.912, "step": 368750 }, { "epoch": 0.36332684441572155, "grad_norm": 2.305267095565796, "learning_rate": 9.985961747176305e-06, "loss": 2.9062, "step": 368800 }, { "epoch": 0.3633761023935437, "grad_norm": 2.4928536415100098, "learning_rate": 9.985949546675784e-06, "loss": 2.9573, "step": 368850 }, { "epoch": 0.36342536037136575, "grad_norm": 2.3475279808044434, "learning_rate": 9.98593734088336e-06, "loss": 3.0469, "step": 368900 }, { "epoch": 0.3634746183491878, "grad_norm": 2.2307870388031006, "learning_rate": 9.985925129799046e-06, "loss": 2.9711, "step": 368950 }, { "epoch": 0.36352387632700994, "grad_norm": 2.524484395980835, "learning_rate": 9.98591291342286e-06, "loss": 2.964, "step": 369000 }, { "epoch": 0.363573134304832, "grad_norm": 2.4868838787078857, "learning_rate": 9.985900691754805e-06, "loss": 2.9731, "step": 369050 }, { "epoch": 0.3636223922826541, "grad_norm": 
2.344097137451172, "learning_rate": 9.985888464794902e-06, "loss": 2.9503, "step": 369100 }, { "epoch": 0.3636716502604762, "grad_norm": 2.3688931465148926, "learning_rate": 9.985876232543161e-06, "loss": 2.9353, "step": 369150 }, { "epoch": 0.3637209082382983, "grad_norm": 2.55981183052063, "learning_rate": 9.985863994999597e-06, "loss": 3.0133, "step": 369200 }, { "epoch": 0.36377016621612035, "grad_norm": 2.342165470123291, "learning_rate": 9.98585175216422e-06, "loss": 2.9629, "step": 369250 }, { "epoch": 0.3638194241939425, "grad_norm": 2.3682823181152344, "learning_rate": 9.985839504037044e-06, "loss": 2.9878, "step": 369300 }, { "epoch": 0.36386868217176455, "grad_norm": 2.286450147628784, "learning_rate": 9.985827250618083e-06, "loss": 2.9082, "step": 369350 }, { "epoch": 0.3639179401495866, "grad_norm": 2.179332971572876, "learning_rate": 9.98581499190735e-06, "loss": 2.9408, "step": 369400 }, { "epoch": 0.36396719812740874, "grad_norm": 2.163161277770996, "learning_rate": 9.985802727904857e-06, "loss": 2.8897, "step": 369450 }, { "epoch": 0.3640164561052308, "grad_norm": 2.451552152633667, "learning_rate": 9.985790458610617e-06, "loss": 2.9046, "step": 369500 }, { "epoch": 0.3640657140830529, "grad_norm": 2.3201611042022705, "learning_rate": 9.985778184024645e-06, "loss": 2.9703, "step": 369550 }, { "epoch": 0.36411497206087495, "grad_norm": 2.236283302307129, "learning_rate": 9.985765904146952e-06, "loss": 2.9671, "step": 369600 }, { "epoch": 0.3641642300386971, "grad_norm": 2.3461666107177734, "learning_rate": 9.985753618977549e-06, "loss": 3.0061, "step": 369650 }, { "epoch": 0.36421348801651915, "grad_norm": 2.5266242027282715, "learning_rate": 9.985741328516455e-06, "loss": 2.9636, "step": 369700 }, { "epoch": 0.3642627459943412, "grad_norm": 2.3190064430236816, "learning_rate": 9.985729032763678e-06, "loss": 3.0236, "step": 369750 }, { "epoch": 0.36431200397216335, "grad_norm": 2.3034493923187256, "learning_rate": 9.985716731719234e-06, "loss": 
2.9802, "step": 369800 }, { "epoch": 0.3643612619499854, "grad_norm": 2.4126546382904053, "learning_rate": 9.985704425383134e-06, "loss": 2.8808, "step": 369850 }, { "epoch": 0.3644105199278075, "grad_norm": 2.2490317821502686, "learning_rate": 9.985692113755391e-06, "loss": 2.9698, "step": 369900 }, { "epoch": 0.3644597779056296, "grad_norm": 2.1654293537139893, "learning_rate": 9.98567979683602e-06, "loss": 2.9584, "step": 369950 }, { "epoch": 0.3645090358834517, "grad_norm": 2.415447235107422, "learning_rate": 9.985667474625034e-06, "loss": 2.9243, "step": 370000 }, { "epoch": 0.36455829386127375, "grad_norm": 2.400291919708252, "learning_rate": 9.985655147122442e-06, "loss": 2.9454, "step": 370050 }, { "epoch": 0.3646075518390959, "grad_norm": 2.5096678733825684, "learning_rate": 9.985642814328263e-06, "loss": 3.0104, "step": 370100 }, { "epoch": 0.36465680981691795, "grad_norm": 2.4434659481048584, "learning_rate": 9.985630476242504e-06, "loss": 2.8913, "step": 370150 }, { "epoch": 0.36470606779474, "grad_norm": 2.177313804626465, "learning_rate": 9.985618132865184e-06, "loss": 2.9738, "step": 370200 }, { "epoch": 0.36475532577256214, "grad_norm": 2.4110448360443115, "learning_rate": 9.985605784196311e-06, "loss": 2.9989, "step": 370250 }, { "epoch": 0.3648045837503842, "grad_norm": 2.401695489883423, "learning_rate": 9.985593430235904e-06, "loss": 2.9327, "step": 370300 }, { "epoch": 0.3648538417282063, "grad_norm": 2.3993358612060547, "learning_rate": 9.98558107098397e-06, "loss": 2.892, "step": 370350 }, { "epoch": 0.3649030997060284, "grad_norm": 2.3527796268463135, "learning_rate": 9.985568706440525e-06, "loss": 2.9539, "step": 370400 }, { "epoch": 0.3649523576838505, "grad_norm": 2.3178253173828125, "learning_rate": 9.985556336605583e-06, "loss": 2.9045, "step": 370450 }, { "epoch": 0.36500161566167255, "grad_norm": 2.186147451400757, "learning_rate": 9.985543961479156e-06, "loss": 2.9434, "step": 370500 }, { "epoch": 0.3650508736394947, "grad_norm": 
2.193699836730957, "learning_rate": 9.985531581061255e-06, "loss": 2.9492, "step": 370550 }, { "epoch": 0.36510013161731675, "grad_norm": 2.316592216491699, "learning_rate": 9.985519195351898e-06, "loss": 2.9169, "step": 370600 }, { "epoch": 0.3651493895951388, "grad_norm": 2.2591137886047363, "learning_rate": 9.985506804351094e-06, "loss": 2.9958, "step": 370650 }, { "epoch": 0.36519864757296094, "grad_norm": 2.52751088142395, "learning_rate": 9.985494408058857e-06, "loss": 2.9704, "step": 370700 }, { "epoch": 0.365247905550783, "grad_norm": 2.3038623332977295, "learning_rate": 9.985482006475201e-06, "loss": 2.9745, "step": 370750 }, { "epoch": 0.3652971635286051, "grad_norm": 2.0761172771453857, "learning_rate": 9.98546959960014e-06, "loss": 2.9095, "step": 370800 }, { "epoch": 0.36534642150642715, "grad_norm": 2.328183889389038, "learning_rate": 9.985457187433685e-06, "loss": 2.9857, "step": 370850 }, { "epoch": 0.3653956794842493, "grad_norm": 2.258840560913086, "learning_rate": 9.985444769975852e-06, "loss": 3.008, "step": 370900 }, { "epoch": 0.36544493746207135, "grad_norm": 2.78092098236084, "learning_rate": 9.98543234722665e-06, "loss": 3.0109, "step": 370950 }, { "epoch": 0.3654941954398934, "grad_norm": 2.2364814281463623, "learning_rate": 9.985419919186097e-06, "loss": 2.9273, "step": 371000 }, { "epoch": 0.36554345341771555, "grad_norm": 2.4338488578796387, "learning_rate": 9.9854074858542e-06, "loss": 2.9747, "step": 371050 }, { "epoch": 0.3655927113955376, "grad_norm": 2.2652275562286377, "learning_rate": 9.98539504723098e-06, "loss": 3.0107, "step": 371100 }, { "epoch": 0.3656419693733597, "grad_norm": 2.472808837890625, "learning_rate": 9.985382603316445e-06, "loss": 2.9147, "step": 371150 }, { "epoch": 0.3656912273511818, "grad_norm": 2.108581304550171, "learning_rate": 9.98537015411061e-06, "loss": 3.007, "step": 371200 }, { "epoch": 0.3657404853290039, "grad_norm": 2.147116184234619, "learning_rate": 9.985357699613485e-06, "loss": 2.917, "step": 
371250 }, { "epoch": 0.36578974330682595, "grad_norm": 2.3535375595092773, "learning_rate": 9.985345239825087e-06, "loss": 2.9482, "step": 371300 }, { "epoch": 0.3658390012846481, "grad_norm": 2.345290184020996, "learning_rate": 9.985332774745428e-06, "loss": 2.9712, "step": 371350 }, { "epoch": 0.36588825926247015, "grad_norm": 2.2744669914245605, "learning_rate": 9.985320304374522e-06, "loss": 2.9224, "step": 371400 }, { "epoch": 0.3659375172402922, "grad_norm": 2.3576161861419678, "learning_rate": 9.985307828712382e-06, "loss": 2.9867, "step": 371450 }, { "epoch": 0.36598677521811435, "grad_norm": 2.1783623695373535, "learning_rate": 9.98529534775902e-06, "loss": 2.9896, "step": 371500 }, { "epoch": 0.3660360331959364, "grad_norm": 2.198509693145752, "learning_rate": 9.98528286151445e-06, "loss": 3.0207, "step": 371550 }, { "epoch": 0.3660852911737585, "grad_norm": 2.1839842796325684, "learning_rate": 9.985270369978684e-06, "loss": 2.9209, "step": 371600 }, { "epoch": 0.3661345491515806, "grad_norm": 2.2869932651519775, "learning_rate": 9.98525787315174e-06, "loss": 2.9513, "step": 371650 }, { "epoch": 0.3661838071294027, "grad_norm": 2.234440326690674, "learning_rate": 9.985245371033625e-06, "loss": 3.0109, "step": 371700 }, { "epoch": 0.36623306510722475, "grad_norm": 2.219343900680542, "learning_rate": 9.985232863624356e-06, "loss": 2.9843, "step": 371750 }, { "epoch": 0.3662823230850469, "grad_norm": 2.0281782150268555, "learning_rate": 9.985220350923944e-06, "loss": 2.9324, "step": 371800 }, { "epoch": 0.36633158106286895, "grad_norm": 2.2894396781921387, "learning_rate": 9.985207832932405e-06, "loss": 2.9609, "step": 371850 }, { "epoch": 0.366380839040691, "grad_norm": 2.1873865127563477, "learning_rate": 9.98519530964975e-06, "loss": 2.9367, "step": 371900 }, { "epoch": 0.36643009701851315, "grad_norm": 2.3196494579315186, "learning_rate": 9.985182781075994e-06, "loss": 2.9752, "step": 371950 }, { "epoch": 0.3664793549963352, "grad_norm": 
2.3310279846191406, "learning_rate": 9.985170247211148e-06, "loss": 2.9706, "step": 372000 }, { "epoch": 0.3665286129741573, "grad_norm": 2.1667609214782715, "learning_rate": 9.985157708055228e-06, "loss": 2.9831, "step": 372050 }, { "epoch": 0.36657787095197936, "grad_norm": 2.3297908306121826, "learning_rate": 9.985145163608246e-06, "loss": 2.9683, "step": 372100 }, { "epoch": 0.3666271289298015, "grad_norm": 2.489253520965576, "learning_rate": 9.985132613870216e-06, "loss": 2.8663, "step": 372150 }, { "epoch": 0.36667638690762355, "grad_norm": 2.3462092876434326, "learning_rate": 9.98512005884115e-06, "loss": 2.9662, "step": 372200 }, { "epoch": 0.3667256448854456, "grad_norm": 2.2548539638519287, "learning_rate": 9.985107498521061e-06, "loss": 2.8724, "step": 372250 }, { "epoch": 0.36677490286326775, "grad_norm": 2.333889961242676, "learning_rate": 9.985094932909964e-06, "loss": 2.934, "step": 372300 }, { "epoch": 0.3668241608410898, "grad_norm": 2.2352466583251953, "learning_rate": 9.985082362007873e-06, "loss": 2.9945, "step": 372350 }, { "epoch": 0.3668734188189119, "grad_norm": 2.476855754852295, "learning_rate": 9.985069785814799e-06, "loss": 2.9613, "step": 372400 }, { "epoch": 0.366922676796734, "grad_norm": 2.2411882877349854, "learning_rate": 9.985057204330757e-06, "loss": 2.994, "step": 372450 }, { "epoch": 0.3669719347745561, "grad_norm": 2.365665912628174, "learning_rate": 9.985044617555759e-06, "loss": 2.9658, "step": 372500 }, { "epoch": 0.36702119275237816, "grad_norm": 2.410503387451172, "learning_rate": 9.98503202548982e-06, "loss": 2.9761, "step": 372550 }, { "epoch": 0.3670704507302003, "grad_norm": 2.296252727508545, "learning_rate": 9.985019428132951e-06, "loss": 3.0214, "step": 372600 }, { "epoch": 0.36711970870802235, "grad_norm": 2.4165725708007812, "learning_rate": 9.98500682548517e-06, "loss": 2.9707, "step": 372650 }, { "epoch": 0.3671689666858444, "grad_norm": 2.514622211456299, "learning_rate": 9.984994217546484e-06, "loss": 2.9524, 
"step": 372700 }, { "epoch": 0.36721822466366655, "grad_norm": 2.3531386852264404, "learning_rate": 9.98498160431691e-06, "loss": 2.9862, "step": 372750 }, { "epoch": 0.3672674826414886, "grad_norm": 2.316027879714966, "learning_rate": 9.984968985796462e-06, "loss": 3.0338, "step": 372800 }, { "epoch": 0.3673167406193107, "grad_norm": 2.230767011642456, "learning_rate": 9.984956361985152e-06, "loss": 2.9049, "step": 372850 }, { "epoch": 0.3673659985971328, "grad_norm": 2.3280653953552246, "learning_rate": 9.984943732882993e-06, "loss": 2.9116, "step": 372900 }, { "epoch": 0.3674152565749549, "grad_norm": 2.170419454574585, "learning_rate": 9.98493109849e-06, "loss": 2.9143, "step": 372950 }, { "epoch": 0.36746451455277696, "grad_norm": 2.414006233215332, "learning_rate": 9.984918458806186e-06, "loss": 2.9215, "step": 373000 }, { "epoch": 0.3675137725305991, "grad_norm": 2.1818084716796875, "learning_rate": 9.984905813831563e-06, "loss": 2.9626, "step": 373050 }, { "epoch": 0.36756303050842115, "grad_norm": 2.2205467224121094, "learning_rate": 9.984893163566147e-06, "loss": 2.9607, "step": 373100 }, { "epoch": 0.3676122884862432, "grad_norm": 2.4412145614624023, "learning_rate": 9.98488050800995e-06, "loss": 2.9775, "step": 373150 }, { "epoch": 0.36766154646406535, "grad_norm": 2.4067630767822266, "learning_rate": 9.984867847162983e-06, "loss": 2.9083, "step": 373200 }, { "epoch": 0.3677108044418874, "grad_norm": 2.3929998874664307, "learning_rate": 9.984855181025263e-06, "loss": 2.9626, "step": 373250 }, { "epoch": 0.3677600624197095, "grad_norm": 2.305105447769165, "learning_rate": 9.984842509596803e-06, "loss": 2.9716, "step": 373300 }, { "epoch": 0.36780932039753156, "grad_norm": 2.321188449859619, "learning_rate": 9.984829832877613e-06, "loss": 3.0035, "step": 373350 }, { "epoch": 0.3678585783753537, "grad_norm": 2.369049310684204, "learning_rate": 9.984817150867711e-06, "loss": 2.9729, "step": 373400 }, { "epoch": 0.36790783635317575, "grad_norm": 
2.4235341548919678, "learning_rate": 9.98480446356711e-06, "loss": 2.9881, "step": 373450 }, { "epoch": 0.3679570943309978, "grad_norm": 2.2693064212799072, "learning_rate": 9.98479177097582e-06, "loss": 2.9061, "step": 373500 }, { "epoch": 0.36800635230881995, "grad_norm": 2.517242670059204, "learning_rate": 9.984779073093857e-06, "loss": 2.9807, "step": 373550 }, { "epoch": 0.368055610286642, "grad_norm": 2.0748016834259033, "learning_rate": 9.984766369921233e-06, "loss": 2.9643, "step": 373600 }, { "epoch": 0.3681048682644641, "grad_norm": 2.199024200439453, "learning_rate": 9.984753661457964e-06, "loss": 2.9868, "step": 373650 }, { "epoch": 0.3681541262422862, "grad_norm": 2.3255419731140137, "learning_rate": 9.98474094770406e-06, "loss": 2.9676, "step": 373700 }, { "epoch": 0.3682033842201083, "grad_norm": 2.3928890228271484, "learning_rate": 9.984728228659539e-06, "loss": 2.9904, "step": 373750 }, { "epoch": 0.36825264219793036, "grad_norm": 2.1849429607391357, "learning_rate": 9.984715504324412e-06, "loss": 2.9982, "step": 373800 }, { "epoch": 0.3683019001757525, "grad_norm": 2.254878520965576, "learning_rate": 9.984702774698688e-06, "loss": 2.9344, "step": 373850 }, { "epoch": 0.36835115815357455, "grad_norm": 2.37884783744812, "learning_rate": 9.984690039782389e-06, "loss": 2.946, "step": 373900 }, { "epoch": 0.3684004161313966, "grad_norm": 2.3156776428222656, "learning_rate": 9.984677299575524e-06, "loss": 2.9555, "step": 373950 }, { "epoch": 0.36844967410921875, "grad_norm": 2.351181745529175, "learning_rate": 9.984664554078106e-06, "loss": 2.9793, "step": 374000 }, { "epoch": 0.3684989320870408, "grad_norm": 2.1720635890960693, "learning_rate": 9.98465180329015e-06, "loss": 2.9607, "step": 374050 }, { "epoch": 0.3685481900648629, "grad_norm": 2.1921885013580322, "learning_rate": 9.98463904721167e-06, "loss": 2.9083, "step": 374100 }, { "epoch": 0.368597448042685, "grad_norm": 2.349820137023926, "learning_rate": 9.984626285842676e-06, "loss": 2.9387, 
"step": 374150 }, { "epoch": 0.3686467060205071, "grad_norm": 2.197554349899292, "learning_rate": 9.984613519183185e-06, "loss": 2.9459, "step": 374200 }, { "epoch": 0.36869596399832916, "grad_norm": 2.1750175952911377, "learning_rate": 9.98460074723321e-06, "loss": 2.9614, "step": 374250 }, { "epoch": 0.3687452219761513, "grad_norm": 2.2455074787139893, "learning_rate": 9.984587969992765e-06, "loss": 2.9816, "step": 374300 }, { "epoch": 0.36879447995397335, "grad_norm": 2.44236421585083, "learning_rate": 9.984575187461863e-06, "loss": 2.9058, "step": 374350 }, { "epoch": 0.3688437379317954, "grad_norm": 2.3136444091796875, "learning_rate": 9.984562399640516e-06, "loss": 2.9997, "step": 374400 }, { "epoch": 0.36889299590961755, "grad_norm": 2.4760191440582275, "learning_rate": 9.98454960652874e-06, "loss": 2.9829, "step": 374450 }, { "epoch": 0.3689422538874396, "grad_norm": 2.095914602279663, "learning_rate": 9.984536808126546e-06, "loss": 2.9528, "step": 374500 }, { "epoch": 0.3689915118652617, "grad_norm": 2.1936299800872803, "learning_rate": 9.984524004433951e-06, "loss": 2.9808, "step": 374550 }, { "epoch": 0.36904076984308376, "grad_norm": 2.119621992111206, "learning_rate": 9.984511195450966e-06, "loss": 2.9285, "step": 374600 }, { "epoch": 0.3690900278209059, "grad_norm": 2.307997703552246, "learning_rate": 9.984498381177605e-06, "loss": 2.9547, "step": 374650 }, { "epoch": 0.36913928579872796, "grad_norm": 2.356783628463745, "learning_rate": 9.984485561613882e-06, "loss": 2.8964, "step": 374700 }, { "epoch": 0.36918854377655, "grad_norm": 2.4519779682159424, "learning_rate": 9.98447273675981e-06, "loss": 2.9427, "step": 374750 }, { "epoch": 0.36923780175437215, "grad_norm": 2.169921875, "learning_rate": 9.984459906615403e-06, "loss": 2.9656, "step": 374800 }, { "epoch": 0.3692870597321942, "grad_norm": 2.3823702335357666, "learning_rate": 9.984447071180675e-06, "loss": 2.9071, "step": 374850 }, { "epoch": 0.3693363177100163, "grad_norm": 2.482151746749878, 
"learning_rate": 9.98443423045564e-06, "loss": 2.8651, "step": 374900 }, { "epoch": 0.3693855756878384, "grad_norm": 2.277364730834961, "learning_rate": 9.984421384440312e-06, "loss": 2.9314, "step": 374950 }, { "epoch": 0.3694348336656605, "grad_norm": 2.3988230228424072, "learning_rate": 9.984408533134703e-06, "loss": 2.972, "step": 375000 }, { "epoch": 0.36948409164348256, "grad_norm": 2.3329312801361084, "learning_rate": 9.984395676538825e-06, "loss": 3.0057, "step": 375050 }, { "epoch": 0.3695333496213047, "grad_norm": 2.27927303314209, "learning_rate": 9.984382814652695e-06, "loss": 2.9203, "step": 375100 }, { "epoch": 0.36958260759912676, "grad_norm": 2.181896448135376, "learning_rate": 9.984369947476327e-06, "loss": 2.9744, "step": 375150 }, { "epoch": 0.3696318655769488, "grad_norm": 2.4688825607299805, "learning_rate": 9.984357075009732e-06, "loss": 2.9081, "step": 375200 }, { "epoch": 0.36968112355477095, "grad_norm": 2.3291006088256836, "learning_rate": 9.984344197252924e-06, "loss": 2.9749, "step": 375250 }, { "epoch": 0.369730381532593, "grad_norm": 2.0842764377593994, "learning_rate": 9.98433131420592e-06, "loss": 2.9621, "step": 375300 }, { "epoch": 0.3697796395104151, "grad_norm": 2.230250358581543, "learning_rate": 9.984318425868728e-06, "loss": 2.9321, "step": 375350 }, { "epoch": 0.3698288974882372, "grad_norm": 2.2620532512664795, "learning_rate": 9.984305532241368e-06, "loss": 2.957, "step": 375400 }, { "epoch": 0.3698781554660593, "grad_norm": 2.3934977054595947, "learning_rate": 9.984292633323848e-06, "loss": 2.9044, "step": 375450 }, { "epoch": 0.36992741344388136, "grad_norm": 2.244781494140625, "learning_rate": 9.984279729116186e-06, "loss": 2.8956, "step": 375500 }, { "epoch": 0.3699766714217035, "grad_norm": 2.2515761852264404, "learning_rate": 9.984266819618393e-06, "loss": 2.9966, "step": 375550 }, { "epoch": 0.37002592939952555, "grad_norm": 2.2890937328338623, "learning_rate": 9.984253904830483e-06, "loss": 2.9551, "step": 375600 }, 
{ "epoch": 0.3700751873773476, "grad_norm": 2.1997873783111572, "learning_rate": 9.98424098475247e-06, "loss": 2.9423, "step": 375650 }, { "epoch": 0.3701244453551697, "grad_norm": 2.3467485904693604, "learning_rate": 9.98422805938437e-06, "loss": 2.9414, "step": 375700 }, { "epoch": 0.3701737033329918, "grad_norm": 2.1842405796051025, "learning_rate": 9.984215128726195e-06, "loss": 2.9733, "step": 375750 }, { "epoch": 0.3702229613108139, "grad_norm": 2.2888901233673096, "learning_rate": 9.984202192777957e-06, "loss": 2.9493, "step": 375800 }, { "epoch": 0.37027221928863596, "grad_norm": 2.376406192779541, "learning_rate": 9.984189251539671e-06, "loss": 2.9967, "step": 375850 }, { "epoch": 0.3703214772664581, "grad_norm": 2.2927086353302, "learning_rate": 9.984176305011353e-06, "loss": 2.9621, "step": 375900 }, { "epoch": 0.37037073524428016, "grad_norm": 2.871405601501465, "learning_rate": 9.984163353193014e-06, "loss": 2.9996, "step": 375950 }, { "epoch": 0.37041999322210223, "grad_norm": 2.3754570484161377, "learning_rate": 9.984150396084668e-06, "loss": 3.0148, "step": 376000 }, { "epoch": 0.37046925119992435, "grad_norm": 2.215423107147217, "learning_rate": 9.984137433686328e-06, "loss": 2.9743, "step": 376050 }, { "epoch": 0.3705185091777464, "grad_norm": 2.1638331413269043, "learning_rate": 9.98412446599801e-06, "loss": 2.9501, "step": 376100 }, { "epoch": 0.3705677671555685, "grad_norm": 2.2707722187042236, "learning_rate": 9.984111493019726e-06, "loss": 2.8654, "step": 376150 }, { "epoch": 0.3706170251333906, "grad_norm": 2.252742290496826, "learning_rate": 9.984098514751491e-06, "loss": 2.9567, "step": 376200 }, { "epoch": 0.3706662831112127, "grad_norm": 2.2318477630615234, "learning_rate": 9.98408553119332e-06, "loss": 2.9694, "step": 376250 }, { "epoch": 0.37071554108903476, "grad_norm": 2.3586008548736572, "learning_rate": 9.984072542345223e-06, "loss": 2.9453, "step": 376300 }, { "epoch": 0.3707647990668569, "grad_norm": 2.398897409439087, 
"learning_rate": 9.984059548207217e-06, "loss": 2.939, "step": 376350 }, { "epoch": 0.37081405704467896, "grad_norm": 2.1523945331573486, "learning_rate": 9.984046548779315e-06, "loss": 3.0162, "step": 376400 }, { "epoch": 0.37086331502250103, "grad_norm": 2.14494252204895, "learning_rate": 9.984033544061529e-06, "loss": 2.9228, "step": 376450 }, { "epoch": 0.37091257300032315, "grad_norm": 2.288625478744507, "learning_rate": 9.984020534053874e-06, "loss": 2.9365, "step": 376500 }, { "epoch": 0.3709618309781452, "grad_norm": 2.6372318267822266, "learning_rate": 9.984007518756366e-06, "loss": 2.9431, "step": 376550 }, { "epoch": 0.3710110889559673, "grad_norm": 2.268389940261841, "learning_rate": 9.983994498169016e-06, "loss": 2.9589, "step": 376600 }, { "epoch": 0.3710603469337894, "grad_norm": 2.0807669162750244, "learning_rate": 9.983981472291839e-06, "loss": 2.9207, "step": 376650 }, { "epoch": 0.3711096049116115, "grad_norm": 2.4463517665863037, "learning_rate": 9.983968441124847e-06, "loss": 3.0308, "step": 376700 }, { "epoch": 0.37115886288943356, "grad_norm": 2.3623292446136475, "learning_rate": 9.983955404668056e-06, "loss": 2.9293, "step": 376750 }, { "epoch": 0.3712081208672557, "grad_norm": 2.2119033336639404, "learning_rate": 9.983942362921479e-06, "loss": 2.9782, "step": 376800 }, { "epoch": 0.37125737884507776, "grad_norm": 2.3923895359039307, "learning_rate": 9.98392931588513e-06, "loss": 2.9648, "step": 376850 }, { "epoch": 0.3713066368228998, "grad_norm": 2.366384983062744, "learning_rate": 9.983916263559024e-06, "loss": 2.9679, "step": 376900 }, { "epoch": 0.3713558948007219, "grad_norm": 2.367220878601074, "learning_rate": 9.983903205943174e-06, "loss": 2.9611, "step": 376950 }, { "epoch": 0.371405152778544, "grad_norm": 2.2120461463928223, "learning_rate": 9.983890143037592e-06, "loss": 2.9881, "step": 377000 }, { "epoch": 0.3714544107563661, "grad_norm": 2.301856756210327, "learning_rate": 9.983877074842295e-06, "loss": 2.978, "step": 377050 }, 
{ "epoch": 0.37150366873418816, "grad_norm": 2.3141276836395264, "learning_rate": 9.983864001357293e-06, "loss": 2.9988, "step": 377100 }, { "epoch": 0.3715529267120103, "grad_norm": 2.574843406677246, "learning_rate": 9.983850922582605e-06, "loss": 3.03, "step": 377150 }, { "epoch": 0.37160218468983236, "grad_norm": 2.3167924880981445, "learning_rate": 9.98383783851824e-06, "loss": 3.0272, "step": 377200 }, { "epoch": 0.37165144266765443, "grad_norm": 2.2507688999176025, "learning_rate": 9.983824749164214e-06, "loss": 3.0214, "step": 377250 }, { "epoch": 0.37170070064547656, "grad_norm": 2.3787684440612793, "learning_rate": 9.983811654520542e-06, "loss": 3.0381, "step": 377300 }, { "epoch": 0.3717499586232986, "grad_norm": 2.086843967437744, "learning_rate": 9.983798554587237e-06, "loss": 2.9696, "step": 377350 }, { "epoch": 0.3717992166011207, "grad_norm": 2.3180222511291504, "learning_rate": 9.983785449364312e-06, "loss": 3.0184, "step": 377400 }, { "epoch": 0.3718484745789428, "grad_norm": 2.451256275177002, "learning_rate": 9.98377233885178e-06, "loss": 2.9659, "step": 377450 }, { "epoch": 0.3718977325567649, "grad_norm": 2.152449131011963, "learning_rate": 9.983759223049657e-06, "loss": 2.9323, "step": 377500 }, { "epoch": 0.37194699053458696, "grad_norm": 2.3356964588165283, "learning_rate": 9.983746101957959e-06, "loss": 2.9107, "step": 377550 }, { "epoch": 0.3719962485124091, "grad_norm": 2.23681640625, "learning_rate": 9.983732975576695e-06, "loss": 3.0137, "step": 377600 }, { "epoch": 0.37204550649023116, "grad_norm": 2.232588529586792, "learning_rate": 9.983719843905882e-06, "loss": 2.9537, "step": 377650 }, { "epoch": 0.37209476446805323, "grad_norm": 2.1547937393188477, "learning_rate": 9.983706706945533e-06, "loss": 2.904, "step": 377700 }, { "epoch": 0.37214402244587536, "grad_norm": 2.462756872177124, "learning_rate": 9.983693564695663e-06, "loss": 2.9208, "step": 377750 }, { "epoch": 0.3721932804236974, "grad_norm": 2.170409679412842, 
"learning_rate": 9.983680417156284e-06, "loss": 2.9105, "step": 377800 }, { "epoch": 0.3722425384015195, "grad_norm": 2.18009877204895, "learning_rate": 9.983667264327411e-06, "loss": 2.9855, "step": 377850 }, { "epoch": 0.3722917963793416, "grad_norm": 2.197504758834839, "learning_rate": 9.983654106209058e-06, "loss": 2.8638, "step": 377900 }, { "epoch": 0.3723410543571637, "grad_norm": 2.2677419185638428, "learning_rate": 9.98364094280124e-06, "loss": 2.9591, "step": 377950 }, { "epoch": 0.37239031233498576, "grad_norm": 2.124849319458008, "learning_rate": 9.983627774103969e-06, "loss": 2.9426, "step": 378000 }, { "epoch": 0.3724395703128079, "grad_norm": 2.1744275093078613, "learning_rate": 9.98361460011726e-06, "loss": 2.9031, "step": 378050 }, { "epoch": 0.37248882829062996, "grad_norm": 2.2415709495544434, "learning_rate": 9.983601420841127e-06, "loss": 2.9301, "step": 378100 }, { "epoch": 0.37253808626845203, "grad_norm": 2.313066244125366, "learning_rate": 9.983588236275583e-06, "loss": 2.9666, "step": 378150 }, { "epoch": 0.3725873442462741, "grad_norm": 2.235607385635376, "learning_rate": 9.983575046420645e-06, "loss": 2.9215, "step": 378200 }, { "epoch": 0.3726366022240962, "grad_norm": 2.7131216526031494, "learning_rate": 9.983561851276322e-06, "loss": 3.0446, "step": 378250 }, { "epoch": 0.3726858602019183, "grad_norm": 2.1737685203552246, "learning_rate": 9.983548650842632e-06, "loss": 2.982, "step": 378300 }, { "epoch": 0.37273511817974037, "grad_norm": 2.439265489578247, "learning_rate": 9.983535445119589e-06, "loss": 2.9811, "step": 378350 }, { "epoch": 0.3727843761575625, "grad_norm": 2.48960018157959, "learning_rate": 9.983522234107205e-06, "loss": 2.9064, "step": 378400 }, { "epoch": 0.37283363413538456, "grad_norm": 3.1482553482055664, "learning_rate": 9.983509017805496e-06, "loss": 2.9722, "step": 378450 }, { "epoch": 0.37288289211320663, "grad_norm": 2.257810354232788, "learning_rate": 9.983495796214474e-06, "loss": 2.9485, "step": 378500 }, 
{ "epoch": 0.37293215009102876, "grad_norm": 2.324298143386841, "learning_rate": 9.983482569334154e-06, "loss": 2.9941, "step": 378550 }, { "epoch": 0.37298140806885083, "grad_norm": 2.148759126663208, "learning_rate": 9.98346933716455e-06, "loss": 2.9585, "step": 378600 }, { "epoch": 0.3730306660466729, "grad_norm": 2.2838656902313232, "learning_rate": 9.983456099705676e-06, "loss": 2.9601, "step": 378650 }, { "epoch": 0.373079924024495, "grad_norm": 2.3178234100341797, "learning_rate": 9.983442856957546e-06, "loss": 2.9077, "step": 378700 }, { "epoch": 0.3731291820023171, "grad_norm": 2.3086631298065186, "learning_rate": 9.983429608920174e-06, "loss": 2.8466, "step": 378750 }, { "epoch": 0.37317843998013916, "grad_norm": 2.237861156463623, "learning_rate": 9.983416355593576e-06, "loss": 2.9318, "step": 378800 }, { "epoch": 0.3732276979579613, "grad_norm": 2.318366527557373, "learning_rate": 9.983403096977763e-06, "loss": 2.8833, "step": 378850 }, { "epoch": 0.37327695593578336, "grad_norm": 2.258074998855591, "learning_rate": 9.98338983307275e-06, "loss": 2.9152, "step": 378900 }, { "epoch": 0.37332621391360543, "grad_norm": 2.2780075073242188, "learning_rate": 9.983376563878552e-06, "loss": 2.9505, "step": 378950 }, { "epoch": 0.37337547189142756, "grad_norm": 2.4153873920440674, "learning_rate": 9.983363289395182e-06, "loss": 2.9706, "step": 379000 }, { "epoch": 0.3734247298692496, "grad_norm": 2.261320114135742, "learning_rate": 9.983350009622655e-06, "loss": 2.9845, "step": 379050 }, { "epoch": 0.3734739878470717, "grad_norm": 2.2231285572052, "learning_rate": 9.983336724560985e-06, "loss": 2.9666, "step": 379100 }, { "epoch": 0.3735232458248938, "grad_norm": 2.301809072494507, "learning_rate": 9.983323434210184e-06, "loss": 2.9267, "step": 379150 }, { "epoch": 0.3735725038027159, "grad_norm": 2.3009023666381836, "learning_rate": 9.98331013857027e-06, "loss": 2.9643, "step": 379200 }, { "epoch": 0.37362176178053796, "grad_norm": 2.296684503555298, 
"learning_rate": 9.983296837641257e-06, "loss": 2.9944, "step": 379250 }, { "epoch": 0.3736710197583601, "grad_norm": 2.2427263259887695, "learning_rate": 9.983283531423153e-06, "loss": 2.9733, "step": 379300 }, { "epoch": 0.37372027773618216, "grad_norm": 2.1772594451904297, "learning_rate": 9.983270219915977e-06, "loss": 2.981, "step": 379350 }, { "epoch": 0.37376953571400423, "grad_norm": 2.2721621990203857, "learning_rate": 9.983256903119743e-06, "loss": 2.9747, "step": 379400 }, { "epoch": 0.3738187936918263, "grad_norm": 2.5400753021240234, "learning_rate": 9.983243581034467e-06, "loss": 2.954, "step": 379450 }, { "epoch": 0.3738680516696484, "grad_norm": 2.2973482608795166, "learning_rate": 9.983230253660157e-06, "loss": 2.9293, "step": 379500 }, { "epoch": 0.3739173096474705, "grad_norm": 2.349046230316162, "learning_rate": 9.983216920996833e-06, "loss": 2.9491, "step": 379550 }, { "epoch": 0.37396656762529257, "grad_norm": 2.2408735752105713, "learning_rate": 9.983203583044505e-06, "loss": 2.9659, "step": 379600 }, { "epoch": 0.3740158256031147, "grad_norm": 2.256530284881592, "learning_rate": 9.983190239803192e-06, "loss": 2.9685, "step": 379650 }, { "epoch": 0.37406508358093676, "grad_norm": 2.320786476135254, "learning_rate": 9.983176891272903e-06, "loss": 2.9333, "step": 379700 }, { "epoch": 0.37411434155875883, "grad_norm": 2.120586395263672, "learning_rate": 9.983163537453655e-06, "loss": 2.955, "step": 379750 }, { "epoch": 0.37416359953658096, "grad_norm": 2.2356503009796143, "learning_rate": 9.983150178345463e-06, "loss": 2.9932, "step": 379800 }, { "epoch": 0.37421285751440303, "grad_norm": 2.387631893157959, "learning_rate": 9.983136813948338e-06, "loss": 2.9027, "step": 379850 }, { "epoch": 0.3742621154922251, "grad_norm": 2.155264377593994, "learning_rate": 9.983123444262297e-06, "loss": 2.9777, "step": 379900 }, { "epoch": 0.3743113734700472, "grad_norm": 2.4461753368377686, "learning_rate": 9.983110069287353e-06, "loss": 3.0014, "step": 
379950 }, { "epoch": 0.3743606314478693, "grad_norm": 2.2724449634552, "learning_rate": 9.98309668902352e-06, "loss": 2.9499, "step": 380000 }, { "epoch": 0.37440988942569137, "grad_norm": 2.389441967010498, "learning_rate": 9.983083303470813e-06, "loss": 2.9624, "step": 380050 }, { "epoch": 0.3744591474035135, "grad_norm": 2.2443060874938965, "learning_rate": 9.983069912629245e-06, "loss": 2.8841, "step": 380100 }, { "epoch": 0.37450840538133556, "grad_norm": 2.349874496459961, "learning_rate": 9.983056516498832e-06, "loss": 2.8847, "step": 380150 }, { "epoch": 0.37455766335915763, "grad_norm": 2.331456184387207, "learning_rate": 9.983043115079587e-06, "loss": 2.9813, "step": 380200 }, { "epoch": 0.37460692133697976, "grad_norm": 2.2243292331695557, "learning_rate": 9.983029708371523e-06, "loss": 2.8511, "step": 380250 }, { "epoch": 0.37465617931480183, "grad_norm": 2.399918794631958, "learning_rate": 9.983016296374658e-06, "loss": 2.9639, "step": 380300 }, { "epoch": 0.3747054372926239, "grad_norm": 2.401742696762085, "learning_rate": 9.983002879089002e-06, "loss": 2.9453, "step": 380350 }, { "epoch": 0.374754695270446, "grad_norm": 2.1671876907348633, "learning_rate": 9.982989456514572e-06, "loss": 2.9845, "step": 380400 }, { "epoch": 0.3748039532482681, "grad_norm": 2.22387433052063, "learning_rate": 9.982976028651383e-06, "loss": 2.9935, "step": 380450 }, { "epoch": 0.37485321122609017, "grad_norm": 2.282721519470215, "learning_rate": 9.982962595499444e-06, "loss": 2.9534, "step": 380500 }, { "epoch": 0.3749024692039123, "grad_norm": 2.334825038909912, "learning_rate": 9.982949157058777e-06, "loss": 2.9438, "step": 380550 }, { "epoch": 0.37495172718173436, "grad_norm": 2.341357469558716, "learning_rate": 9.982935713329389e-06, "loss": 2.9912, "step": 380600 }, { "epoch": 0.37500098515955643, "grad_norm": 2.213778257369995, "learning_rate": 9.982922264311297e-06, "loss": 2.9659, "step": 380650 }, { "epoch": 0.3750502431373785, "grad_norm": 2.7562761306762695, 
"learning_rate": 9.982908810004518e-06, "loss": 2.8802, "step": 380700 }, { "epoch": 0.37509950111520063, "grad_norm": 2.3115639686584473, "learning_rate": 9.982895350409065e-06, "loss": 2.9607, "step": 380750 }, { "epoch": 0.3751487590930227, "grad_norm": 2.305588483810425, "learning_rate": 9.982881885524949e-06, "loss": 2.9551, "step": 380800 }, { "epoch": 0.37519801707084477, "grad_norm": 2.3463213443756104, "learning_rate": 9.982868415352187e-06, "loss": 2.9875, "step": 380850 }, { "epoch": 0.3752472750486669, "grad_norm": 2.2627429962158203, "learning_rate": 9.982854939890794e-06, "loss": 2.9236, "step": 380900 }, { "epoch": 0.37529653302648897, "grad_norm": 2.2784066200256348, "learning_rate": 9.982841459140781e-06, "loss": 2.9761, "step": 380950 }, { "epoch": 0.37534579100431104, "grad_norm": 2.6127262115478516, "learning_rate": 9.982827973102166e-06, "loss": 2.9523, "step": 381000 }, { "epoch": 0.37539504898213316, "grad_norm": 2.474867820739746, "learning_rate": 9.982814481774961e-06, "loss": 2.9892, "step": 381050 }, { "epoch": 0.37544430695995523, "grad_norm": 2.3431968688964844, "learning_rate": 9.982800985159181e-06, "loss": 2.9654, "step": 381100 }, { "epoch": 0.3754935649377773, "grad_norm": 2.230130195617676, "learning_rate": 9.982787483254843e-06, "loss": 2.9834, "step": 381150 }, { "epoch": 0.37554282291559943, "grad_norm": 2.3372015953063965, "learning_rate": 9.982773976061955e-06, "loss": 2.955, "step": 381200 }, { "epoch": 0.3755920808934215, "grad_norm": 2.236506700515747, "learning_rate": 9.982760463580538e-06, "loss": 2.9096, "step": 381250 }, { "epoch": 0.37564133887124357, "grad_norm": 2.1727795600891113, "learning_rate": 9.982746945810602e-06, "loss": 3.0035, "step": 381300 }, { "epoch": 0.3756905968490657, "grad_norm": 2.188779592514038, "learning_rate": 9.982733422752162e-06, "loss": 2.9821, "step": 381350 }, { "epoch": 0.37573985482688776, "grad_norm": 2.1022939682006836, "learning_rate": 9.982719894405235e-06, "loss": 2.9635, "step": 
381400 }, { "epoch": 0.37578911280470984, "grad_norm": 2.4634947776794434, "learning_rate": 9.982706360769833e-06, "loss": 3.0527, "step": 381450 }, { "epoch": 0.37583837078253196, "grad_norm": 2.3958916664123535, "learning_rate": 9.98269282184597e-06, "loss": 2.9836, "step": 381500 }, { "epoch": 0.37588762876035403, "grad_norm": 2.3277928829193115, "learning_rate": 9.982679277633662e-06, "loss": 2.8822, "step": 381550 }, { "epoch": 0.3759368867381761, "grad_norm": 2.15012788772583, "learning_rate": 9.982665728132923e-06, "loss": 2.9239, "step": 381600 }, { "epoch": 0.3759861447159982, "grad_norm": 2.4942073822021484, "learning_rate": 9.982652173343765e-06, "loss": 2.9144, "step": 381650 }, { "epoch": 0.3760354026938203, "grad_norm": 2.2041287422180176, "learning_rate": 9.982638613266206e-06, "loss": 2.9738, "step": 381700 }, { "epoch": 0.37608466067164237, "grad_norm": 2.3448901176452637, "learning_rate": 9.982625047900259e-06, "loss": 2.9674, "step": 381750 }, { "epoch": 0.3761339186494645, "grad_norm": 2.1779625415802, "learning_rate": 9.982611477245939e-06, "loss": 2.9622, "step": 381800 }, { "epoch": 0.37618317662728656, "grad_norm": 2.3161866664886475, "learning_rate": 9.982597901303258e-06, "loss": 2.9463, "step": 381850 }, { "epoch": 0.37623243460510863, "grad_norm": 2.3850579261779785, "learning_rate": 9.98258432007223e-06, "loss": 2.9491, "step": 381900 }, { "epoch": 0.3762816925829307, "grad_norm": 2.595911979675293, "learning_rate": 9.982570733552877e-06, "loss": 2.9195, "step": 381950 }, { "epoch": 0.37633095056075283, "grad_norm": 2.4508371353149414, "learning_rate": 9.982557141745203e-06, "loss": 2.9322, "step": 382000 }, { "epoch": 0.3763802085385749, "grad_norm": 2.613887310028076, "learning_rate": 9.982543544649228e-06, "loss": 2.8852, "step": 382050 }, { "epoch": 0.37642946651639697, "grad_norm": 2.1819987297058105, "learning_rate": 9.982529942264967e-06, "loss": 2.9761, "step": 382100 }, { "epoch": 0.3764787244942191, "grad_norm": 
2.2993366718292236, "learning_rate": 9.982516334592433e-06, "loss": 2.8982, "step": 382150 }, { "epoch": 0.37652798247204117, "grad_norm": 2.30232572555542, "learning_rate": 9.98250272163164e-06, "loss": 2.8999, "step": 382200 }, { "epoch": 0.37657724044986324, "grad_norm": 2.351127862930298, "learning_rate": 9.982489103382602e-06, "loss": 2.9606, "step": 382250 }, { "epoch": 0.37662649842768536, "grad_norm": 2.365689516067505, "learning_rate": 9.982475479845337e-06, "loss": 2.9566, "step": 382300 }, { "epoch": 0.37667575640550743, "grad_norm": 2.505145311355591, "learning_rate": 9.982461851019855e-06, "loss": 2.9188, "step": 382350 }, { "epoch": 0.3767250143833295, "grad_norm": 2.3287723064422607, "learning_rate": 9.982448216906174e-06, "loss": 2.9526, "step": 382400 }, { "epoch": 0.37677427236115163, "grad_norm": 2.4133005142211914, "learning_rate": 9.982434577504305e-06, "loss": 2.9188, "step": 382450 }, { "epoch": 0.3768235303389737, "grad_norm": 2.5473077297210693, "learning_rate": 9.982420932814265e-06, "loss": 2.9247, "step": 382500 }, { "epoch": 0.37687278831679577, "grad_norm": 2.6773362159729004, "learning_rate": 9.982407282836067e-06, "loss": 3.0252, "step": 382550 }, { "epoch": 0.3769220462946179, "grad_norm": 2.104987144470215, "learning_rate": 9.982393627569728e-06, "loss": 2.9268, "step": 382600 }, { "epoch": 0.37697130427243997, "grad_norm": 2.200910806655884, "learning_rate": 9.98237996701526e-06, "loss": 3.0066, "step": 382650 }, { "epoch": 0.37702056225026204, "grad_norm": 2.27367901802063, "learning_rate": 9.98236630117268e-06, "loss": 2.9697, "step": 382700 }, { "epoch": 0.37706982022808416, "grad_norm": 2.525668144226074, "learning_rate": 9.982352630041998e-06, "loss": 2.9623, "step": 382750 }, { "epoch": 0.37711907820590623, "grad_norm": 2.3522837162017822, "learning_rate": 9.982338953623231e-06, "loss": 2.931, "step": 382800 }, { "epoch": 0.3771683361837283, "grad_norm": 2.35286545753479, "learning_rate": 9.982325271916395e-06, "loss": 
2.9289, "step": 382850 }, { "epoch": 0.37721759416155043, "grad_norm": 2.290729522705078, "learning_rate": 9.982311584921505e-06, "loss": 2.9473, "step": 382900 }, { "epoch": 0.3772668521393725, "grad_norm": 2.3276634216308594, "learning_rate": 9.982297892638571e-06, "loss": 2.9352, "step": 382950 }, { "epoch": 0.37731611011719457, "grad_norm": 2.5626871585845947, "learning_rate": 9.982284195067612e-06, "loss": 2.9355, "step": 383000 }, { "epoch": 0.3773653680950167, "grad_norm": 2.207639694213867, "learning_rate": 9.98227049220864e-06, "loss": 2.9153, "step": 383050 }, { "epoch": 0.37741462607283877, "grad_norm": 2.305419921875, "learning_rate": 9.982256784061673e-06, "loss": 3.0122, "step": 383100 }, { "epoch": 0.37746388405066084, "grad_norm": 2.1661837100982666, "learning_rate": 9.98224307062672e-06, "loss": 2.9663, "step": 383150 }, { "epoch": 0.3775131420284829, "grad_norm": 2.267343282699585, "learning_rate": 9.9822293519038e-06, "loss": 2.9553, "step": 383200 }, { "epoch": 0.37756240000630503, "grad_norm": 2.2342965602874756, "learning_rate": 9.982215627892925e-06, "loss": 2.9212, "step": 383250 }, { "epoch": 0.3776116579841271, "grad_norm": 2.263211488723755, "learning_rate": 9.982201898594113e-06, "loss": 2.919, "step": 383300 }, { "epoch": 0.3776609159619492, "grad_norm": 2.8387789726257324, "learning_rate": 9.982188164007374e-06, "loss": 2.9201, "step": 383350 }, { "epoch": 0.3777101739397713, "grad_norm": 2.2591090202331543, "learning_rate": 9.982174424132727e-06, "loss": 3.0324, "step": 383400 }, { "epoch": 0.37775943191759337, "grad_norm": 3.034109354019165, "learning_rate": 9.982160678970182e-06, "loss": 2.9244, "step": 383450 }, { "epoch": 0.37780868989541544, "grad_norm": 2.061087131500244, "learning_rate": 9.982146928519758e-06, "loss": 3.0109, "step": 383500 }, { "epoch": 0.37785794787323757, "grad_norm": 2.2751572132110596, "learning_rate": 9.982133172781468e-06, "loss": 3.0019, "step": 383550 }, { "epoch": 0.37790720585105964, "grad_norm": 
2.3569588661193848, "learning_rate": 9.982119411755324e-06, "loss": 2.9185, "step": 383600 }, { "epoch": 0.3779564638288817, "grad_norm": 2.2831270694732666, "learning_rate": 9.982105645441345e-06, "loss": 2.9403, "step": 383650 }, { "epoch": 0.37800572180670383, "grad_norm": 2.432507038116455, "learning_rate": 9.982091873839542e-06, "loss": 2.9767, "step": 383700 }, { "epoch": 0.3780549797845259, "grad_norm": 2.270585536956787, "learning_rate": 9.982078096949931e-06, "loss": 2.929, "step": 383750 }, { "epoch": 0.37810423776234797, "grad_norm": 2.340792417526245, "learning_rate": 9.982064314772528e-06, "loss": 2.9184, "step": 383800 }, { "epoch": 0.3781534957401701, "grad_norm": 2.3089921474456787, "learning_rate": 9.982050527307345e-06, "loss": 2.8877, "step": 383850 }, { "epoch": 0.37820275371799217, "grad_norm": 2.2184929847717285, "learning_rate": 9.982036734554397e-06, "loss": 2.9103, "step": 383900 }, { "epoch": 0.37825201169581424, "grad_norm": 2.3943469524383545, "learning_rate": 9.982022936513703e-06, "loss": 2.9159, "step": 383950 }, { "epoch": 0.37830126967363636, "grad_norm": 2.278113842010498, "learning_rate": 9.982009133185272e-06, "loss": 2.928, "step": 384000 }, { "epoch": 0.37835052765145843, "grad_norm": 2.2057759761810303, "learning_rate": 9.98199532456912e-06, "loss": 2.9584, "step": 384050 }, { "epoch": 0.3783997856292805, "grad_norm": 2.3828728199005127, "learning_rate": 9.981981510665264e-06, "loss": 2.9749, "step": 384100 }, { "epoch": 0.37844904360710263, "grad_norm": 2.357083797454834, "learning_rate": 9.981967691473717e-06, "loss": 2.9828, "step": 384150 }, { "epoch": 0.3784983015849247, "grad_norm": 2.1632978916168213, "learning_rate": 9.981953866994493e-06, "loss": 3.0096, "step": 384200 }, { "epoch": 0.37854755956274677, "grad_norm": 2.434342384338379, "learning_rate": 9.981940037227607e-06, "loss": 2.9527, "step": 384250 }, { "epoch": 0.3785968175405689, "grad_norm": 2.4327685832977295, "learning_rate": 9.981926202173078e-06, "loss": 
2.941, "step": 384300 }, { "epoch": 0.37864607551839097, "grad_norm": 2.2512011528015137, "learning_rate": 9.981912361830913e-06, "loss": 2.9878, "step": 384350 }, { "epoch": 0.37869533349621304, "grad_norm": 2.3989973068237305, "learning_rate": 9.981898516201134e-06, "loss": 2.8809, "step": 384400 }, { "epoch": 0.3787445914740351, "grad_norm": 2.205296277999878, "learning_rate": 9.981884665283749e-06, "loss": 2.8709, "step": 384450 }, { "epoch": 0.37879384945185723, "grad_norm": 2.297117233276367, "learning_rate": 9.981870809078776e-06, "loss": 2.9554, "step": 384500 }, { "epoch": 0.3788431074296793, "grad_norm": 2.2439632415771484, "learning_rate": 9.981856947586232e-06, "loss": 2.9371, "step": 384550 }, { "epoch": 0.3788923654075014, "grad_norm": 2.375781536102295, "learning_rate": 9.98184308080613e-06, "loss": 2.9318, "step": 384600 }, { "epoch": 0.3789416233853235, "grad_norm": 2.498847723007202, "learning_rate": 9.981829208738482e-06, "loss": 2.9316, "step": 384650 }, { "epoch": 0.37899088136314557, "grad_norm": 2.691211700439453, "learning_rate": 9.981815331383303e-06, "loss": 2.9711, "step": 384700 }, { "epoch": 0.37904013934096764, "grad_norm": 2.297635316848755, "learning_rate": 9.981801448740613e-06, "loss": 2.9477, "step": 384750 }, { "epoch": 0.37908939731878977, "grad_norm": 2.405432939529419, "learning_rate": 9.981787560810424e-06, "loss": 2.9108, "step": 384800 }, { "epoch": 0.37913865529661184, "grad_norm": 2.379842758178711, "learning_rate": 9.981773667592747e-06, "loss": 2.9015, "step": 384850 }, { "epoch": 0.3791879132744339, "grad_norm": 2.2931039333343506, "learning_rate": 9.981759769087601e-06, "loss": 2.9823, "step": 384900 }, { "epoch": 0.37923717125225603, "grad_norm": 2.2205679416656494, "learning_rate": 9.981745865294999e-06, "loss": 2.9363, "step": 384950 }, { "epoch": 0.3792864292300781, "grad_norm": 2.728055238723755, "learning_rate": 9.981731956214957e-06, "loss": 2.9761, "step": 385000 }, { "epoch": 0.3793356872079002, "grad_norm": 
2.280641794204712, "learning_rate": 9.981718041847489e-06, "loss": 3.0008, "step": 385050 }, { "epoch": 0.3793849451857223, "grad_norm": 2.257351875305176, "learning_rate": 9.981704122192611e-06, "loss": 2.9602, "step": 385100 }, { "epoch": 0.37943420316354437, "grad_norm": 3.0179498195648193, "learning_rate": 9.981690197250333e-06, "loss": 2.9687, "step": 385150 }, { "epoch": 0.37948346114136644, "grad_norm": 2.336813449859619, "learning_rate": 9.981676267020678e-06, "loss": 2.9693, "step": 385200 }, { "epoch": 0.37953271911918857, "grad_norm": 2.234058141708374, "learning_rate": 9.981662331503655e-06, "loss": 2.9492, "step": 385250 }, { "epoch": 0.37958197709701064, "grad_norm": 2.5932741165161133, "learning_rate": 9.981648390699277e-06, "loss": 2.8995, "step": 385300 }, { "epoch": 0.3796312350748327, "grad_norm": 2.118471622467041, "learning_rate": 9.981634444607564e-06, "loss": 2.9583, "step": 385350 }, { "epoch": 0.37968049305265483, "grad_norm": 2.3443875312805176, "learning_rate": 9.981620493228528e-06, "loss": 2.9432, "step": 385400 }, { "epoch": 0.3797297510304769, "grad_norm": 2.5241475105285645, "learning_rate": 9.981606536562185e-06, "loss": 2.9689, "step": 385450 }, { "epoch": 0.379779009008299, "grad_norm": 2.3216278553009033, "learning_rate": 9.981592574608548e-06, "loss": 2.9448, "step": 385500 }, { "epoch": 0.3798282669861211, "grad_norm": 2.2835676670074463, "learning_rate": 9.981578607367635e-06, "loss": 2.9422, "step": 385550 }, { "epoch": 0.37987752496394317, "grad_norm": 2.383307933807373, "learning_rate": 9.981564634839457e-06, "loss": 2.892, "step": 385600 }, { "epoch": 0.37992678294176524, "grad_norm": 2.352965831756592, "learning_rate": 9.981550657024032e-06, "loss": 2.9538, "step": 385650 }, { "epoch": 0.3799760409195873, "grad_norm": 2.384941339492798, "learning_rate": 9.981536673921372e-06, "loss": 2.9642, "step": 385700 }, { "epoch": 0.38002529889740944, "grad_norm": 2.2934842109680176, "learning_rate": 9.981522685531494e-06, "loss": 
2.9216, "step": 385750 }, { "epoch": 0.3800745568752315, "grad_norm": 2.301699161529541, "learning_rate": 9.981508691854414e-06, "loss": 2.9439, "step": 385800 }, { "epoch": 0.3801238148530536, "grad_norm": 2.949601173400879, "learning_rate": 9.981494692890142e-06, "loss": 2.9173, "step": 385850 }, { "epoch": 0.3801730728308757, "grad_norm": 2.270137071609497, "learning_rate": 9.981480688638697e-06, "loss": 2.9662, "step": 385900 }, { "epoch": 0.3802223308086978, "grad_norm": 2.2989695072174072, "learning_rate": 9.981466679100093e-06, "loss": 2.9579, "step": 385950 }, { "epoch": 0.38027158878651984, "grad_norm": 2.3157379627227783, "learning_rate": 9.981452664274345e-06, "loss": 2.9569, "step": 386000 }, { "epoch": 0.38032084676434197, "grad_norm": 2.313140392303467, "learning_rate": 9.981438644161466e-06, "loss": 2.9406, "step": 386050 }, { "epoch": 0.38037010474216404, "grad_norm": 2.3458917140960693, "learning_rate": 9.981424618761474e-06, "loss": 2.8884, "step": 386100 }, { "epoch": 0.3804193627199861, "grad_norm": 2.3566503524780273, "learning_rate": 9.981410588074382e-06, "loss": 2.9198, "step": 386150 }, { "epoch": 0.38046862069780824, "grad_norm": 2.458625316619873, "learning_rate": 9.981396552100205e-06, "loss": 2.924, "step": 386200 }, { "epoch": 0.3805178786756303, "grad_norm": 2.2177822589874268, "learning_rate": 9.981382510838958e-06, "loss": 2.9881, "step": 386250 }, { "epoch": 0.3805671366534524, "grad_norm": 2.2473883628845215, "learning_rate": 9.981368464290656e-06, "loss": 2.8866, "step": 386300 }, { "epoch": 0.3806163946312745, "grad_norm": 2.1755101680755615, "learning_rate": 9.981354412455314e-06, "loss": 2.93, "step": 386350 }, { "epoch": 0.38066565260909657, "grad_norm": 2.4807536602020264, "learning_rate": 9.981340355332947e-06, "loss": 2.9347, "step": 386400 }, { "epoch": 0.38071491058691864, "grad_norm": 2.348172187805176, "learning_rate": 9.98132629292357e-06, "loss": 3.0173, "step": 386450 }, { "epoch": 0.38076416856474077, "grad_norm": 
2.2987067699432373, "learning_rate": 9.981312225227196e-06, "loss": 2.9281, "step": 386500 }, { "epoch": 0.38081342654256284, "grad_norm": 2.388678789138794, "learning_rate": 9.981298152243842e-06, "loss": 2.9713, "step": 386550 }, { "epoch": 0.3808626845203849, "grad_norm": 2.2215285301208496, "learning_rate": 9.981284073973522e-06, "loss": 2.9481, "step": 386600 }, { "epoch": 0.38091194249820703, "grad_norm": 2.215832471847534, "learning_rate": 9.981269990416253e-06, "loss": 2.9948, "step": 386650 }, { "epoch": 0.3809612004760291, "grad_norm": 2.3434252738952637, "learning_rate": 9.98125590157205e-06, "loss": 2.8829, "step": 386700 }, { "epoch": 0.3810104584538512, "grad_norm": 2.145595073699951, "learning_rate": 9.981241807440922e-06, "loss": 2.936, "step": 386750 }, { "epoch": 0.3810597164316733, "grad_norm": 2.2813658714294434, "learning_rate": 9.98122770802289e-06, "loss": 2.9104, "step": 386800 }, { "epoch": 0.38110897440949537, "grad_norm": 2.4013724327087402, "learning_rate": 9.981213603317969e-06, "loss": 2.8793, "step": 386850 }, { "epoch": 0.38115823238731744, "grad_norm": 2.357405424118042, "learning_rate": 9.98119949332617e-06, "loss": 2.9712, "step": 386900 }, { "epoch": 0.3812074903651395, "grad_norm": 2.4944064617156982, "learning_rate": 9.981185378047512e-06, "loss": 2.8684, "step": 386950 }, { "epoch": 0.38125674834296164, "grad_norm": 2.2425084114074707, "learning_rate": 9.981171257482006e-06, "loss": 2.8901, "step": 387000 }, { "epoch": 0.3813060063207837, "grad_norm": 2.3172616958618164, "learning_rate": 9.981157131629671e-06, "loss": 3.005, "step": 387050 }, { "epoch": 0.3813552642986058, "grad_norm": 2.245866060256958, "learning_rate": 9.98114300049052e-06, "loss": 2.9596, "step": 387100 }, { "epoch": 0.3814045222764279, "grad_norm": 2.287627696990967, "learning_rate": 9.981128864064567e-06, "loss": 2.894, "step": 387150 }, { "epoch": 0.38145378025425, "grad_norm": 2.3936431407928467, "learning_rate": 9.98111472235183e-06, "loss": 2.9409, 
"step": 387200 }, { "epoch": 0.38150303823207204, "grad_norm": 2.4696266651153564, "learning_rate": 9.98110057535232e-06, "loss": 2.9893, "step": 387250 }, { "epoch": 0.38155229620989417, "grad_norm": 2.525493621826172, "learning_rate": 9.981086423066057e-06, "loss": 2.9332, "step": 387300 }, { "epoch": 0.38160155418771624, "grad_norm": 2.131305694580078, "learning_rate": 9.98107226549305e-06, "loss": 2.9005, "step": 387350 }, { "epoch": 0.3816508121655383, "grad_norm": 2.3775036334991455, "learning_rate": 9.981058102633319e-06, "loss": 2.8762, "step": 387400 }, { "epoch": 0.38170007014336044, "grad_norm": 2.217301368713379, "learning_rate": 9.981043934486877e-06, "loss": 2.9507, "step": 387450 }, { "epoch": 0.3817493281211825, "grad_norm": 2.307715654373169, "learning_rate": 9.98102976105374e-06, "loss": 2.9253, "step": 387500 }, { "epoch": 0.3817985860990046, "grad_norm": 2.32776141166687, "learning_rate": 9.98101558233392e-06, "loss": 2.9376, "step": 387550 }, { "epoch": 0.3818478440768267, "grad_norm": 2.397231340408325, "learning_rate": 9.981001398327438e-06, "loss": 2.8954, "step": 387600 }, { "epoch": 0.3818971020546488, "grad_norm": 2.1324849128723145, "learning_rate": 9.980987209034303e-06, "loss": 2.9716, "step": 387650 }, { "epoch": 0.38194636003247084, "grad_norm": 2.6258606910705566, "learning_rate": 9.980973014454534e-06, "loss": 2.9195, "step": 387700 }, { "epoch": 0.38199561801029297, "grad_norm": 2.2931880950927734, "learning_rate": 9.980958814588143e-06, "loss": 2.909, "step": 387750 }, { "epoch": 0.38204487598811504, "grad_norm": 2.3988630771636963, "learning_rate": 9.980944609435147e-06, "loss": 2.9345, "step": 387800 }, { "epoch": 0.3820941339659371, "grad_norm": 2.296689510345459, "learning_rate": 9.980930398995563e-06, "loss": 2.9057, "step": 387850 }, { "epoch": 0.38214339194375924, "grad_norm": 2.304396152496338, "learning_rate": 9.9809161832694e-06, "loss": 2.9478, "step": 387900 }, { "epoch": 0.3821926499215813, "grad_norm": 
2.404599905014038, "learning_rate": 9.98090196225668e-06, "loss": 2.9499, "step": 387950 }, { "epoch": 0.3822419078994034, "grad_norm": 2.4150235652923584, "learning_rate": 9.980887735957413e-06, "loss": 2.9602, "step": 388000 }, { "epoch": 0.3822911658772255, "grad_norm": 2.237356662750244, "learning_rate": 9.980873504371615e-06, "loss": 2.9024, "step": 388050 }, { "epoch": 0.3823404238550476, "grad_norm": 2.144479751586914, "learning_rate": 9.980859267499306e-06, "loss": 2.9968, "step": 388100 }, { "epoch": 0.38238968183286964, "grad_norm": 2.637641668319702, "learning_rate": 9.980845025340494e-06, "loss": 2.8828, "step": 388150 }, { "epoch": 0.3824389398106917, "grad_norm": 2.304641008377075, "learning_rate": 9.9808307778952e-06, "loss": 2.9123, "step": 388200 }, { "epoch": 0.38248819778851384, "grad_norm": 2.3309876918792725, "learning_rate": 9.980816525163436e-06, "loss": 2.9351, "step": 388250 }, { "epoch": 0.3825374557663359, "grad_norm": 2.467850685119629, "learning_rate": 9.980802267145216e-06, "loss": 2.9444, "step": 388300 }, { "epoch": 0.382586713744158, "grad_norm": 2.334174156188965, "learning_rate": 9.980788003840557e-06, "loss": 2.911, "step": 388350 }, { "epoch": 0.3826359717219801, "grad_norm": 2.2261812686920166, "learning_rate": 9.980773735249476e-06, "loss": 2.9438, "step": 388400 }, { "epoch": 0.3826852296998022, "grad_norm": 2.416165590286255, "learning_rate": 9.980759461371984e-06, "loss": 2.9935, "step": 388450 }, { "epoch": 0.38273448767762425, "grad_norm": 2.4472458362579346, "learning_rate": 9.9807451822081e-06, "loss": 2.9617, "step": 388500 }, { "epoch": 0.3827837456554464, "grad_norm": 2.564047336578369, "learning_rate": 9.980730897757837e-06, "loss": 2.9265, "step": 388550 }, { "epoch": 0.38283300363326844, "grad_norm": 2.3975324630737305, "learning_rate": 9.98071660802121e-06, "loss": 2.9481, "step": 388600 }, { "epoch": 0.3828822616110905, "grad_norm": 2.257317304611206, "learning_rate": 9.980702312998235e-06, "loss": 2.931, 
"step": 388650 }, { "epoch": 0.38293151958891264, "grad_norm": 2.3408148288726807, "learning_rate": 9.980688012688926e-06, "loss": 2.8911, "step": 388700 }, { "epoch": 0.3829807775667347, "grad_norm": 2.4286556243896484, "learning_rate": 9.9806737070933e-06, "loss": 2.8983, "step": 388750 }, { "epoch": 0.3830300355445568, "grad_norm": 2.3013904094696045, "learning_rate": 9.98065939621137e-06, "loss": 2.9112, "step": 388800 }, { "epoch": 0.3830792935223789, "grad_norm": 2.1755692958831787, "learning_rate": 9.980645080043154e-06, "loss": 2.9062, "step": 388850 }, { "epoch": 0.383128551500201, "grad_norm": 2.4384419918060303, "learning_rate": 9.980630758588665e-06, "loss": 2.9605, "step": 388900 }, { "epoch": 0.38317780947802305, "grad_norm": 2.3144309520721436, "learning_rate": 9.98061643184792e-06, "loss": 2.9, "step": 388950 }, { "epoch": 0.38322706745584517, "grad_norm": 2.4883787631988525, "learning_rate": 9.980602099820932e-06, "loss": 2.9426, "step": 389000 }, { "epoch": 0.38327632543366724, "grad_norm": 2.3406410217285156, "learning_rate": 9.980587762507716e-06, "loss": 2.9644, "step": 389050 }, { "epoch": 0.3833255834114893, "grad_norm": 2.0897815227508545, "learning_rate": 9.980573419908291e-06, "loss": 2.9962, "step": 389100 }, { "epoch": 0.38337484138931144, "grad_norm": 2.359740972518921, "learning_rate": 9.980559072022668e-06, "loss": 2.9128, "step": 389150 }, { "epoch": 0.3834240993671335, "grad_norm": 2.1455585956573486, "learning_rate": 9.980544718850864e-06, "loss": 2.918, "step": 389200 }, { "epoch": 0.3834733573449556, "grad_norm": 2.3495569229125977, "learning_rate": 9.980530360392894e-06, "loss": 2.8974, "step": 389250 }, { "epoch": 0.3835226153227777, "grad_norm": 2.241229772567749, "learning_rate": 9.980515996648776e-06, "loss": 2.9793, "step": 389300 }, { "epoch": 0.3835718733005998, "grad_norm": 2.5353002548217773, "learning_rate": 9.98050162761852e-06, "loss": 2.9812, "step": 389350 }, { "epoch": 0.38362113127842185, "grad_norm": 
2.265974283218384, "learning_rate": 9.980487253302145e-06, "loss": 2.8892, "step": 389400 }, { "epoch": 0.3836703892562439, "grad_norm": 2.4591147899627686, "learning_rate": 9.980472873699663e-06, "loss": 2.9266, "step": 389450 }, { "epoch": 0.38371964723406604, "grad_norm": 2.5734171867370605, "learning_rate": 9.980458488811093e-06, "loss": 2.8837, "step": 389500 }, { "epoch": 0.3837689052118881, "grad_norm": 2.7331769466400146, "learning_rate": 9.98044409863645e-06, "loss": 2.949, "step": 389550 }, { "epoch": 0.3838181631897102, "grad_norm": 2.399404287338257, "learning_rate": 9.980429703175746e-06, "loss": 2.9357, "step": 389600 }, { "epoch": 0.3838674211675323, "grad_norm": 2.356882095336914, "learning_rate": 9.980415302428998e-06, "loss": 2.9902, "step": 389650 }, { "epoch": 0.3839166791453544, "grad_norm": 2.3405539989471436, "learning_rate": 9.980400896396223e-06, "loss": 2.9746, "step": 389700 }, { "epoch": 0.38396593712317645, "grad_norm": 2.1536624431610107, "learning_rate": 9.980386485077434e-06, "loss": 2.9951, "step": 389750 }, { "epoch": 0.3840151951009986, "grad_norm": 2.2097647190093994, "learning_rate": 9.98037206847265e-06, "loss": 2.9882, "step": 389800 }, { "epoch": 0.38406445307882064, "grad_norm": 2.235062599182129, "learning_rate": 9.98035764658188e-06, "loss": 2.9422, "step": 389850 }, { "epoch": 0.3841137110566427, "grad_norm": 2.4241256713867188, "learning_rate": 9.980343219405143e-06, "loss": 2.968, "step": 389900 }, { "epoch": 0.38416296903446484, "grad_norm": 2.419393301010132, "learning_rate": 9.980328786942455e-06, "loss": 2.9558, "step": 389950 }, { "epoch": 0.3842122270122869, "grad_norm": 2.4655115604400635, "learning_rate": 9.980314349193828e-06, "loss": 2.939, "step": 390000 }, { "epoch": 0.384261484990109, "grad_norm": 2.3204212188720703, "learning_rate": 9.980299906159283e-06, "loss": 2.9337, "step": 390050 }, { "epoch": 0.3843107429679311, "grad_norm": 2.180722713470459, "learning_rate": 9.98028545783883e-06, "loss": 2.9663, 
"step": 390100 }, { "epoch": 0.3843600009457532, "grad_norm": 2.111448049545288, "learning_rate": 9.980271004232487e-06, "loss": 2.9666, "step": 390150 }, { "epoch": 0.38440925892357525, "grad_norm": 2.310415744781494, "learning_rate": 9.980256545340269e-06, "loss": 2.9857, "step": 390200 }, { "epoch": 0.3844585169013974, "grad_norm": 2.3165862560272217, "learning_rate": 9.98024208116219e-06, "loss": 2.9814, "step": 390250 }, { "epoch": 0.38450777487921944, "grad_norm": 2.269986152648926, "learning_rate": 9.980227611698267e-06, "loss": 2.9886, "step": 390300 }, { "epoch": 0.3845570328570415, "grad_norm": 2.1247880458831787, "learning_rate": 9.980213136948514e-06, "loss": 2.9267, "step": 390350 }, { "epoch": 0.38460629083486364, "grad_norm": 2.4749484062194824, "learning_rate": 9.980198656912947e-06, "loss": 2.9791, "step": 390400 }, { "epoch": 0.3846555488126857, "grad_norm": 2.229966402053833, "learning_rate": 9.980184171591585e-06, "loss": 2.9246, "step": 390450 }, { "epoch": 0.3847048067905078, "grad_norm": 2.3978431224823, "learning_rate": 9.980169680984435e-06, "loss": 2.9576, "step": 390500 }, { "epoch": 0.3847540647683299, "grad_norm": 2.3180367946624756, "learning_rate": 9.98015518509152e-06, "loss": 2.9483, "step": 390550 }, { "epoch": 0.384803322746152, "grad_norm": 2.3679287433624268, "learning_rate": 9.980140683912852e-06, "loss": 2.8582, "step": 390600 }, { "epoch": 0.38485258072397405, "grad_norm": 2.435758113861084, "learning_rate": 9.980126177448446e-06, "loss": 2.9315, "step": 390650 }, { "epoch": 0.3849018387017961, "grad_norm": 2.3825714588165283, "learning_rate": 9.980111665698321e-06, "loss": 2.9572, "step": 390700 }, { "epoch": 0.38495109667961824, "grad_norm": 2.192493200302124, "learning_rate": 9.980097148662486e-06, "loss": 2.9259, "step": 390750 }, { "epoch": 0.3850003546574403, "grad_norm": 2.3830983638763428, "learning_rate": 9.980082626340964e-06, "loss": 2.9253, "step": 390800 }, { "epoch": 0.3850496126352624, "grad_norm": 
2.2133421897888184, "learning_rate": 9.980068098733764e-06, "loss": 2.9066, "step": 390850 }, { "epoch": 0.3850988706130845, "grad_norm": 2.2772750854492188, "learning_rate": 9.980053565840905e-06, "loss": 2.9671, "step": 390900 }, { "epoch": 0.3851481285909066, "grad_norm": 2.212965726852417, "learning_rate": 9.980039027662401e-06, "loss": 2.9365, "step": 390950 }, { "epoch": 0.38519738656872865, "grad_norm": 2.531747817993164, "learning_rate": 9.980024484198268e-06, "loss": 2.8745, "step": 391000 }, { "epoch": 0.3852466445465508, "grad_norm": 2.2650229930877686, "learning_rate": 9.980009935448522e-06, "loss": 2.9549, "step": 391050 }, { "epoch": 0.38529590252437285, "grad_norm": 2.329008102416992, "learning_rate": 9.979995381413177e-06, "loss": 2.9974, "step": 391100 }, { "epoch": 0.3853451605021949, "grad_norm": 2.1057419776916504, "learning_rate": 9.97998082209225e-06, "loss": 2.8691, "step": 391150 }, { "epoch": 0.38539441848001704, "grad_norm": 2.4120092391967773, "learning_rate": 9.979966257485754e-06, "loss": 2.9484, "step": 391200 }, { "epoch": 0.3854436764578391, "grad_norm": 2.2876040935516357, "learning_rate": 9.979951687593708e-06, "loss": 2.8632, "step": 391250 }, { "epoch": 0.3854929344356612, "grad_norm": 2.1893272399902344, "learning_rate": 9.979937112416126e-06, "loss": 2.8805, "step": 391300 }, { "epoch": 0.3855421924134833, "grad_norm": 2.0460052490234375, "learning_rate": 9.979922531953021e-06, "loss": 3.0156, "step": 391350 }, { "epoch": 0.3855914503913054, "grad_norm": 2.2820894718170166, "learning_rate": 9.979907946204412e-06, "loss": 3.0276, "step": 391400 }, { "epoch": 0.38564070836912745, "grad_norm": 2.1148838996887207, "learning_rate": 9.979893355170312e-06, "loss": 3.0004, "step": 391450 }, { "epoch": 0.3856899663469496, "grad_norm": 2.232616662979126, "learning_rate": 9.979878758850738e-06, "loss": 3.0147, "step": 391500 }, { "epoch": 0.38573922432477165, "grad_norm": 2.332245111465454, "learning_rate": 9.979864157245706e-06, "loss": 
2.9141, "step": 391550 }, { "epoch": 0.3857884823025937, "grad_norm": 2.227760076522827, "learning_rate": 9.979849550355229e-06, "loss": 2.9422, "step": 391600 }, { "epoch": 0.38583774028041584, "grad_norm": 2.28507924079895, "learning_rate": 9.979834938179324e-06, "loss": 2.9111, "step": 391650 }, { "epoch": 0.3858869982582379, "grad_norm": 2.2557334899902344, "learning_rate": 9.979820320718008e-06, "loss": 2.874, "step": 391700 }, { "epoch": 0.38593625623606, "grad_norm": 2.42170786857605, "learning_rate": 9.979805697971294e-06, "loss": 2.9151, "step": 391750 }, { "epoch": 0.38598551421388205, "grad_norm": 2.4137589931488037, "learning_rate": 9.979791069939198e-06, "loss": 2.9202, "step": 391800 }, { "epoch": 0.3860347721917042, "grad_norm": 2.3238813877105713, "learning_rate": 9.979776436621737e-06, "loss": 2.9524, "step": 391850 }, { "epoch": 0.38608403016952625, "grad_norm": 2.295217514038086, "learning_rate": 9.979761798018924e-06, "loss": 2.9432, "step": 391900 }, { "epoch": 0.3861332881473483, "grad_norm": 2.3724780082702637, "learning_rate": 9.979747154130778e-06, "loss": 2.9694, "step": 391950 }, { "epoch": 0.38618254612517044, "grad_norm": 2.45386004447937, "learning_rate": 9.979732504957312e-06, "loss": 2.9135, "step": 392000 }, { "epoch": 0.3862318041029925, "grad_norm": 2.141925573348999, "learning_rate": 9.979717850498542e-06, "loss": 3.0031, "step": 392050 }, { "epoch": 0.3862810620808146, "grad_norm": 2.257509708404541, "learning_rate": 9.979703190754485e-06, "loss": 2.923, "step": 392100 }, { "epoch": 0.3863303200586367, "grad_norm": 2.45166015625, "learning_rate": 9.979688525725154e-06, "loss": 2.9685, "step": 392150 }, { "epoch": 0.3863795780364588, "grad_norm": 2.0280039310455322, "learning_rate": 9.979673855410565e-06, "loss": 2.9469, "step": 392200 }, { "epoch": 0.38642883601428085, "grad_norm": 2.0004684925079346, "learning_rate": 9.979659179810736e-06, "loss": 2.9351, "step": 392250 }, { "epoch": 0.386478093992103, "grad_norm": 
2.286041021347046, "learning_rate": 9.979644498925681e-06, "loss": 2.9328, "step": 392300 }, { "epoch": 0.38652735196992505, "grad_norm": 2.3231406211853027, "learning_rate": 9.979629812755416e-06, "loss": 2.8741, "step": 392350 }, { "epoch": 0.3865766099477471, "grad_norm": 2.4011967182159424, "learning_rate": 9.979615121299954e-06, "loss": 2.9646, "step": 392400 }, { "epoch": 0.38662586792556924, "grad_norm": 2.19826078414917, "learning_rate": 9.979600424559315e-06, "loss": 2.912, "step": 392450 }, { "epoch": 0.3866751259033913, "grad_norm": 2.4304044246673584, "learning_rate": 9.979585722533511e-06, "loss": 2.9234, "step": 392500 }, { "epoch": 0.3867243838812134, "grad_norm": 2.1641976833343506, "learning_rate": 9.97957101522256e-06, "loss": 2.9399, "step": 392550 }, { "epoch": 0.3867736418590355, "grad_norm": 2.334501266479492, "learning_rate": 9.979556302626478e-06, "loss": 2.9586, "step": 392600 }, { "epoch": 0.3868228998368576, "grad_norm": 2.230888605117798, "learning_rate": 9.979541584745275e-06, "loss": 2.8994, "step": 392650 }, { "epoch": 0.38687215781467965, "grad_norm": 2.181016206741333, "learning_rate": 9.979526861578974e-06, "loss": 2.9058, "step": 392700 }, { "epoch": 0.3869214157925018, "grad_norm": 2.206455707550049, "learning_rate": 9.979512133127588e-06, "loss": 2.9382, "step": 392750 }, { "epoch": 0.38697067377032385, "grad_norm": 2.3430042266845703, "learning_rate": 9.97949739939113e-06, "loss": 2.9415, "step": 392800 }, { "epoch": 0.3870199317481459, "grad_norm": 2.262279510498047, "learning_rate": 9.979482660369619e-06, "loss": 2.939, "step": 392850 }, { "epoch": 0.38706918972596804, "grad_norm": 2.2696304321289062, "learning_rate": 9.97946791606307e-06, "loss": 2.9772, "step": 392900 }, { "epoch": 0.3871184477037901, "grad_norm": 2.395158529281616, "learning_rate": 9.979453166471498e-06, "loss": 2.936, "step": 392950 }, { "epoch": 0.3871677056816122, "grad_norm": 2.3531689643859863, "learning_rate": 9.979438411594917e-06, "loss": 2.9255, 
"step": 393000 }, { "epoch": 0.38721696365943425, "grad_norm": 2.2111704349517822, "learning_rate": 9.979423651433345e-06, "loss": 2.9817, "step": 393050 }, { "epoch": 0.3872662216372564, "grad_norm": 2.2347002029418945, "learning_rate": 9.979408885986798e-06, "loss": 2.9635, "step": 393100 }, { "epoch": 0.38731547961507845, "grad_norm": 2.2006163597106934, "learning_rate": 9.97939411525529e-06, "loss": 2.9003, "step": 393150 }, { "epoch": 0.3873647375929005, "grad_norm": 2.330244779586792, "learning_rate": 9.979379339238836e-06, "loss": 2.9877, "step": 393200 }, { "epoch": 0.38741399557072265, "grad_norm": 2.5020036697387695, "learning_rate": 9.979364557937454e-06, "loss": 2.9181, "step": 393250 }, { "epoch": 0.3874632535485447, "grad_norm": 2.2434589862823486, "learning_rate": 9.97934977135116e-06, "loss": 2.9512, "step": 393300 }, { "epoch": 0.3875125115263668, "grad_norm": 2.4554030895233154, "learning_rate": 9.979334979479968e-06, "loss": 2.8939, "step": 393350 }, { "epoch": 0.3875617695041889, "grad_norm": 2.2982890605926514, "learning_rate": 9.979320182323894e-06, "loss": 2.9071, "step": 393400 }, { "epoch": 0.387611027482011, "grad_norm": 2.199631929397583, "learning_rate": 9.979305379882954e-06, "loss": 2.9793, "step": 393450 }, { "epoch": 0.38766028545983305, "grad_norm": 2.4045679569244385, "learning_rate": 9.979290572157163e-06, "loss": 2.9522, "step": 393500 }, { "epoch": 0.3877095434376552, "grad_norm": 2.3802435398101807, "learning_rate": 9.979275759146538e-06, "loss": 2.9835, "step": 393550 }, { "epoch": 0.38775880141547725, "grad_norm": 2.3602936267852783, "learning_rate": 9.979260940851094e-06, "loss": 2.9622, "step": 393600 }, { "epoch": 0.3878080593932993, "grad_norm": 2.3720545768737793, "learning_rate": 9.979246117270846e-06, "loss": 2.968, "step": 393650 }, { "epoch": 0.38785731737112145, "grad_norm": 2.466792583465576, "learning_rate": 9.979231288405808e-06, "loss": 2.9853, "step": 393700 }, { "epoch": 0.3879065753489435, "grad_norm": 
2.3967325687408447, "learning_rate": 9.979216454256002e-06, "loss": 2.9462, "step": 393750 }, { "epoch": 0.3879558333267656, "grad_norm": 2.1932873725891113, "learning_rate": 9.97920161482144e-06, "loss": 2.9118, "step": 393800 }, { "epoch": 0.3880050913045877, "grad_norm": 2.2917845249176025, "learning_rate": 9.979186770102136e-06, "loss": 2.9933, "step": 393850 }, { "epoch": 0.3880543492824098, "grad_norm": 2.3030731678009033, "learning_rate": 9.979171920098107e-06, "loss": 2.9489, "step": 393900 }, { "epoch": 0.38810360726023185, "grad_norm": 2.384669065475464, "learning_rate": 9.97915706480937e-06, "loss": 2.9821, "step": 393950 }, { "epoch": 0.388152865238054, "grad_norm": 2.3065404891967773, "learning_rate": 9.979142204235941e-06, "loss": 3.008, "step": 394000 }, { "epoch": 0.38820212321587605, "grad_norm": 2.3541007041931152, "learning_rate": 9.979127338377834e-06, "loss": 2.8413, "step": 394050 }, { "epoch": 0.3882513811936981, "grad_norm": 2.270364999771118, "learning_rate": 9.979112467235065e-06, "loss": 2.9201, "step": 394100 }, { "epoch": 0.38830063917152025, "grad_norm": 2.312594175338745, "learning_rate": 9.97909759080765e-06, "loss": 2.9364, "step": 394150 }, { "epoch": 0.3883498971493423, "grad_norm": 2.14455509185791, "learning_rate": 9.979082709095607e-06, "loss": 2.91, "step": 394200 }, { "epoch": 0.3883991551271644, "grad_norm": 2.552051067352295, "learning_rate": 9.979067822098949e-06, "loss": 2.9603, "step": 394250 }, { "epoch": 0.38844841310498646, "grad_norm": 2.354018211364746, "learning_rate": 9.979052929817691e-06, "loss": 2.9459, "step": 394300 }, { "epoch": 0.3884976710828086, "grad_norm": 2.487475633621216, "learning_rate": 9.979038032251853e-06, "loss": 2.9601, "step": 394350 }, { "epoch": 0.38854692906063065, "grad_norm": 2.2067630290985107, "learning_rate": 9.979023129401447e-06, "loss": 2.921, "step": 394400 }, { "epoch": 0.3885961870384527, "grad_norm": 2.3017663955688477, "learning_rate": 9.97900822126649e-06, "loss": 2.9652, 
"step": 394450 }, { "epoch": 0.38864544501627485, "grad_norm": 2.801220178604126, "learning_rate": 9.978993307846999e-06, "loss": 2.9422, "step": 394500 }, { "epoch": 0.3886947029940969, "grad_norm": 2.1518797874450684, "learning_rate": 9.978978389142988e-06, "loss": 3.0358, "step": 394550 }, { "epoch": 0.388743960971919, "grad_norm": 2.2775542736053467, "learning_rate": 9.978963465154473e-06, "loss": 2.9091, "step": 394600 }, { "epoch": 0.3887932189497411, "grad_norm": 2.538243055343628, "learning_rate": 9.97894853588147e-06, "loss": 2.8619, "step": 394650 }, { "epoch": 0.3888424769275632, "grad_norm": 2.184316635131836, "learning_rate": 9.978933601323997e-06, "loss": 2.9463, "step": 394700 }, { "epoch": 0.38889173490538526, "grad_norm": 2.2088029384613037, "learning_rate": 9.978918661482069e-06, "loss": 2.9625, "step": 394750 }, { "epoch": 0.3889409928832074, "grad_norm": 2.1962997913360596, "learning_rate": 9.9789037163557e-06, "loss": 2.9286, "step": 394800 }, { "epoch": 0.38899025086102945, "grad_norm": 2.1585006713867188, "learning_rate": 9.978888765944905e-06, "loss": 2.9684, "step": 394850 }, { "epoch": 0.3890395088388515, "grad_norm": 2.433762788772583, "learning_rate": 9.978873810249704e-06, "loss": 2.8823, "step": 394900 }, { "epoch": 0.38908876681667365, "grad_norm": 2.336325168609619, "learning_rate": 9.978858849270109e-06, "loss": 2.9762, "step": 394950 }, { "epoch": 0.3891380247944957, "grad_norm": 2.327550172805786, "learning_rate": 9.978843883006138e-06, "loss": 2.9386, "step": 395000 }, { "epoch": 0.3891872827723178, "grad_norm": 2.145416021347046, "learning_rate": 9.978828911457807e-06, "loss": 2.96, "step": 395050 }, { "epoch": 0.3892365407501399, "grad_norm": 2.2875404357910156, "learning_rate": 9.97881393462513e-06, "loss": 2.95, "step": 395100 }, { "epoch": 0.389285798727962, "grad_norm": 2.2477335929870605, "learning_rate": 9.978798952508125e-06, "loss": 2.9663, "step": 395150 }, { "epoch": 0.38933505670578405, "grad_norm": 
2.6066479682922363, "learning_rate": 9.978783965106808e-06, "loss": 2.9516, "step": 395200 }, { "epoch": 0.3893843146836062, "grad_norm": 2.2401375770568848, "learning_rate": 9.978768972421192e-06, "loss": 2.8868, "step": 395250 }, { "epoch": 0.38943357266142825, "grad_norm": 2.476761817932129, "learning_rate": 9.978753974451297e-06, "loss": 2.919, "step": 395300 }, { "epoch": 0.3894828306392503, "grad_norm": 2.2410004138946533, "learning_rate": 9.978738971197136e-06, "loss": 2.9159, "step": 395350 }, { "epoch": 0.38953208861707245, "grad_norm": 2.38810396194458, "learning_rate": 9.978723962658724e-06, "loss": 2.968, "step": 395400 }, { "epoch": 0.3895813465948945, "grad_norm": 2.720501184463501, "learning_rate": 9.97870894883608e-06, "loss": 2.9265, "step": 395450 }, { "epoch": 0.3896306045727166, "grad_norm": 2.3998937606811523, "learning_rate": 9.97869392972922e-06, "loss": 2.9485, "step": 395500 }, { "epoch": 0.38967986255053866, "grad_norm": 2.3676793575286865, "learning_rate": 9.978678905338158e-06, "loss": 2.9389, "step": 395550 }, { "epoch": 0.3897291205283608, "grad_norm": 2.835904359817505, "learning_rate": 9.978663875662909e-06, "loss": 2.9099, "step": 395600 }, { "epoch": 0.38977837850618285, "grad_norm": 2.4374372959136963, "learning_rate": 9.978648840703491e-06, "loss": 2.9211, "step": 395650 }, { "epoch": 0.3898276364840049, "grad_norm": 2.2885894775390625, "learning_rate": 9.978633800459918e-06, "loss": 2.9723, "step": 395700 }, { "epoch": 0.38987689446182705, "grad_norm": 2.2738661766052246, "learning_rate": 9.97861875493221e-06, "loss": 2.9602, "step": 395750 }, { "epoch": 0.3899261524396491, "grad_norm": 2.3410656452178955, "learning_rate": 9.978603704120379e-06, "loss": 2.9655, "step": 395800 }, { "epoch": 0.3899754104174712, "grad_norm": 2.549328565597534, "learning_rate": 9.978588648024444e-06, "loss": 2.9178, "step": 395850 }, { "epoch": 0.3900246683952933, "grad_norm": 2.274117946624756, "learning_rate": 9.978573586644417e-06, "loss": 
2.9096, "step": 395900 }, { "epoch": 0.3900739263731154, "grad_norm": 2.539698362350464, "learning_rate": 9.978558519980317e-06, "loss": 2.9022, "step": 395950 }, { "epoch": 0.39012318435093746, "grad_norm": 2.4561045169830322, "learning_rate": 9.978543448032159e-06, "loss": 3.001, "step": 396000 }, { "epoch": 0.3901724423287596, "grad_norm": 2.276322364807129, "learning_rate": 9.97852837079996e-06, "loss": 2.9612, "step": 396050 }, { "epoch": 0.39022170030658165, "grad_norm": 2.2897722721099854, "learning_rate": 9.978513288283735e-06, "loss": 2.8889, "step": 396100 }, { "epoch": 0.3902709582844037, "grad_norm": 2.2371175289154053, "learning_rate": 9.978498200483499e-06, "loss": 2.9893, "step": 396150 }, { "epoch": 0.39032021626222585, "grad_norm": 2.3963043689727783, "learning_rate": 9.97848310739927e-06, "loss": 2.8752, "step": 396200 }, { "epoch": 0.3903694742400479, "grad_norm": 2.3227274417877197, "learning_rate": 9.978468009031064e-06, "loss": 2.8799, "step": 396250 }, { "epoch": 0.39041873221787, "grad_norm": 2.6712234020233154, "learning_rate": 9.978452905378896e-06, "loss": 2.9923, "step": 396300 }, { "epoch": 0.3904679901956921, "grad_norm": 2.091886281967163, "learning_rate": 9.978437796442782e-06, "loss": 2.8841, "step": 396350 }, { "epoch": 0.3905172481735142, "grad_norm": 2.4216017723083496, "learning_rate": 9.978422682222739e-06, "loss": 2.952, "step": 396400 }, { "epoch": 0.39056650615133626, "grad_norm": 2.2659919261932373, "learning_rate": 9.978407562718781e-06, "loss": 2.9838, "step": 396450 }, { "epoch": 0.3906157641291584, "grad_norm": 2.1451048851013184, "learning_rate": 9.978392437930927e-06, "loss": 2.9622, "step": 396500 }, { "epoch": 0.39066502210698045, "grad_norm": 2.209489345550537, "learning_rate": 9.978377307859191e-06, "loss": 2.9325, "step": 396550 }, { "epoch": 0.3907142800848025, "grad_norm": 2.251373052597046, "learning_rate": 9.97836217250359e-06, "loss": 2.9034, "step": 396600 }, { "epoch": 0.39076353806262465, "grad_norm": 
2.5070114135742188, "learning_rate": 9.978347031864139e-06, "loss": 2.8591, "step": 396650 }, { "epoch": 0.3908127960404467, "grad_norm": 2.1659536361694336, "learning_rate": 9.978331885940856e-06, "loss": 2.8722, "step": 396700 }, { "epoch": 0.3908620540182688, "grad_norm": 2.306225299835205, "learning_rate": 9.978316734733754e-06, "loss": 2.978, "step": 396750 }, { "epoch": 0.39091131199609086, "grad_norm": 2.2403042316436768, "learning_rate": 9.978301578242852e-06, "loss": 2.962, "step": 396800 }, { "epoch": 0.390960569973913, "grad_norm": 2.1655938625335693, "learning_rate": 9.978286416468164e-06, "loss": 2.879, "step": 396850 }, { "epoch": 0.39100982795173506, "grad_norm": 2.3574774265289307, "learning_rate": 9.978271249409708e-06, "loss": 2.9399, "step": 396900 }, { "epoch": 0.3910590859295571, "grad_norm": 2.4231810569763184, "learning_rate": 9.978256077067498e-06, "loss": 2.9583, "step": 396950 }, { "epoch": 0.39110834390737925, "grad_norm": 2.3082828521728516, "learning_rate": 9.978240899441552e-06, "loss": 2.9233, "step": 397000 }, { "epoch": 0.3911576018852013, "grad_norm": 2.370938539505005, "learning_rate": 9.978225716531886e-06, "loss": 2.9713, "step": 397050 }, { "epoch": 0.3912068598630234, "grad_norm": 2.240556001663208, "learning_rate": 9.978210528338516e-06, "loss": 2.8558, "step": 397100 }, { "epoch": 0.3912561178408455, "grad_norm": 2.2416858673095703, "learning_rate": 9.978195334861455e-06, "loss": 2.8649, "step": 397150 }, { "epoch": 0.3913053758186676, "grad_norm": 2.497138500213623, "learning_rate": 9.978180136100724e-06, "loss": 2.9395, "step": 397200 }, { "epoch": 0.39135463379648966, "grad_norm": 2.19053053855896, "learning_rate": 9.978164932056336e-06, "loss": 2.9558, "step": 397250 }, { "epoch": 0.3914038917743118, "grad_norm": 2.421168565750122, "learning_rate": 9.978149722728309e-06, "loss": 2.9061, "step": 397300 }, { "epoch": 0.39145314975213386, "grad_norm": 2.1900625228881836, "learning_rate": 9.978134508116656e-06, "loss": 
2.9542, "step": 397350 }, { "epoch": 0.3915024077299559, "grad_norm": 2.2385172843933105, "learning_rate": 9.978119288221397e-06, "loss": 2.9121, "step": 397400 }, { "epoch": 0.39155166570777805, "grad_norm": 2.3285586833953857, "learning_rate": 9.978104063042544e-06, "loss": 2.9024, "step": 397450 }, { "epoch": 0.3916009236856001, "grad_norm": 2.2399396896362305, "learning_rate": 9.978088832580118e-06, "loss": 2.9237, "step": 397500 }, { "epoch": 0.3916501816634222, "grad_norm": 2.3268065452575684, "learning_rate": 9.978073596834134e-06, "loss": 2.9792, "step": 397550 }, { "epoch": 0.3916994396412443, "grad_norm": 2.268904447555542, "learning_rate": 9.978058355804604e-06, "loss": 2.9624, "step": 397600 }, { "epoch": 0.3917486976190664, "grad_norm": 2.361743688583374, "learning_rate": 9.978043109491549e-06, "loss": 3.0123, "step": 397650 }, { "epoch": 0.39179795559688846, "grad_norm": 2.2938709259033203, "learning_rate": 9.978027857894982e-06, "loss": 2.9391, "step": 397700 }, { "epoch": 0.3918472135747106, "grad_norm": 2.1634230613708496, "learning_rate": 9.97801260101492e-06, "loss": 2.9888, "step": 397750 }, { "epoch": 0.39189647155253265, "grad_norm": 2.225513458251953, "learning_rate": 9.977997338851382e-06, "loss": 2.8892, "step": 397800 }, { "epoch": 0.3919457295303547, "grad_norm": 2.2949130535125732, "learning_rate": 9.97798207140438e-06, "loss": 2.9009, "step": 397850 }, { "epoch": 0.39199498750817685, "grad_norm": 2.3509914875030518, "learning_rate": 9.977966798673933e-06, "loss": 2.9352, "step": 397900 }, { "epoch": 0.3920442454859989, "grad_norm": 2.3153235912323, "learning_rate": 9.977951520660056e-06, "loss": 2.9001, "step": 397950 }, { "epoch": 0.392093503463821, "grad_norm": 2.3168156147003174, "learning_rate": 9.977936237362765e-06, "loss": 2.9225, "step": 398000 }, { "epoch": 0.39214276144164306, "grad_norm": 2.1682627201080322, "learning_rate": 9.977920948782078e-06, "loss": 2.8744, "step": 398050 }, { "epoch": 0.3921920194194652, "grad_norm": 
2.2787418365478516, "learning_rate": 9.977905654918008e-06, "loss": 2.861, "step": 398100 }, { "epoch": 0.39224127739728726, "grad_norm": 2.220623016357422, "learning_rate": 9.977890355770576e-06, "loss": 2.9313, "step": 398150 }, { "epoch": 0.39229053537510933, "grad_norm": 2.3804590702056885, "learning_rate": 9.977875051339793e-06, "loss": 2.9003, "step": 398200 }, { "epoch": 0.39233979335293145, "grad_norm": 2.463669538497925, "learning_rate": 9.977859741625678e-06, "loss": 2.9559, "step": 398250 }, { "epoch": 0.3923890513307535, "grad_norm": 2.3708572387695312, "learning_rate": 9.977844426628247e-06, "loss": 2.9613, "step": 398300 }, { "epoch": 0.3924383093085756, "grad_norm": 2.1908140182495117, "learning_rate": 9.977829106347516e-06, "loss": 2.9203, "step": 398350 }, { "epoch": 0.3924875672863977, "grad_norm": 2.513256072998047, "learning_rate": 9.977813780783502e-06, "loss": 2.9374, "step": 398400 }, { "epoch": 0.3925368252642198, "grad_norm": 2.361107110977173, "learning_rate": 9.97779844993622e-06, "loss": 2.9556, "step": 398450 }, { "epoch": 0.39258608324204186, "grad_norm": 2.299482822418213, "learning_rate": 9.977783113805688e-06, "loss": 2.9135, "step": 398500 }, { "epoch": 0.392635341219864, "grad_norm": 2.1812620162963867, "learning_rate": 9.97776777239192e-06, "loss": 2.9223, "step": 398550 }, { "epoch": 0.39268459919768606, "grad_norm": 2.2581756114959717, "learning_rate": 9.977752425694936e-06, "loss": 2.9202, "step": 398600 }, { "epoch": 0.3927338571755081, "grad_norm": 2.318518877029419, "learning_rate": 9.977737073714747e-06, "loss": 3.0141, "step": 398650 }, { "epoch": 0.39278311515333025, "grad_norm": 2.2614905834198, "learning_rate": 9.977721716451373e-06, "loss": 2.9381, "step": 398700 }, { "epoch": 0.3928323731311523, "grad_norm": 2.1833560466766357, "learning_rate": 9.97770635390483e-06, "loss": 2.9181, "step": 398750 }, { "epoch": 0.3928816311089744, "grad_norm": 2.477851152420044, "learning_rate": 9.977690986075134e-06, "loss": 2.9414, 
"step": 398800 }, { "epoch": 0.3929308890867965, "grad_norm": 2.2162816524505615, "learning_rate": 9.9776756129623e-06, "loss": 2.9049, "step": 398850 }, { "epoch": 0.3929801470646186, "grad_norm": 2.256434679031372, "learning_rate": 9.977660234566345e-06, "loss": 2.9332, "step": 398900 }, { "epoch": 0.39302940504244066, "grad_norm": 2.2473368644714355, "learning_rate": 9.977644850887285e-06, "loss": 2.9607, "step": 398950 }, { "epoch": 0.3930786630202628, "grad_norm": null, "learning_rate": 9.97762946192514e-06, "loss": 2.9444, "step": 399000 }, { "epoch": 0.39312792099808486, "grad_norm": 2.2653214931488037, "learning_rate": 9.977614067679922e-06, "loss": 2.8776, "step": 399050 }, { "epoch": 0.3931771789759069, "grad_norm": 2.21346116065979, "learning_rate": 9.977598668151648e-06, "loss": 2.9114, "step": 399100 }, { "epoch": 0.39322643695372905, "grad_norm": 2.409407377243042, "learning_rate": 9.977583263340335e-06, "loss": 3.0093, "step": 399150 }, { "epoch": 0.3932756949315511, "grad_norm": 2.3705365657806396, "learning_rate": 9.977567853245999e-06, "loss": 2.9951, "step": 399200 }, { "epoch": 0.3933249529093732, "grad_norm": 2.5002593994140625, "learning_rate": 9.977552437868657e-06, "loss": 2.98, "step": 399250 }, { "epoch": 0.39337421088719526, "grad_norm": 2.295125722885132, "learning_rate": 9.977537017208327e-06, "loss": 2.8861, "step": 399300 }, { "epoch": 0.3934234688650174, "grad_norm": 2.2658581733703613, "learning_rate": 9.977521591265021e-06, "loss": 2.9057, "step": 399350 }, { "epoch": 0.39347272684283946, "grad_norm": 2.1383934020996094, "learning_rate": 9.977506160038758e-06, "loss": 2.8123, "step": 399400 }, { "epoch": 0.39352198482066153, "grad_norm": 2.384601593017578, "learning_rate": 9.977490723529555e-06, "loss": 2.8951, "step": 399450 }, { "epoch": 0.39357124279848366, "grad_norm": 2.2938709259033203, "learning_rate": 9.977475281737428e-06, "loss": 2.961, "step": 399500 }, { "epoch": 0.3936205007763057, "grad_norm": 2.4790265560150146, 
"learning_rate": 9.977459834662392e-06, "loss": 2.9497, "step": 399550 }, { "epoch": 0.3936697587541278, "grad_norm": 2.2960147857666016, "learning_rate": 9.977444382304466e-06, "loss": 2.914, "step": 399600 }, { "epoch": 0.3937190167319499, "grad_norm": 2.3548178672790527, "learning_rate": 9.977428924663663e-06, "loss": 2.8769, "step": 399650 }, { "epoch": 0.393768274709772, "grad_norm": 2.4988021850585938, "learning_rate": 9.977413461740002e-06, "loss": 2.9032, "step": 399700 }, { "epoch": 0.39381753268759406, "grad_norm": 2.2795305252075195, "learning_rate": 9.977397993533498e-06, "loss": 2.9452, "step": 399750 }, { "epoch": 0.3938667906654162, "grad_norm": 2.4802985191345215, "learning_rate": 9.977382520044168e-06, "loss": 2.8146, "step": 399800 }, { "epoch": 0.39391604864323826, "grad_norm": 2.148482322692871, "learning_rate": 9.977367041272028e-06, "loss": 2.9883, "step": 399850 }, { "epoch": 0.39396530662106033, "grad_norm": 2.322467565536499, "learning_rate": 9.977351557217095e-06, "loss": 2.9484, "step": 399900 }, { "epoch": 0.39401456459888246, "grad_norm": 2.3714652061462402, "learning_rate": 9.977336067879388e-06, "loss": 2.9774, "step": 399950 }, { "epoch": 0.3940638225767045, "grad_norm": 2.3027822971343994, "learning_rate": 9.977320573258917e-06, "loss": 2.9511, "step": 400000 }, { "epoch": 0.3941130805545266, "grad_norm": 2.3692619800567627, "learning_rate": 9.977305073355703e-06, "loss": 2.9347, "step": 400050 }, { "epoch": 0.3941623385323487, "grad_norm": 2.2894206047058105, "learning_rate": 9.977289568169762e-06, "loss": 2.9267, "step": 400100 }, { "epoch": 0.3942115965101708, "grad_norm": 2.2817955017089844, "learning_rate": 9.977274057701112e-06, "loss": 2.9367, "step": 400150 }, { "epoch": 0.39426085448799286, "grad_norm": 2.12579607963562, "learning_rate": 9.977258541949766e-06, "loss": 2.8881, "step": 400200 }, { "epoch": 0.394310112465815, "grad_norm": 2.5247082710266113, "learning_rate": 9.977243020915741e-06, "loss": 2.9906, "step": 
400250 }, { "epoch": 0.39435937044363706, "grad_norm": 2.270658016204834, "learning_rate": 9.977227494599056e-06, "loss": 2.9566, "step": 400300 }, { "epoch": 0.39440862842145913, "grad_norm": 2.174466848373413, "learning_rate": 9.977211962999725e-06, "loss": 3.0, "step": 400350 }, { "epoch": 0.39445788639928125, "grad_norm": 2.253131151199341, "learning_rate": 9.977196426117765e-06, "loss": 2.9104, "step": 400400 }, { "epoch": 0.3945071443771033, "grad_norm": 2.2439017295837402, "learning_rate": 9.977180883953194e-06, "loss": 2.9818, "step": 400450 }, { "epoch": 0.3945564023549254, "grad_norm": 2.3574743270874023, "learning_rate": 9.977165336506028e-06, "loss": 2.8921, "step": 400500 }, { "epoch": 0.39460566033274747, "grad_norm": 2.3558509349823, "learning_rate": 9.977149783776282e-06, "loss": 2.8732, "step": 400550 }, { "epoch": 0.3946549183105696, "grad_norm": 2.0467140674591064, "learning_rate": 9.977134225763973e-06, "loss": 2.9146, "step": 400600 }, { "epoch": 0.39470417628839166, "grad_norm": 2.331331729888916, "learning_rate": 9.977118662469119e-06, "loss": 2.9554, "step": 400650 }, { "epoch": 0.39475343426621373, "grad_norm": 2.303225040435791, "learning_rate": 9.977103093891733e-06, "loss": 2.8676, "step": 400700 }, { "epoch": 0.39480269224403586, "grad_norm": 2.2612719535827637, "learning_rate": 9.977087520031837e-06, "loss": 3.0032, "step": 400750 }, { "epoch": 0.39485195022185793, "grad_norm": 2.3666911125183105, "learning_rate": 9.977071940889443e-06, "loss": 2.9534, "step": 400800 } ], "logging_steps": 50, "max_steps": 5075320, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 10150, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.7226592062149755e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }