{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 49408, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004047927461139896, "grad_norm": 0.41069459915161133, "learning_rate": 9.996154468911918e-05, "loss": 1.9484, "step": 20 }, { "epoch": 0.0008095854922279792, "grad_norm": 1.0464472770690918, "learning_rate": 9.992106541450778e-05, "loss": 1.8633, "step": 40 }, { "epoch": 0.0012143782383419689, "grad_norm": 0.3557087182998657, "learning_rate": 9.988058613989638e-05, "loss": 1.8267, "step": 60 }, { "epoch": 0.0016191709844559584, "grad_norm": 0.3391210436820984, "learning_rate": 9.984010686528498e-05, "loss": 1.7947, "step": 80 }, { "epoch": 0.002023963730569948, "grad_norm": 0.3341318666934967, "learning_rate": 9.979962759067359e-05, "loss": 1.7829, "step": 100 }, { "epoch": 0.0024287564766839378, "grad_norm": 0.3078671097755432, "learning_rate": 9.975914831606218e-05, "loss": 1.778, "step": 120 }, { "epoch": 0.0028335492227979276, "grad_norm": 0.33950164914131165, "learning_rate": 9.971866904145079e-05, "loss": 1.7593, "step": 140 }, { "epoch": 0.003238341968911917, "grad_norm": 0.3301551043987274, "learning_rate": 9.967818976683939e-05, "loss": 1.754, "step": 160 }, { "epoch": 0.0036431347150259067, "grad_norm": 0.3049352169036865, "learning_rate": 9.9637710492228e-05, "loss": 1.7556, "step": 180 }, { "epoch": 0.004047927461139896, "grad_norm": 0.32645484805107117, "learning_rate": 9.959723121761659e-05, "loss": 1.7453, "step": 200 }, { "epoch": 0.004452720207253886, "grad_norm": 0.3271784782409668, "learning_rate": 9.955675194300518e-05, "loss": 1.7375, "step": 220 }, { "epoch": 0.0048575129533678756, "grad_norm": 0.38031938672065735, "learning_rate": 9.951627266839378e-05, "loss": 1.7447, "step": 240 }, { "epoch": 0.005262305699481865, "grad_norm": 0.34311237931251526, "learning_rate": 9.947579339378239e-05, "loss": 1.7381, "step": 260 }, { "epoch": 0.005667098445595855, "grad_norm": 0.32988694310188293, "learning_rate": 9.943531411917098e-05, "loss": 1.731, "step": 280 }, { "epoch": 0.0060718911917098444, "grad_norm": 0.32449841499328613, "learning_rate": 9.939483484455959e-05, "loss": 1.7327, "step": 300 }, { "epoch": 0.006476683937823834, "grad_norm": 0.37877997756004333, "learning_rate": 9.935435556994818e-05, "loss": 1.7257, "step": 320 }, { "epoch": 0.006881476683937824, "grad_norm": 0.36291101574897766, "learning_rate": 9.931387629533679e-05, "loss": 1.714, "step": 340 }, { "epoch": 0.007286269430051813, "grad_norm": 0.33723726868629456, "learning_rate": 9.927339702072539e-05, "loss": 1.7278, "step": 360 }, { "epoch": 0.007691062176165803, "grad_norm": 0.34433430433273315, "learning_rate": 9.9232917746114e-05, "loss": 1.7151, "step": 380 }, { "epoch": 0.008095854922279792, "grad_norm": 0.31477224826812744, "learning_rate": 9.919243847150259e-05, "loss": 1.7134, "step": 400 }, { "epoch": 0.008500647668393783, "grad_norm": 0.3311842679977417, "learning_rate": 9.91519591968912e-05, "loss": 1.716, "step": 420 }, { "epoch": 0.008905440414507772, "grad_norm": 0.3523848056793213, "learning_rate": 9.91114799222798e-05, "loss": 1.703, "step": 440 }, { "epoch": 0.009310233160621762, "grad_norm": 0.3423062264919281, "learning_rate": 9.90710006476684e-05, "loss": 1.7074, "step": 460 }, { "epoch": 0.009715025906735751, "grad_norm": 0.3270164728164673, "learning_rate": 9.9030521373057e-05, "loss": 1.7006, "step": 480 }, { "epoch": 0.01011981865284974, "grad_norm": 0.3370889127254486, "learning_rate": 9.89900420984456e-05, "loss": 1.7075, "step": 500 }, { "epoch": 0.01052461139896373, "grad_norm": 0.31438136100769043, "learning_rate": 9.89495628238342e-05, "loss": 1.7004, "step": 520 }, { "epoch": 0.010929404145077721, "grad_norm": 0.3441039025783539, "learning_rate": 9.890908354922281e-05, "loss": 1.6951, "step": 540 }, { "epoch": 0.01133419689119171, "grad_norm": 0.32169097661972046, "learning_rate": 9.88686042746114e-05, "loss": 1.6985, "step": 560 }, { "epoch": 0.0117389896373057, "grad_norm": 0.34355267882347107, "learning_rate": 9.8828125e-05, "loss": 1.7022, "step": 580 }, { "epoch": 0.012143782383419689, "grad_norm": 0.35460326075553894, "learning_rate": 9.87876457253886e-05, "loss": 1.6916, "step": 600 }, { "epoch": 0.012548575129533678, "grad_norm": 0.30619382858276367, "learning_rate": 9.87471664507772e-05, "loss": 1.7023, "step": 620 }, { "epoch": 0.012953367875647668, "grad_norm": 0.3458530008792877, "learning_rate": 9.870668717616581e-05, "loss": 1.6992, "step": 640 }, { "epoch": 0.013358160621761659, "grad_norm": 0.3295366168022156, "learning_rate": 9.86662079015544e-05, "loss": 1.6995, "step": 660 }, { "epoch": 0.013762953367875648, "grad_norm": 0.3297481834888458, "learning_rate": 9.862572862694301e-05, "loss": 1.6968, "step": 680 }, { "epoch": 0.014167746113989637, "grad_norm": 0.34700652956962585, "learning_rate": 9.858524935233161e-05, "loss": 1.6856, "step": 700 }, { "epoch": 0.014572538860103627, "grad_norm": 0.34136682748794556, "learning_rate": 9.854477007772022e-05, "loss": 1.6843, "step": 720 }, { "epoch": 0.014977331606217616, "grad_norm": 0.3239290714263916, "learning_rate": 9.850429080310881e-05, "loss": 1.6827, "step": 740 }, { "epoch": 0.015382124352331605, "grad_norm": 0.33007895946502686, "learning_rate": 9.846381152849742e-05, "loss": 1.6828, "step": 760 }, { "epoch": 0.015786917098445596, "grad_norm": 0.3286311626434326, "learning_rate": 9.842333225388601e-05, "loss": 1.6836, "step": 780 }, { "epoch": 0.016191709844559584, "grad_norm": 0.3518012762069702, "learning_rate": 9.838285297927462e-05, "loss": 1.6839, "step": 800 }, { "epoch": 0.016596502590673575, "grad_norm": 0.3359794020652771, "learning_rate": 9.834237370466322e-05, "loss": 1.69, "step": 820 }, { "epoch": 0.017001295336787566, "grad_norm": 0.3336375057697296, "learning_rate": 9.830189443005183e-05, "loss": 1.6864, "step": 840 }, { "epoch": 0.017406088082901554, "grad_norm": 0.34287652373313904, "learning_rate": 9.826141515544042e-05, "loss": 1.6824, "step": 860 }, { "epoch": 0.017810880829015545, "grad_norm": 0.3529578149318695, "learning_rate": 9.822093588082903e-05, "loss": 1.6768, "step": 880 }, { "epoch": 0.018215673575129532, "grad_norm": 0.319440096616745, "learning_rate": 9.818045660621762e-05, "loss": 1.6715, "step": 900 }, { "epoch": 0.018620466321243524, "grad_norm": 0.32123440504074097, "learning_rate": 9.813997733160622e-05, "loss": 1.6717, "step": 920 }, { "epoch": 0.019025259067357515, "grad_norm": 0.3472713232040405, "learning_rate": 9.809949805699483e-05, "loss": 1.6733, "step": 940 }, { "epoch": 0.019430051813471502, "grad_norm": 0.31424686312675476, "learning_rate": 9.805901878238342e-05, "loss": 1.6768, "step": 960 }, { "epoch": 0.019834844559585493, "grad_norm": 0.323935329914093, "learning_rate": 9.801853950777202e-05, "loss": 1.6699, "step": 980 }, { "epoch": 0.02023963730569948, "grad_norm": 0.3278948664665222, "learning_rate": 9.797806023316063e-05, "loss": 1.676, "step": 1000 }, { "epoch": 0.020644430051813472, "grad_norm": 0.3446846306324005, "learning_rate": 9.793758095854922e-05, "loss": 1.6702, "step": 1020 }, { "epoch": 0.02104922279792746, "grad_norm": 0.32707011699676514, "learning_rate": 9.789710168393783e-05, "loss": 1.6734, "step": 1040 }, { "epoch": 0.02145401554404145, "grad_norm": 0.31675225496292114, "learning_rate": 9.785662240932642e-05, "loss": 1.6669, "step": 1060 }, { "epoch": 0.021858808290155442, "grad_norm": 0.31957605481147766, "learning_rate": 9.781614313471503e-05, "loss": 1.6631, "step": 1080 }, { "epoch": 0.02226360103626943, "grad_norm": 0.3194131851196289, "learning_rate": 9.777566386010363e-05, "loss": 1.6697, "step": 1100 }, { "epoch": 0.02266839378238342, "grad_norm": 0.3324158489704132, "learning_rate": 9.773518458549223e-05, "loss": 1.6642, "step": 1120 }, { "epoch": 0.023073186528497408, "grad_norm": 0.3344501554965973, "learning_rate": 9.769470531088083e-05, "loss": 1.66, "step": 1140 }, { "epoch": 0.0234779792746114, "grad_norm": 0.33399927616119385, "learning_rate": 9.765422603626944e-05, "loss": 1.6718, "step": 1160 }, { "epoch": 0.02388277202072539, "grad_norm": 0.3483513593673706, "learning_rate": 9.761374676165803e-05, "loss": 1.6729, "step": 1180 }, { "epoch": 0.024287564766839378, "grad_norm": 0.31099238991737366, "learning_rate": 9.757326748704664e-05, "loss": 1.6686, "step": 1200 }, { "epoch": 0.02469235751295337, "grad_norm": 0.33135154843330383, "learning_rate": 9.753278821243524e-05, "loss": 1.6615, "step": 1220 }, { "epoch": 0.025097150259067356, "grad_norm": 0.33803436160087585, "learning_rate": 9.749230893782384e-05, "loss": 1.6693, "step": 1240 }, { "epoch": 0.025501943005181348, "grad_norm": 0.36377739906311035, "learning_rate": 9.745182966321244e-05, "loss": 1.6643, "step": 1260 }, { "epoch": 0.025906735751295335, "grad_norm": 0.3291517198085785, "learning_rate": 9.741135038860105e-05, "loss": 1.6641, "step": 1280 }, { "epoch": 0.026311528497409326, "grad_norm": 0.3475387394428253, "learning_rate": 9.737087111398964e-05, "loss": 1.6616, "step": 1300 }, { "epoch": 0.026716321243523317, "grad_norm": 0.3039502203464508, "learning_rate": 9.733039183937824e-05, "loss": 1.6611, "step": 1320 }, { "epoch": 0.027121113989637305, "grad_norm": 0.31948477029800415, "learning_rate": 9.728991256476683e-05, "loss": 1.6499, "step": 1340 }, { "epoch": 0.027525906735751296, "grad_norm": 0.3199487328529358, "learning_rate": 9.724943329015544e-05, "loss": 1.6595, "step": 1360 }, { "epoch": 0.027930699481865284, "grad_norm": 0.3055227994918823, "learning_rate": 9.720895401554405e-05, "loss": 1.664, "step": 1380 }, { "epoch": 0.028335492227979275, "grad_norm": 0.36037421226501465, "learning_rate": 9.716847474093264e-05, "loss": 1.666, "step": 1400 }, { "epoch": 0.028740284974093266, "grad_norm": 0.3379644751548767, "learning_rate": 9.712799546632125e-05, "loss": 1.6583, "step": 1420 }, { "epoch": 0.029145077720207253, "grad_norm": 0.33292436599731445, "learning_rate": 9.708751619170985e-05, "loss": 1.6585, "step": 1440 }, { "epoch": 0.029549870466321244, "grad_norm": 0.31199154257774353, "learning_rate": 9.704703691709846e-05, "loss": 1.6592, "step": 1460 }, { "epoch": 0.029954663212435232, "grad_norm": 0.30537131428718567, "learning_rate": 9.700655764248705e-05, "loss": 1.6551, "step": 1480 }, { "epoch": 0.030359455958549223, "grad_norm": 0.31178486347198486, "learning_rate": 9.696607836787566e-05, "loss": 1.6514, "step": 1500 }, { "epoch": 0.03076424870466321, "grad_norm": 0.311338871717453, "learning_rate": 9.692559909326425e-05, "loss": 1.6479, "step": 1520 }, { "epoch": 0.031169041450777202, "grad_norm": 0.3047199547290802, "learning_rate": 9.688511981865286e-05, "loss": 1.6589, "step": 1540 }, { "epoch": 0.03157383419689119, "grad_norm": 0.30759191513061523, "learning_rate": 9.684464054404146e-05, "loss": 1.6513, "step": 1560 }, { "epoch": 0.031978626943005184, "grad_norm": 0.3179994523525238, "learning_rate": 9.680416126943006e-05, "loss": 1.6545, "step": 1580 }, { "epoch": 0.03238341968911917, "grad_norm": 0.3084184527397156, "learning_rate": 9.676368199481866e-05, "loss": 1.6521, "step": 1600 }, { "epoch": 0.03278821243523316, "grad_norm": 0.30535468459129333, "learning_rate": 9.672320272020727e-05, "loss": 1.6486, "step": 1620 }, { "epoch": 0.03319300518134715, "grad_norm": 0.30586951971054077, "learning_rate": 9.668272344559586e-05, "loss": 1.6528, "step": 1640 }, { "epoch": 0.03359779792746114, "grad_norm": 0.30306655168533325, "learning_rate": 9.664224417098446e-05, "loss": 1.6525, "step": 1660 }, { "epoch": 0.03400259067357513, "grad_norm": 0.31174448132514954, "learning_rate": 9.660176489637307e-05, "loss": 1.648, "step": 1680 }, { "epoch": 0.034407383419689117, "grad_norm": 0.29106420278549194, "learning_rate": 9.656128562176166e-05, "loss": 1.6498, "step": 1700 }, { "epoch": 0.03481217616580311, "grad_norm": 0.29478907585144043, "learning_rate": 9.652080634715026e-05, "loss": 1.6525, "step": 1720 }, { "epoch": 0.0352169689119171, "grad_norm": 0.3267880380153656, "learning_rate": 9.648032707253886e-05, "loss": 1.6534, "step": 1740 }, { "epoch": 0.03562176165803109, "grad_norm": 0.30427759885787964, "learning_rate": 9.643984779792746e-05, "loss": 1.6506, "step": 1760 }, { "epoch": 0.03602655440414508, "grad_norm": 0.29657241702079773, "learning_rate": 9.639936852331607e-05, "loss": 1.648, "step": 1780 }, { "epoch": 0.036431347150259065, "grad_norm": 0.29255980253219604, "learning_rate": 9.635888924870466e-05, "loss": 1.6488, "step": 1800 }, { "epoch": 0.036836139896373056, "grad_norm": 0.3029814064502716, "learning_rate": 9.631840997409327e-05, "loss": 1.6473, "step": 1820 }, { "epoch": 0.03724093264248705, "grad_norm": 0.30492427945137024, "learning_rate": 9.627793069948187e-05, "loss": 1.6465, "step": 1840 }, { "epoch": 0.03764572538860104, "grad_norm": 0.2815641760826111, "learning_rate": 9.623745142487047e-05, "loss": 1.6445, "step": 1860 }, { "epoch": 0.03805051813471503, "grad_norm": 0.31647318601608276, "learning_rate": 9.619697215025907e-05, "loss": 1.644, "step": 1880 }, { "epoch": 0.03845531088082901, "grad_norm": 0.39855894446372986, "learning_rate": 9.615649287564768e-05, "loss": 1.6457, "step": 1900 }, { "epoch": 0.038860103626943004, "grad_norm": 0.30423423647880554, "learning_rate": 9.611601360103627e-05, "loss": 1.6405, "step": 1920 }, { "epoch": 0.039264896373056996, "grad_norm": 0.31380829215049744, "learning_rate": 9.607553432642488e-05, "loss": 1.6447, "step": 1940 }, { "epoch": 0.03966968911917099, "grad_norm": 0.3169679641723633, "learning_rate": 9.603505505181347e-05, "loss": 1.6444, "step": 1960 }, { "epoch": 0.04007448186528497, "grad_norm": 0.3010060489177704, "learning_rate": 9.599457577720208e-05, "loss": 1.6508, "step": 1980 }, { "epoch": 0.04047927461139896, "grad_norm": 0.3122701644897461, "learning_rate": 9.595409650259068e-05, "loss": 1.6452, "step": 2000 }, { "epoch": 0.04088406735751295, "grad_norm": 0.3323874771595001, "learning_rate": 9.591361722797929e-05, "loss": 1.6457, "step": 2020 }, { "epoch": 0.041288860103626944, "grad_norm": 0.3105747401714325, "learning_rate": 9.587313795336788e-05, "loss": 1.6438, "step": 2040 }, { "epoch": 0.041693652849740935, "grad_norm": 0.30348384380340576, "learning_rate": 9.583265867875648e-05, "loss": 1.6385, "step": 2060 }, { "epoch": 0.04209844559585492, "grad_norm": 0.31071311235427856, "learning_rate": 9.579217940414507e-05, "loss": 1.6423, "step": 2080 }, { "epoch": 0.04250323834196891, "grad_norm": 0.3041668236255646, "learning_rate": 9.575170012953368e-05, "loss": 1.6477, "step": 2100 }, { "epoch": 0.0429080310880829, "grad_norm": 0.3035949170589447, "learning_rate": 9.571122085492227e-05, "loss": 1.6346, "step": 2120 }, { "epoch": 0.04331282383419689, "grad_norm": 0.29684510827064514, "learning_rate": 9.567074158031088e-05, "loss": 1.6402, "step": 2140 }, { "epoch": 0.043717616580310883, "grad_norm": 0.2991543114185333, "learning_rate": 9.563026230569949e-05, "loss": 1.6406, "step": 2160 }, { "epoch": 0.04412240932642487, "grad_norm": 0.28683456778526306, "learning_rate": 9.558978303108809e-05, "loss": 1.6386, "step": 2180 }, { "epoch": 0.04452720207253886, "grad_norm": 0.28657013177871704, "learning_rate": 9.55493037564767e-05, "loss": 1.6362, "step": 2200 }, { "epoch": 0.04493199481865285, "grad_norm": 0.29187464714050293, "learning_rate": 9.550882448186529e-05, "loss": 1.6411, "step": 2220 }, { "epoch": 0.04533678756476684, "grad_norm": 0.31265419721603394, "learning_rate": 9.54683452072539e-05, "loss": 1.6387, "step": 2240 }, { "epoch": 0.04574158031088083, "grad_norm": 0.32003772258758545, "learning_rate": 9.542786593264249e-05, "loss": 1.6353, "step": 2260 }, { "epoch": 0.046146373056994816, "grad_norm": 0.28650808334350586, "learning_rate": 9.53873866580311e-05, "loss": 1.6357, "step": 2280 }, { "epoch": 0.04655116580310881, "grad_norm": 0.30126532912254333, "learning_rate": 9.53469073834197e-05, "loss": 1.6365, "step": 2300 }, { "epoch": 0.0469559585492228, "grad_norm": 0.28478938341140747, "learning_rate": 9.53064281088083e-05, "loss": 1.632, "step": 2320 }, { "epoch": 0.04736075129533679, "grad_norm": 0.33386704325675964, "learning_rate": 9.52659488341969e-05, "loss": 1.6333, "step": 2340 }, { "epoch": 0.04776554404145078, "grad_norm": 0.2902257740497589, "learning_rate": 9.52254695595855e-05, "loss": 1.6363, "step": 2360 }, { "epoch": 0.048170336787564765, "grad_norm": 0.27843865752220154, "learning_rate": 9.51849902849741e-05, "loss": 1.6374, "step": 2380 }, { "epoch": 0.048575129533678756, "grad_norm": 0.294043630361557, "learning_rate": 9.51445110103627e-05, "loss": 1.6419, "step": 2400 }, { "epoch": 0.04897992227979275, "grad_norm": 0.3161308467388153, "learning_rate": 9.510403173575129e-05, "loss": 1.6393, "step": 2420 }, { "epoch": 0.04938471502590674, "grad_norm": 0.31150925159454346, "learning_rate": 9.50635524611399e-05, "loss": 1.6266, "step": 2440 }, { "epoch": 0.04978950777202073, "grad_norm": 0.2863672971725464, "learning_rate": 9.50230731865285e-05, "loss": 1.6282, "step": 2460 }, { "epoch": 0.05019430051813471, "grad_norm": 0.31472674012184143, "learning_rate": 9.49825939119171e-05, "loss": 1.6392, "step": 2480 }, { "epoch": 0.050599093264248704, "grad_norm": 0.2932111918926239, "learning_rate": 9.49421146373057e-05, "loss": 1.6387, "step": 2500 }, { "epoch": 0.051003886010362695, "grad_norm": 0.2922670245170593, "learning_rate": 9.49016353626943e-05, "loss": 1.6265, "step": 2520 }, { "epoch": 0.051408678756476686, "grad_norm": 0.34639719128608704, "learning_rate": 9.48611560880829e-05, "loss": 1.6331, "step": 2540 }, { "epoch": 0.05181347150259067, "grad_norm": 0.27828654646873474, "learning_rate": 9.482067681347151e-05, "loss": 1.6281, "step": 2560 }, { "epoch": 0.05221826424870466, "grad_norm": 0.2839568853378296, "learning_rate": 9.47801975388601e-05, "loss": 1.6388, "step": 2580 }, { "epoch": 0.05262305699481865, "grad_norm": 0.3215753734111786, "learning_rate": 9.473971826424871e-05, "loss": 1.6293, "step": 2600 }, { "epoch": 0.053027849740932644, "grad_norm": 0.3079935610294342, "learning_rate": 9.469923898963731e-05, "loss": 1.6352, "step": 2620 }, { "epoch": 0.053432642487046635, "grad_norm": 0.2840258777141571, "learning_rate": 9.465875971502592e-05, "loss": 1.6273, "step": 2640 }, { "epoch": 0.05383743523316062, "grad_norm": 0.2912546396255493, "learning_rate": 9.461828044041451e-05, "loss": 1.6365, "step": 2660 }, { "epoch": 0.05424222797927461, "grad_norm": 0.2899230122566223, "learning_rate": 9.457780116580312e-05, "loss": 1.6261, "step": 2680 }, { "epoch": 0.0546470207253886, "grad_norm": 0.2922183871269226, "learning_rate": 9.453732189119171e-05, "loss": 1.6325, "step": 2700 }, { "epoch": 0.05505181347150259, "grad_norm": 0.29106026887893677, "learning_rate": 9.449684261658032e-05, "loss": 1.6337, "step": 2720 }, { "epoch": 0.05545660621761658, "grad_norm": 0.287082314491272, "learning_rate": 9.445636334196892e-05, "loss": 1.6357, "step": 2740 }, { "epoch": 0.05586139896373057, "grad_norm": 0.2769261598587036, "learning_rate": 9.441588406735752e-05, "loss": 1.633, "step": 2760 }, { "epoch": 0.05626619170984456, "grad_norm": 0.29068219661712646, "learning_rate": 9.437540479274612e-05, "loss": 1.6248, "step": 2780 }, { "epoch": 0.05667098445595855, "grad_norm": 0.3079804480075836, "learning_rate": 9.433492551813471e-05, "loss": 1.6344, "step": 2800 }, { "epoch": 0.05707577720207254, "grad_norm": 0.2883777916431427, "learning_rate": 9.429444624352331e-05, "loss": 1.6309, "step": 2820 }, { "epoch": 0.05748056994818653, "grad_norm": 0.28808510303497314, "learning_rate": 9.425396696891192e-05, "loss": 1.6242, "step": 2840 }, { "epoch": 0.057885362694300516, "grad_norm": 0.28887176513671875, "learning_rate": 9.421348769430051e-05, "loss": 1.6361, "step": 2860 }, { "epoch": 0.05829015544041451, "grad_norm": 0.27448683977127075, "learning_rate": 9.417300841968912e-05, "loss": 1.6313, "step": 2880 }, { "epoch": 0.0586949481865285, "grad_norm": 0.2936413288116455, "learning_rate": 9.413252914507773e-05, "loss": 1.6276, "step": 2900 }, { "epoch": 0.05909974093264249, "grad_norm": 0.2889741063117981, "learning_rate": 9.409204987046632e-05, "loss": 1.6247, "step": 2920 }, { "epoch": 0.05950453367875648, "grad_norm": 0.29733484983444214, "learning_rate": 9.405157059585493e-05, "loss": 1.6306, "step": 2940 }, { "epoch": 0.059909326424870464, "grad_norm": 0.27515995502471924, "learning_rate": 9.401109132124353e-05, "loss": 1.632, "step": 2960 }, { "epoch": 0.060314119170984455, "grad_norm": 0.2737637162208557, "learning_rate": 9.397061204663214e-05, "loss": 1.6316, "step": 2980 }, { "epoch": 0.060718911917098446, "grad_norm": 0.3020285367965698, "learning_rate": 9.393013277202073e-05, "loss": 1.6219, "step": 3000 }, { "epoch": 0.06112370466321244, "grad_norm": 0.29369768500328064, "learning_rate": 9.388965349740934e-05, "loss": 1.6246, "step": 3020 }, { "epoch": 0.06152849740932642, "grad_norm": 0.28206026554107666, "learning_rate": 9.384917422279793e-05, "loss": 1.6229, "step": 3040 }, { "epoch": 0.06193329015544041, "grad_norm": 0.3169432580471039, "learning_rate": 9.380869494818654e-05, "loss": 1.6276, "step": 3060 }, { "epoch": 0.062338082901554404, "grad_norm": 0.277768611907959, "learning_rate": 9.376821567357514e-05, "loss": 1.6297, "step": 3080 }, { "epoch": 0.0627428756476684, "grad_norm": 0.2765139937400818, "learning_rate": 9.372773639896375e-05, "loss": 1.6258, "step": 3100 }, { "epoch": 0.06314766839378239, "grad_norm": 0.29441556334495544, "learning_rate": 9.368725712435234e-05, "loss": 1.625, "step": 3120 }, { "epoch": 0.06355246113989638, "grad_norm": 0.2726493775844574, "learning_rate": 9.364677784974094e-05, "loss": 1.6265, "step": 3140 }, { "epoch": 0.06395725388601037, "grad_norm": 0.27458056807518005, "learning_rate": 9.360629857512953e-05, "loss": 1.6319, "step": 3160 }, { "epoch": 0.06436204663212436, "grad_norm": 0.30123719573020935, "learning_rate": 9.356581930051814e-05, "loss": 1.6322, "step": 3180 }, { "epoch": 0.06476683937823834, "grad_norm": 0.2971813976764679, "learning_rate": 9.352534002590673e-05, "loss": 1.6239, "step": 3200 }, { "epoch": 0.06517163212435233, "grad_norm": 0.2962792217731476, "learning_rate": 9.348486075129534e-05, "loss": 1.6325, "step": 3220 }, { "epoch": 0.06557642487046632, "grad_norm": 0.314919650554657, "learning_rate": 9.344438147668394e-05, "loss": 1.6188, "step": 3240 }, { "epoch": 0.06598121761658031, "grad_norm": 0.28388074040412903, "learning_rate": 9.340390220207254e-05, "loss": 1.6261, "step": 3260 }, { "epoch": 0.0663860103626943, "grad_norm": 0.2844889461994171, "learning_rate": 9.336342292746114e-05, "loss": 1.6294, "step": 3280 }, { "epoch": 0.06679080310880829, "grad_norm": 0.2784148156642914, "learning_rate": 9.332294365284975e-05, "loss": 1.6195, "step": 3300 }, { "epoch": 0.06719559585492228, "grad_norm": 0.2885282337665558, "learning_rate": 9.328246437823834e-05, "loss": 1.6305, "step": 3320 }, { "epoch": 0.06760038860103627, "grad_norm": 0.2740572988986969, "learning_rate": 9.324198510362695e-05, "loss": 1.6197, "step": 3340 }, { "epoch": 0.06800518134715026, "grad_norm": 0.27449172735214233, "learning_rate": 9.320150582901555e-05, "loss": 1.6293, "step": 3360 }, { "epoch": 0.06840997409326424, "grad_norm": 0.27773913741111755, "learning_rate": 9.316102655440415e-05, "loss": 1.6214, "step": 3380 }, { "epoch": 0.06881476683937823, "grad_norm": 0.2885377109050751, "learning_rate": 9.312054727979275e-05, "loss": 1.6197, "step": 3400 }, { "epoch": 0.06921955958549222, "grad_norm": 0.2858436703681946, "learning_rate": 9.308006800518136e-05, "loss": 1.6275, "step": 3420 }, { "epoch": 0.06962435233160622, "grad_norm": 0.27571892738342285, "learning_rate": 9.303958873056995e-05, "loss": 1.6213, "step": 3440 }, { "epoch": 0.0700291450777202, "grad_norm": 0.28904303908348083, "learning_rate": 9.299910945595856e-05, "loss": 1.6201, "step": 3460 }, { "epoch": 0.0704339378238342, "grad_norm": 0.3249903917312622, "learning_rate": 9.295863018134716e-05, "loss": 1.6294, "step": 3480 }, { "epoch": 0.07083873056994819, "grad_norm": 0.28692933917045593, "learning_rate": 9.291815090673575e-05, "loss": 1.6239, "step": 3500 }, { "epoch": 0.07124352331606218, "grad_norm": 0.2774125933647156, "learning_rate": 9.287767163212436e-05, "loss": 1.6146, "step": 3520 }, { "epoch": 0.07164831606217617, "grad_norm": 0.2777795195579529, "learning_rate": 9.283719235751295e-05, "loss": 1.6231, "step": 3540 }, { "epoch": 0.07205310880829016, "grad_norm": 0.2956138849258423, "learning_rate": 9.279671308290155e-05, "loss": 1.6231, "step": 3560 }, { "epoch": 0.07245790155440414, "grad_norm": 0.2783866226673126, "learning_rate": 9.275623380829016e-05, "loss": 1.6192, "step": 3580 }, { "epoch": 0.07286269430051813, "grad_norm": 0.27538731694221497, "learning_rate": 9.271575453367875e-05, "loss": 1.6247, "step": 3600 }, { "epoch": 0.07326748704663212, "grad_norm": 0.28058475255966187, "learning_rate": 9.267527525906736e-05, "loss": 1.6208, "step": 3620 }, { "epoch": 0.07367227979274611, "grad_norm": 0.26387903094291687, "learning_rate": 9.263479598445595e-05, "loss": 1.6189, "step": 3640 }, { "epoch": 0.0740770725388601, "grad_norm": 0.2765548527240753, "learning_rate": 9.259431670984456e-05, "loss": 1.6282, "step": 3660 }, { "epoch": 0.0744818652849741, "grad_norm": 0.2841934561729431, "learning_rate": 9.255383743523317e-05, "loss": 1.6122, "step": 3680 }, { "epoch": 0.07488665803108809, "grad_norm": 0.2959246039390564, "learning_rate": 9.251335816062177e-05, "loss": 1.623, "step": 3700 }, { "epoch": 0.07529145077720208, "grad_norm": 0.2673608660697937, "learning_rate": 9.247287888601037e-05, "loss": 1.6101, "step": 3720 }, { "epoch": 0.07569624352331607, "grad_norm": 0.28112488985061646, "learning_rate": 9.243239961139897e-05, "loss": 1.6225, "step": 3740 }, { "epoch": 0.07610103626943006, "grad_norm": 0.2903779447078705, "learning_rate": 9.239192033678758e-05, "loss": 1.6254, "step": 3760 }, { "epoch": 0.07650582901554404, "grad_norm": 0.2653449773788452, "learning_rate": 9.235144106217617e-05, "loss": 1.6126, "step": 3780 }, { "epoch": 0.07691062176165803, "grad_norm": 0.2796909511089325, "learning_rate": 9.231096178756478e-05, "loss": 1.6236, "step": 3800 }, { "epoch": 0.07731541450777202, "grad_norm": 0.284966379404068, "learning_rate": 9.227048251295338e-05, "loss": 1.6108, "step": 3820 }, { "epoch": 0.07772020725388601, "grad_norm": 0.27886250615119934, "learning_rate": 9.223000323834198e-05, "loss": 1.6271, "step": 3840 }, { "epoch": 0.078125, "grad_norm": 0.2722945809364319, "learning_rate": 9.218952396373058e-05, "loss": 1.6111, "step": 3860 }, { "epoch": 0.07852979274611399, "grad_norm": 0.29298532009124756, "learning_rate": 9.214904468911917e-05, "loss": 1.6165, "step": 3880 }, { "epoch": 0.07893458549222798, "grad_norm": 0.27712517976760864, "learning_rate": 9.210856541450777e-05, "loss": 1.62, "step": 3900 }, { "epoch": 0.07933937823834197, "grad_norm": 0.280352383852005, "learning_rate": 9.206808613989638e-05, "loss": 1.6209, "step": 3920 }, { "epoch": 0.07974417098445596, "grad_norm": 0.26598048210144043, "learning_rate": 9.202760686528497e-05, "loss": 1.6128, "step": 3940 }, { "epoch": 0.08014896373056994, "grad_norm": 0.2670834958553314, "learning_rate": 9.198712759067358e-05, "loss": 1.6195, "step": 3960 }, { "epoch": 0.08055375647668393, "grad_norm": 0.29801610112190247, "learning_rate": 9.194664831606217e-05, "loss": 1.6226, "step": 3980 }, { "epoch": 0.08095854922279792, "grad_norm": 0.28279098868370056, "learning_rate": 9.190616904145078e-05, "loss": 1.6174, "step": 4000 }, { "epoch": 0.08136334196891191, "grad_norm": 0.26549097895622253, "learning_rate": 9.186568976683938e-05, "loss": 1.618, "step": 4020 }, { "epoch": 0.0817681347150259, "grad_norm": 0.28351449966430664, "learning_rate": 9.182521049222799e-05, "loss": 1.6106, "step": 4040 }, { "epoch": 0.0821729274611399, "grad_norm": 0.2808610200881958, "learning_rate": 9.178473121761658e-05, "loss": 1.6088, "step": 4060 }, { "epoch": 0.08257772020725389, "grad_norm": 0.2753288149833679, "learning_rate": 9.174425194300519e-05, "loss": 1.6218, "step": 4080 }, { "epoch": 0.08298251295336788, "grad_norm": 0.27658000588417053, "learning_rate": 9.170377266839378e-05, "loss": 1.6126, "step": 4100 }, { "epoch": 0.08338730569948187, "grad_norm": 0.2800763249397278, "learning_rate": 9.166329339378239e-05, "loss": 1.6086, "step": 4120 }, { "epoch": 0.08379209844559586, "grad_norm": 0.27390316128730774, "learning_rate": 9.162281411917099e-05, "loss": 1.6144, "step": 4140 }, { "epoch": 0.08419689119170984, "grad_norm": 0.269971638917923, "learning_rate": 9.15823348445596e-05, "loss": 1.6175, "step": 4160 }, { "epoch": 0.08460168393782383, "grad_norm": 0.276216983795166, "learning_rate": 9.154185556994819e-05, "loss": 1.6154, "step": 4180 }, { "epoch": 0.08500647668393782, "grad_norm": 0.28099966049194336, "learning_rate": 9.15013762953368e-05, "loss": 1.6145, "step": 4200 }, { "epoch": 0.08541126943005181, "grad_norm": 0.2742680609226227, "learning_rate": 9.14608970207254e-05, "loss": 1.6091, "step": 4220 }, { "epoch": 0.0858160621761658, "grad_norm": 0.27846774458885193, "learning_rate": 9.142041774611399e-05, "loss": 1.6162, "step": 4240 }, { "epoch": 0.0862208549222798, "grad_norm": 0.26901182532310486, "learning_rate": 9.13799384715026e-05, "loss": 1.6101, "step": 4260 }, { "epoch": 0.08662564766839378, "grad_norm": 0.27751854062080383, "learning_rate": 9.133945919689119e-05, "loss": 1.6142, "step": 4280 }, { "epoch": 0.08703044041450778, "grad_norm": 0.26768845319747925, "learning_rate": 9.129897992227979e-05, "loss": 1.6196, "step": 4300 }, { "epoch": 0.08743523316062177, "grad_norm": 0.26729562878608704, "learning_rate": 9.12585006476684e-05, "loss": 1.6164, "step": 4320 }, { "epoch": 0.08784002590673576, "grad_norm": 0.2670207917690277, "learning_rate": 9.121802137305699e-05, "loss": 1.6223, "step": 4340 }, { "epoch": 0.08824481865284974, "grad_norm": 0.2760709822177887, "learning_rate": 9.11775420984456e-05, "loss": 1.6121, "step": 4360 }, { "epoch": 0.08864961139896373, "grad_norm": 0.27203139662742615, "learning_rate": 9.11370628238342e-05, "loss": 1.6184, "step": 4380 }, { "epoch": 0.08905440414507772, "grad_norm": 0.26826927065849304, "learning_rate": 9.10965835492228e-05, "loss": 1.6108, "step": 4400 }, { "epoch": 0.08945919689119171, "grad_norm": 0.2661951780319214, "learning_rate": 9.10561042746114e-05, "loss": 1.619, "step": 4420 }, { "epoch": 0.0898639896373057, "grad_norm": 0.2781335711479187, "learning_rate": 9.1015625e-05, "loss": 1.6032, "step": 4440 }, { "epoch": 0.09026878238341969, "grad_norm": 0.25924572348594666, "learning_rate": 9.097514572538861e-05, "loss": 1.6162, "step": 4460 }, { "epoch": 0.09067357512953368, "grad_norm": 0.27154040336608887, "learning_rate": 9.093466645077721e-05, "loss": 1.6134, "step": 4480 }, { "epoch": 0.09107836787564767, "grad_norm": 0.2862963080406189, "learning_rate": 9.089418717616582e-05, "loss": 1.6058, "step": 4500 }, { "epoch": 0.09148316062176166, "grad_norm": 0.3281325399875641, "learning_rate": 9.085370790155441e-05, "loss": 1.617, "step": 4520 }, { "epoch": 0.09188795336787564, "grad_norm": 0.2645701467990875, "learning_rate": 9.081322862694302e-05, "loss": 1.6229, "step": 4540 }, { "epoch": 0.09229274611398963, "grad_norm": 0.2588047981262207, "learning_rate": 9.077274935233161e-05, "loss": 1.6189, "step": 4560 }, { "epoch": 0.09269753886010362, "grad_norm": 0.27967512607574463, "learning_rate": 9.073227007772022e-05, "loss": 1.6073, "step": 4580 }, { "epoch": 0.09310233160621761, "grad_norm": 0.2676015794277191, "learning_rate": 9.069179080310882e-05, "loss": 1.6064, "step": 4600 }, { "epoch": 0.0935071243523316, "grad_norm": 0.2819591164588928, "learning_rate": 9.065131152849741e-05, "loss": 1.6118, "step": 4620 }, { "epoch": 0.0939119170984456, "grad_norm": 0.2657508850097656, "learning_rate": 9.061083225388601e-05, "loss": 1.6142, "step": 4640 }, { "epoch": 0.09431670984455959, "grad_norm": 0.2876991927623749, "learning_rate": 9.057035297927462e-05, "loss": 1.6137, "step": 4660 }, { "epoch": 0.09472150259067358, "grad_norm": 0.273978590965271, "learning_rate": 9.052987370466321e-05, "loss": 1.6117, "step": 4680 }, { "epoch": 0.09512629533678757, "grad_norm": 0.26277440786361694, "learning_rate": 9.048939443005182e-05, "loss": 1.6194, "step": 4700 }, { "epoch": 0.09553108808290156, "grad_norm": 0.27823150157928467, "learning_rate": 9.044891515544041e-05, "loss": 1.6061, "step": 4720 }, { "epoch": 0.09593588082901554, "grad_norm": 0.2765551209449768, "learning_rate": 9.040843588082902e-05, "loss": 1.6131, "step": 4740 }, { "epoch": 0.09634067357512953, "grad_norm": 0.27406424283981323, "learning_rate": 9.036795660621762e-05, "loss": 1.623, "step": 4760 }, { "epoch": 0.09674546632124352, "grad_norm": 0.2836116552352905, "learning_rate": 9.032747733160623e-05, "loss": 1.6092, "step": 4780 }, { "epoch": 0.09715025906735751, "grad_norm": 0.2735360860824585, "learning_rate": 9.028699805699482e-05, "loss": 1.6135, "step": 4800 }, { "epoch": 0.0975550518134715, "grad_norm": 0.2815845012664795, "learning_rate": 9.024651878238343e-05, "loss": 1.6064, "step": 4820 }, { "epoch": 0.0979598445595855, "grad_norm": 0.25892502069473267, "learning_rate": 9.020603950777202e-05, "loss": 1.6038, "step": 4840 }, { "epoch": 0.09836463730569948, "grad_norm": 0.2765268385410309, "learning_rate": 9.016556023316063e-05, "loss": 1.6073, "step": 4860 }, { "epoch": 0.09876943005181348, "grad_norm": 0.259694367647171, "learning_rate": 9.012508095854923e-05, "loss": 1.6086, "step": 4880 }, { "epoch": 0.09917422279792747, "grad_norm": 0.26812949776649475, "learning_rate": 9.008460168393783e-05, "loss": 1.6141, "step": 4900 }, { "epoch": 0.09957901554404146, "grad_norm": 0.2639097571372986, "learning_rate": 9.004412240932643e-05, "loss": 1.6175, "step": 4920 }, { "epoch": 0.09998380829015543, "grad_norm": 0.27562886476516724, "learning_rate": 9.000364313471504e-05, "loss": 1.605, "step": 4940 }, { "epoch": 0.10038860103626943, "grad_norm": 0.26413583755493164, "learning_rate": 8.996316386010363e-05, "loss": 1.6114, "step": 4960 }, { "epoch": 0.10079339378238342, "grad_norm": 0.2925732433795929, "learning_rate": 8.992268458549223e-05, "loss": 1.6144, "step": 4980 }, { "epoch": 0.10119818652849741, "grad_norm": 0.27266770601272583, "learning_rate": 8.988220531088082e-05, "loss": 1.6072, "step": 5000 }, { "epoch": 0.1016029792746114, "grad_norm": 0.25890401005744934, "learning_rate": 8.984172603626943e-05, "loss": 1.6112, "step": 5020 }, { "epoch": 0.10200777202072539, "grad_norm": 0.268765389919281, "learning_rate": 8.980124676165803e-05, "loss": 1.611, "step": 5040 }, { "epoch": 0.10241256476683938, "grad_norm": 0.27027228474617004, "learning_rate": 8.976076748704663e-05, "loss": 1.608, "step": 5060 }, { "epoch": 0.10281735751295337, "grad_norm": 0.2634071111679077, "learning_rate": 8.972028821243523e-05, "loss": 1.6063, "step": 5080 }, { "epoch": 0.10322215025906736, "grad_norm": 0.26351749897003174, "learning_rate": 8.967980893782384e-05, "loss": 1.6089, "step": 5100 }, { "epoch": 0.10362694300518134, "grad_norm": 0.26906198263168335, "learning_rate": 8.963932966321243e-05, "loss": 1.6099, "step": 5120 }, { "epoch": 0.10403173575129533, "grad_norm": 0.2594146430492401, "learning_rate": 8.959885038860104e-05, "loss": 1.6146, "step": 5140 }, { "epoch": 0.10443652849740932, "grad_norm": 0.3110468089580536, "learning_rate": 8.955837111398964e-05, "loss": 1.6015, "step": 5160 }, { "epoch": 0.10484132124352331, "grad_norm": 0.29078271985054016, "learning_rate": 8.951789183937824e-05, "loss": 1.6179, "step": 5180 }, { "epoch": 0.1052461139896373, "grad_norm": 0.27192422747612, "learning_rate": 8.947741256476684e-05, "loss": 1.6152, "step": 5200 }, { "epoch": 0.1056509067357513, "grad_norm": 0.2628384232521057, "learning_rate": 8.943693329015545e-05, "loss": 1.607, "step": 5220 }, { "epoch": 0.10605569948186529, "grad_norm": 0.2642107903957367, "learning_rate": 8.939645401554406e-05, "loss": 1.6076, "step": 5240 }, { "epoch": 0.10646049222797928, "grad_norm": 0.2692490518093109, "learning_rate": 8.935597474093265e-05, "loss": 1.609, "step": 5260 }, { "epoch": 0.10686528497409327, "grad_norm": 0.27303391695022583, "learning_rate": 8.931549546632126e-05, "loss": 1.6109, "step": 5280 }, { "epoch": 0.10727007772020726, "grad_norm": 0.26163583993911743, "learning_rate": 8.927501619170985e-05, "loss": 1.6023, "step": 5300 }, { "epoch": 0.10767487046632124, "grad_norm": 0.2833383083343506, "learning_rate": 8.923453691709845e-05, "loss": 1.6083, "step": 5320 }, { "epoch": 0.10807966321243523, "grad_norm": 0.2624320089817047, "learning_rate": 8.919405764248706e-05, "loss": 1.6118, "step": 5340 }, { "epoch": 0.10848445595854922, "grad_norm": 0.267648309469223, "learning_rate": 8.915357836787565e-05, "loss": 1.6014, "step": 5360 }, { "epoch": 0.10888924870466321, "grad_norm": 0.25757792592048645, "learning_rate": 8.911309909326425e-05, "loss": 1.6146, "step": 5380 }, { "epoch": 0.1092940414507772, "grad_norm": 0.2680610418319702, "learning_rate": 8.907261981865285e-05, "loss": 1.6123, "step": 5400 }, { "epoch": 0.10969883419689119, "grad_norm": 0.2672348916530609, "learning_rate": 8.903214054404145e-05, "loss": 1.6126, "step": 5420 }, { "epoch": 0.11010362694300518, "grad_norm": 0.2596403956413269, "learning_rate": 8.899166126943006e-05, "loss": 1.6136, "step": 5440 }, { "epoch": 0.11050841968911918, "grad_norm": 0.2697707712650299, "learning_rate": 8.895118199481865e-05, "loss": 1.599, "step": 5460 }, { "epoch": 0.11091321243523317, "grad_norm": 0.2614070475101471, "learning_rate": 8.891070272020726e-05, "loss": 1.6138, "step": 5480 }, { "epoch": 0.11131800518134716, "grad_norm": 0.2745078206062317, "learning_rate": 8.887022344559586e-05, "loss": 1.61, "step": 5500 }, { "epoch": 0.11172279792746113, "grad_norm": 0.27104851603507996, "learning_rate": 8.882974417098446e-05, "loss": 1.6107, "step": 5520 }, { "epoch": 0.11212759067357513, "grad_norm": 0.26507166028022766, "learning_rate": 8.878926489637306e-05, "loss": 1.6102, "step": 5540 }, { "epoch": 0.11253238341968912, "grad_norm": 0.27362266182899475, "learning_rate": 8.874878562176167e-05, "loss": 1.6148, "step": 5560 }, { "epoch": 0.11293717616580311, "grad_norm": 0.26811984181404114, "learning_rate": 8.870830634715026e-05, "loss": 1.6017, "step": 5580 }, { "epoch": 0.1133419689119171, "grad_norm": 0.2647969722747803, "learning_rate": 8.866782707253887e-05, "loss": 1.6107, "step": 5600 }, { "epoch": 0.11374676165803109, "grad_norm": 0.27504459023475647, "learning_rate": 8.862734779792747e-05, "loss": 1.6078, "step": 5620 }, { "epoch": 0.11415155440414508, "grad_norm": 0.2890450060367584, "learning_rate": 8.858686852331607e-05, "loss": 1.6012, "step": 5640 }, { "epoch": 0.11455634715025907, "grad_norm": 0.261402428150177, "learning_rate": 8.854638924870467e-05, "loss": 1.6071, "step": 5660 }, { "epoch": 0.11496113989637306, "grad_norm": 0.25921958684921265, "learning_rate": 8.850590997409328e-05, "loss": 1.6018, "step": 5680 }, { "epoch": 0.11536593264248704, "grad_norm": 0.2773749530315399, "learning_rate": 8.846543069948187e-05, "loss": 1.6119, "step": 5700 }, { "epoch": 0.11577072538860103, "grad_norm": 0.2655571699142456, "learning_rate": 8.842495142487047e-05, "loss": 1.6045, "step": 5720 }, { "epoch": 0.11617551813471502, "grad_norm": 0.29013592004776, "learning_rate": 8.838447215025906e-05, "loss": 1.6074, "step": 5740 }, { "epoch": 0.11658031088082901, "grad_norm": 0.2719479501247406, "learning_rate": 8.834399287564767e-05, "loss": 1.6085, "step": 5760 }, { "epoch": 0.116985103626943, "grad_norm": 0.26245880126953125, "learning_rate": 8.830351360103626e-05, "loss": 1.5951, "step": 5780 }, { "epoch": 0.117389896373057, "grad_norm": 0.2847375273704529, "learning_rate": 8.826303432642487e-05, "loss": 1.6102, "step": 5800 }, { "epoch": 0.11779468911917099, "grad_norm": 0.2561042606830597, "learning_rate": 8.822255505181347e-05, "loss": 1.6066, "step": 5820 }, { "epoch": 0.11819948186528498, "grad_norm": 0.25724905729293823, "learning_rate": 8.818207577720208e-05, "loss": 1.6136, "step": 5840 }, { "epoch": 0.11860427461139897, "grad_norm": 0.2615273594856262, "learning_rate": 8.814159650259067e-05, "loss": 1.6095, "step": 5860 }, { "epoch": 0.11900906735751296, "grad_norm": 0.2703826129436493, "learning_rate": 8.810111722797928e-05, "loss": 1.6006, "step": 5880 }, { "epoch": 0.11941386010362694, "grad_norm": 0.263546884059906, "learning_rate": 8.806063795336787e-05, "loss": 1.6145, "step": 5900 }, { "epoch": 0.11981865284974093, "grad_norm": 0.26277732849121094, "learning_rate": 8.802015867875648e-05, "loss": 1.6069, "step": 5920 }, { "epoch": 0.12022344559585492, "grad_norm": 0.26237738132476807, "learning_rate": 8.797967940414508e-05, "loss": 1.6041, "step": 5940 }, { "epoch": 0.12062823834196891, "grad_norm": 0.26104891300201416, "learning_rate": 8.793920012953369e-05, "loss": 1.6132, "step": 5960 }, { "epoch": 0.1210330310880829, "grad_norm": 0.2681528925895691, "learning_rate": 8.789872085492228e-05, "loss": 1.6088, "step": 5980 }, { "epoch": 0.12143782383419689, "grad_norm": 0.2616095542907715, "learning_rate": 8.785824158031089e-05, "loss": 1.6112, "step": 6000 }, { "epoch": 0.12184261658031088, "grad_norm": 0.27907875180244446, "learning_rate": 8.78177623056995e-05, "loss": 1.6032, "step": 6020 }, { "epoch": 0.12224740932642487, "grad_norm": 0.26161473989486694, "learning_rate": 8.777728303108809e-05, "loss": 1.6037, "step": 6040 }, { "epoch": 0.12265220207253887, "grad_norm": 0.25914013385772705, "learning_rate": 8.773680375647669e-05, "loss": 1.6052, "step": 6060 }, { "epoch": 0.12305699481865284, "grad_norm": 0.26995646953582764, "learning_rate": 8.76963244818653e-05, "loss": 1.6113, "step": 6080 }, { "epoch": 0.12346178756476683, "grad_norm": 0.2624645531177521, "learning_rate": 8.765584520725389e-05, "loss": 1.6059, "step": 6100 }, { "epoch": 0.12386658031088082, "grad_norm": 0.25899842381477356, "learning_rate": 8.761536593264248e-05, "loss": 1.6037, "step": 6120 }, { "epoch": 0.12427137305699482, "grad_norm": 0.2654050886631012, "learning_rate": 8.75748866580311e-05, "loss": 1.6146, "step": 6140 }, { "epoch": 0.12467616580310881, "grad_norm": 0.26424846053123474, "learning_rate": 8.753440738341969e-05, "loss": 1.6015, "step": 6160 }, { "epoch": 0.1250809585492228, "grad_norm": 0.2507600784301758, "learning_rate": 8.74939281088083e-05, "loss": 1.6026, "step": 6180 }, { "epoch": 0.1254857512953368, "grad_norm": 0.2702467441558838, "learning_rate": 8.745344883419689e-05, "loss": 1.6098, "step": 6200 }, { "epoch": 0.12589054404145078, "grad_norm": 0.2667440176010132, "learning_rate": 8.74129695595855e-05, "loss": 1.6016, "step": 6220 }, { "epoch": 0.12629533678756477, "grad_norm": 0.27041861414909363, "learning_rate": 8.73724902849741e-05, "loss": 1.61, "step": 6240 }, { "epoch": 0.12670012953367876, "grad_norm": 0.25813111662864685, "learning_rate": 8.73320110103627e-05, "loss": 1.5951, "step": 6260 }, { "epoch": 0.12710492227979275, "grad_norm": 0.2637070119380951, "learning_rate": 8.72915317357513e-05, "loss": 1.6051, "step": 6280 }, { "epoch": 0.12750971502590674, "grad_norm": 0.24466776847839355, "learning_rate": 8.72510524611399e-05, "loss": 1.6072, "step": 6300 }, { "epoch": 0.12791450777202074, "grad_norm": 0.2632046341896057, "learning_rate": 8.72105731865285e-05, "loss": 1.6087, "step": 6320 }, { "epoch": 0.12831930051813473, "grad_norm": 0.24784623086452484, "learning_rate": 8.717009391191711e-05, "loss": 1.6003, "step": 6340 }, { "epoch": 0.12872409326424872, "grad_norm": 0.27260076999664307, "learning_rate": 8.71296146373057e-05, "loss": 1.6019, "step": 6360 }, { "epoch": 0.12912888601036268, "grad_norm": 0.25531041622161865, "learning_rate": 8.708913536269431e-05, "loss": 1.6044, "step": 6380 }, { "epoch": 0.12953367875647667, "grad_norm": 0.2816506028175354, "learning_rate": 8.704865608808291e-05, "loss": 1.6047, "step": 6400 }, { "epoch": 0.12993847150259066, "grad_norm": 0.2671644389629364, "learning_rate": 8.700817681347152e-05, "loss": 1.6012, "step": 6420 }, { "epoch": 0.13034326424870465, "grad_norm": 0.2529067099094391, "learning_rate": 8.696769753886011e-05, "loss": 1.6125, "step": 6440 }, { "epoch": 0.13074805699481865, "grad_norm": 0.2635551691055298, "learning_rate": 8.69272182642487e-05, "loss": 1.6008, "step": 6460 }, { "epoch": 0.13115284974093264, "grad_norm": 0.26499757170677185, "learning_rate": 8.68867389896373e-05, "loss": 1.6085, "step": 6480 }, { "epoch": 0.13155764248704663, "grad_norm": 0.2593876123428345, "learning_rate": 8.684625971502591e-05, "loss": 1.6066, "step": 6500 }, { "epoch": 0.13196243523316062, "grad_norm": 0.2517438232898712, "learning_rate": 8.68057804404145e-05, "loss": 1.5978, "step": 6520 }, { "epoch": 0.1323672279792746, "grad_norm": 0.265071302652359, "learning_rate": 8.676530116580311e-05, "loss": 1.6081, "step": 6540 }, { "epoch": 0.1327720207253886, "grad_norm": 0.2520970106124878, "learning_rate": 8.67248218911917e-05, "loss": 1.5981, "step": 6560 }, { "epoch": 0.1331768134715026, "grad_norm": 0.26376473903656006, "learning_rate": 8.668434261658031e-05, "loss": 1.6015, "step": 6580 }, { "epoch": 0.13358160621761658, "grad_norm": 0.24763721227645874, "learning_rate": 8.664386334196891e-05, "loss": 1.6005, "step": 6600 }, { "epoch": 0.13398639896373057, "grad_norm": 0.26865652203559875, "learning_rate": 8.660338406735752e-05, "loss": 1.6051, "step": 6620 }, { "epoch": 0.13439119170984457, "grad_norm": 0.2670711576938629, "learning_rate": 8.656290479274611e-05, "loss": 1.6044, "step": 6640 }, { "epoch": 0.13479598445595856, "grad_norm": 0.26412123441696167, "learning_rate": 8.652242551813472e-05, "loss": 1.6077, "step": 6660 }, { "epoch": 0.13520077720207255, "grad_norm": 0.2693142592906952, "learning_rate": 8.648194624352332e-05, "loss": 1.6114, "step": 6680 }, { "epoch": 0.13560556994818654, "grad_norm": 0.2563552260398865, "learning_rate": 8.644146696891192e-05, "loss": 1.6082, "step": 6700 }, { "epoch": 0.13601036269430053, "grad_norm": 0.2625761926174164, "learning_rate": 8.640098769430052e-05, "loss": 1.6041, "step": 6720 }, { "epoch": 0.13641515544041452, "grad_norm": 0.25878438353538513, "learning_rate": 8.636050841968913e-05, "loss": 1.6061, "step": 6740 }, { "epoch": 0.13681994818652848, "grad_norm": 0.2626986503601074, "learning_rate": 8.632002914507774e-05, "loss": 1.5991, "step": 6760 }, { "epoch": 0.13722474093264247, "grad_norm": 0.257108211517334, "learning_rate": 8.627954987046633e-05, "loss": 1.6035, "step": 6780 }, { "epoch": 0.13762953367875647, "grad_norm": 0.27164486050605774, "learning_rate": 8.623907059585493e-05, "loss": 1.601, "step": 6800 }, { "epoch": 0.13803432642487046, "grad_norm": 0.25914931297302246, "learning_rate": 8.619859132124352e-05, "loss": 1.5916, "step": 6820 }, { "epoch": 0.13843911917098445, "grad_norm": 0.27093029022216797, "learning_rate": 8.615811204663213e-05, "loss": 1.5957, "step": 6840 }, { "epoch": 0.13884391191709844, "grad_norm": 0.2503158152103424, "learning_rate": 8.611763277202072e-05, "loss": 1.6035, "step": 6860 }, { "epoch": 0.13924870466321243, "grad_norm": 0.24412749707698822, "learning_rate": 8.607715349740933e-05, "loss": 1.6027, "step": 6880 }, { "epoch": 0.13965349740932642, "grad_norm": 0.25988513231277466, "learning_rate": 8.603667422279793e-05, "loss": 1.6049, "step": 6900 }, { "epoch": 0.1400582901554404, "grad_norm": 0.27382171154022217, "learning_rate": 8.599619494818654e-05, "loss": 1.5995, "step": 6920 }, { "epoch": 0.1404630829015544, "grad_norm": 0.2554158568382263, "learning_rate": 8.595571567357513e-05, "loss": 1.6014, "step": 6940 }, { "epoch": 0.1408678756476684, "grad_norm": 0.2732117474079132, "learning_rate": 8.591523639896374e-05, "loss": 1.5991, "step": 6960 }, { "epoch": 0.14127266839378239, "grad_norm": 0.2615940570831299, "learning_rate": 8.587475712435233e-05, "loss": 1.6052, "step": 6980 }, { "epoch": 0.14167746113989638, "grad_norm": 0.263649046421051, "learning_rate": 8.583427784974094e-05, "loss": 1.599, "step": 7000 }, { "epoch": 0.14208225388601037, "grad_norm": 0.2557724416255951, "learning_rate": 8.579379857512954e-05, "loss": 1.5977, "step": 7020 }, { "epoch": 0.14248704663212436, "grad_norm": 0.27558931708335876, "learning_rate": 8.575331930051814e-05, "loss": 1.5995, "step": 7040 }, { "epoch": 0.14289183937823835, "grad_norm": 0.27249184250831604, "learning_rate": 8.571284002590674e-05, "loss": 1.6065, "step": 7060 }, { "epoch": 0.14329663212435234, "grad_norm": 0.2665605843067169, "learning_rate": 8.567236075129535e-05, "loss": 1.5931, "step": 7080 }, { "epoch": 0.14370142487046633, "grad_norm": 0.2519274353981018, "learning_rate": 8.563188147668394e-05, "loss": 1.6038, "step": 7100 }, { "epoch": 0.14410621761658032, "grad_norm": 0.2540017366409302, "learning_rate": 8.559140220207255e-05, "loss": 1.5922, "step": 7120 }, { "epoch": 0.14451101036269431, "grad_norm": 0.24940931797027588, "learning_rate": 8.555092292746115e-05, "loss": 1.6052, "step": 7140 }, { "epoch": 0.14491580310880828, "grad_norm": 0.2564987242221832, "learning_rate": 8.551044365284975e-05, "loss": 1.6017, "step": 7160 }, { "epoch": 0.14532059585492227, "grad_norm": 0.26273342967033386, "learning_rate": 8.546996437823835e-05, "loss": 1.6057, "step": 7180 }, { "epoch": 0.14572538860103626, "grad_norm": 0.25715669989585876, "learning_rate": 8.542948510362694e-05, "loss": 1.6033, "step": 7200 }, { "epoch": 0.14613018134715025, "grad_norm": 0.2579188346862793, "learning_rate": 8.538900582901554e-05, "loss": 1.6025, "step": 7220 }, { "epoch": 0.14653497409326424, "grad_norm": 0.25650057196617126, "learning_rate": 8.534852655440415e-05, "loss": 1.6066, "step": 7240 }, { "epoch": 0.14693976683937823, "grad_norm": 0.2665022909641266, "learning_rate": 8.530804727979274e-05, "loss": 1.5966, "step": 7260 }, { "epoch": 0.14734455958549222, "grad_norm": 0.260345458984375, "learning_rate": 8.526756800518135e-05, "loss": 1.5982, "step": 7280 }, { "epoch": 0.14774935233160622, "grad_norm": 0.24916279315948486, "learning_rate": 8.522708873056995e-05, "loss": 1.6017, "step": 7300 }, { "epoch": 0.1481541450777202, "grad_norm": 0.24506306648254395, "learning_rate": 8.518660945595855e-05, "loss": 1.5988, "step": 7320 }, { "epoch": 0.1485589378238342, "grad_norm": 0.2581236660480499, "learning_rate": 8.514613018134715e-05, "loss": 1.5995, "step": 7340 }, { "epoch": 0.1489637305699482, "grad_norm": 0.2588168680667877, "learning_rate": 8.510565090673576e-05, "loss": 1.6044, "step": 7360 }, { "epoch": 0.14936852331606218, "grad_norm": 0.2511528730392456, "learning_rate": 8.506517163212435e-05, "loss": 1.5957, "step": 7380 }, { "epoch": 0.14977331606217617, "grad_norm": 0.25623542070388794, "learning_rate": 8.502469235751296e-05, "loss": 1.6033, "step": 7400 }, { "epoch": 0.15017810880829016, "grad_norm": 0.24741052091121674, "learning_rate": 8.498421308290155e-05, "loss": 1.5998, "step": 7420 }, { "epoch": 0.15058290155440415, "grad_norm": 0.2567812204360962, "learning_rate": 8.494373380829016e-05, "loss": 1.6005, "step": 7440 }, { "epoch": 0.15098769430051814, "grad_norm": 0.24815219640731812, "learning_rate": 8.490325453367876e-05, "loss": 1.6014, "step": 7460 }, { "epoch": 0.15139248704663213, "grad_norm": 0.2515564262866974, "learning_rate": 8.486277525906737e-05, "loss": 1.5886, "step": 7480 }, { "epoch": 0.15179727979274613, "grad_norm": 0.26748186349868774, "learning_rate": 8.482229598445596e-05, "loss": 1.6049, "step": 7500 }, { "epoch": 0.15220207253886012, "grad_norm": 0.2765727937221527, "learning_rate": 8.478181670984457e-05, "loss": 1.5983, "step": 7520 }, { "epoch": 0.15260686528497408, "grad_norm": 0.285435289144516, "learning_rate": 8.474133743523316e-05, "loss": 1.6024, "step": 7540 }, { "epoch": 0.15301165803108807, "grad_norm": 0.2534791827201843, "learning_rate": 8.470085816062176e-05, "loss": 1.6097, "step": 7560 }, { "epoch": 0.15341645077720206, "grad_norm": 0.26324743032455444, "learning_rate": 8.466037888601037e-05, "loss": 1.5954, "step": 7580 }, { "epoch": 0.15382124352331605, "grad_norm": 0.25837934017181396, "learning_rate": 8.461989961139896e-05, "loss": 1.6026, "step": 7600 }, { "epoch": 0.15422603626943004, "grad_norm": 0.25932076573371887, "learning_rate": 8.457942033678757e-05, "loss": 1.6043, "step": 7620 }, { "epoch": 0.15463082901554404, "grad_norm": 0.25741469860076904, "learning_rate": 8.453894106217617e-05, "loss": 1.5967, "step": 7640 }, { "epoch": 0.15503562176165803, "grad_norm": 0.3117927610874176, "learning_rate": 8.449846178756477e-05, "loss": 1.5946, "step": 7660 }, { "epoch": 0.15544041450777202, "grad_norm": 0.2627263069152832, "learning_rate": 8.445798251295337e-05, "loss": 1.6041, "step": 7680 }, { "epoch": 0.155845207253886, "grad_norm": 0.2680657207965851, "learning_rate": 8.441750323834198e-05, "loss": 1.609, "step": 7700 }, { "epoch": 0.15625, "grad_norm": 0.24179504811763763, "learning_rate": 8.437702396373057e-05, "loss": 1.5988, "step": 7720 }, { "epoch": 0.156654792746114, "grad_norm": 0.25268909335136414, "learning_rate": 8.433654468911918e-05, "loss": 1.5906, "step": 7740 }, { "epoch": 0.15705958549222798, "grad_norm": 0.25415557622909546, "learning_rate": 8.429606541450778e-05, "loss": 1.5953, "step": 7760 }, { "epoch": 0.15746437823834197, "grad_norm": 0.2639238238334656, "learning_rate": 8.425558613989638e-05, "loss": 1.6066, "step": 7780 }, { "epoch": 0.15786917098445596, "grad_norm": 0.25282031297683716, "learning_rate": 8.421510686528498e-05, "loss": 1.5969, "step": 7800 }, { "epoch": 0.15827396373056996, "grad_norm": 0.2521458566188812, "learning_rate": 8.417462759067359e-05, "loss": 1.6041, "step": 7820 }, { "epoch": 0.15867875647668395, "grad_norm": 0.2629505395889282, "learning_rate": 8.413414831606218e-05, "loss": 1.5981, "step": 7840 }, { "epoch": 0.15908354922279794, "grad_norm": 0.26549121737480164, "learning_rate": 8.409366904145079e-05, "loss": 1.5986, "step": 7860 }, { "epoch": 0.15948834196891193, "grad_norm": 0.2565297782421112, "learning_rate": 8.405318976683938e-05, "loss": 1.5967, "step": 7880 }, { "epoch": 0.15989313471502592, "grad_norm": 0.26411643624305725, "learning_rate": 8.401271049222798e-05, "loss": 1.6113, "step": 7900 }, { "epoch": 0.16029792746113988, "grad_norm": 0.2610175907611847, "learning_rate": 8.397223121761659e-05, "loss": 1.5965, "step": 7920 }, { "epoch": 0.16070272020725387, "grad_norm": 0.26870352029800415, "learning_rate": 8.393175194300518e-05, "loss": 1.5994, "step": 7940 }, { "epoch": 0.16110751295336787, "grad_norm": 0.25182560086250305, "learning_rate": 8.389127266839378e-05, "loss": 1.6013, "step": 7960 }, { "epoch": 0.16151230569948186, "grad_norm": 0.265104204416275, "learning_rate": 8.385079339378239e-05, "loss": 1.6064, "step": 7980 }, { "epoch": 0.16191709844559585, "grad_norm": 0.25095048546791077, "learning_rate": 8.381031411917098e-05, "loss": 1.6009, "step": 8000 }, { "epoch": 0.16232189119170984, "grad_norm": 0.27897292375564575, "learning_rate": 8.376983484455959e-05, "loss": 1.6009, "step": 8020 }, { "epoch": 0.16272668393782383, "grad_norm": 0.26178786158561707, "learning_rate": 8.372935556994818e-05, "loss": 1.5974, "step": 8040 }, { "epoch": 0.16313147668393782, "grad_norm": 0.2626231908798218, "learning_rate": 8.368887629533679e-05, "loss": 1.6002, "step": 8060 }, { "epoch": 0.1635362694300518, "grad_norm": 0.2668093740940094, "learning_rate": 8.364839702072539e-05, "loss": 1.5975, "step": 8080 }, { "epoch": 0.1639410621761658, "grad_norm": 0.2522405683994293, "learning_rate": 8.3607917746114e-05, "loss": 1.5966, "step": 8100 }, { "epoch": 0.1643458549222798, "grad_norm": 0.26015549898147583, "learning_rate": 8.356743847150259e-05, "loss": 1.5938, "step": 8120 }, { "epoch": 0.16475064766839378, "grad_norm": 0.2642843723297119, "learning_rate": 8.35269591968912e-05, "loss": 1.5989, "step": 8140 }, { "epoch": 0.16515544041450778, "grad_norm": 0.25145819783210754, "learning_rate": 8.34864799222798e-05, "loss": 1.6058, "step": 8160 }, { "epoch": 0.16556023316062177, "grad_norm": 0.25260496139526367, "learning_rate": 8.34460006476684e-05, "loss": 1.6004, "step": 8180 }, { "epoch": 0.16596502590673576, "grad_norm": 0.2683722674846649, "learning_rate": 8.3405521373057e-05, "loss": 1.5967, "step": 8200 }, { "epoch": 0.16636981865284975, "grad_norm": 0.2496829777956009, "learning_rate": 8.33650420984456e-05, "loss": 1.6021, "step": 8220 }, { "epoch": 0.16677461139896374, "grad_norm": 0.2554880380630493, "learning_rate": 8.33245628238342e-05, "loss": 1.5932, "step": 8240 }, { "epoch": 0.16717940414507773, "grad_norm": 0.25362247228622437, "learning_rate": 8.328408354922281e-05, "loss": 1.6031, "step": 8260 }, { "epoch": 0.16758419689119172, "grad_norm": 0.256998747587204, "learning_rate": 8.32436042746114e-05, "loss": 1.5899, "step": 8280 }, { "epoch": 0.16798898963730569, "grad_norm": 0.2713709771633148, "learning_rate": 8.3203125e-05, "loss": 1.594, "step": 8300 }, { "epoch": 0.16839378238341968, "grad_norm": 0.26144126057624817, "learning_rate": 8.31626457253886e-05, "loss": 1.5885, "step": 8320 }, { "epoch": 0.16879857512953367, "grad_norm": 0.25603315234184265, "learning_rate": 8.31221664507772e-05, "loss": 1.602, "step": 8340 }, { "epoch": 0.16920336787564766, "grad_norm": 0.2540545165538788, "learning_rate": 8.308168717616581e-05, "loss": 1.6023, "step": 8360 }, { "epoch": 0.16960816062176165, "grad_norm": 0.24877284467220306, "learning_rate": 8.30412079015544e-05, "loss": 1.5982, "step": 8380 }, { "epoch": 0.17001295336787564, "grad_norm": 0.2585165202617645, "learning_rate": 8.300072862694301e-05, "loss": 1.5933, "step": 8400 }, { "epoch": 0.17041774611398963, "grad_norm": 0.25463396310806274, "learning_rate": 8.296024935233161e-05, "loss": 1.5982, "step": 8420 }, { "epoch": 0.17082253886010362, "grad_norm": 0.2551766037940979, "learning_rate": 8.291977007772022e-05, "loss": 1.5955, "step": 8440 }, { "epoch": 0.17122733160621761, "grad_norm": 0.24814505875110626, "learning_rate": 8.287929080310881e-05, "loss": 1.6041, "step": 8460 }, { "epoch": 0.1716321243523316, "grad_norm": 0.25221145153045654, "learning_rate": 8.283881152849742e-05, "loss": 1.6024, "step": 8480 }, { "epoch": 0.1720369170984456, "grad_norm": 0.2520849108695984, "learning_rate": 8.279833225388601e-05, "loss": 1.5883, "step": 8500 }, { "epoch": 0.1724417098445596, "grad_norm": 0.25663772225379944, "learning_rate": 8.275785297927462e-05, "loss": 1.6004, "step": 8520 }, { "epoch": 0.17284650259067358, "grad_norm": 0.26364174485206604, "learning_rate": 8.271737370466322e-05, "loss": 1.5922, "step": 8540 }, { "epoch": 0.17325129533678757, "grad_norm": 0.28015953302383423, "learning_rate": 8.267689443005183e-05, "loss": 1.6077, "step": 8560 }, { "epoch": 0.17365608808290156, "grad_norm": 0.24792265892028809, "learning_rate": 8.263641515544042e-05, "loss": 1.5975, "step": 8580 }, { "epoch": 0.17406088082901555, "grad_norm": 0.24993407726287842, "learning_rate": 8.259593588082903e-05, "loss": 1.6013, "step": 8600 }, { "epoch": 0.17446567357512954, "grad_norm": 0.25823453068733215, "learning_rate": 8.255545660621762e-05, "loss": 1.5988, "step": 8620 }, { "epoch": 0.17487046632124353, "grad_norm": 0.2657330632209778, "learning_rate": 8.251497733160622e-05, "loss": 1.5922, "step": 8640 }, { "epoch": 0.17527525906735753, "grad_norm": 0.2582317590713501, "learning_rate": 8.247449805699483e-05, "loss": 1.5983, "step": 8660 }, { "epoch": 0.17568005181347152, "grad_norm": 0.2535412609577179, "learning_rate": 8.243401878238342e-05, "loss": 1.5937, "step": 8680 }, { "epoch": 0.17608484455958548, "grad_norm": 0.26243895292282104, "learning_rate": 8.239353950777202e-05, "loss": 1.5976, "step": 8700 }, { "epoch": 0.17648963730569947, "grad_norm": 0.28185102343559265, "learning_rate": 8.235306023316062e-05, "loss": 1.5976, "step": 8720 }, { "epoch": 0.17689443005181346, "grad_norm": 0.2566089630126953, "learning_rate": 8.231258095854922e-05, "loss": 1.5926, "step": 8740 }, { "epoch": 0.17729922279792745, "grad_norm": 0.24741581082344055, "learning_rate": 8.227210168393783e-05, "loss": 1.6025, "step": 8760 }, { "epoch": 0.17770401554404144, "grad_norm": 0.264133095741272, "learning_rate": 8.223162240932642e-05, "loss": 1.6008, "step": 8780 }, { "epoch": 0.17810880829015543, "grad_norm": 0.2399357259273529, "learning_rate": 8.219114313471503e-05, "loss": 1.6012, "step": 8800 }, { "epoch": 0.17851360103626943, "grad_norm": 0.25826698541641235, "learning_rate": 8.215066386010363e-05, "loss": 1.5901, "step": 8820 }, { "epoch": 0.17891839378238342, "grad_norm": 0.24916908144950867, "learning_rate": 8.211018458549223e-05, "loss": 1.5909, "step": 8840 }, { "epoch": 0.1793231865284974, "grad_norm": 0.25491708517074585, "learning_rate": 8.206970531088083e-05, "loss": 1.5933, "step": 8860 }, { "epoch": 0.1797279792746114, "grad_norm": 0.26934361457824707, "learning_rate": 8.202922603626944e-05, "loss": 1.5945, "step": 8880 }, { "epoch": 0.1801327720207254, "grad_norm": 0.2536107003688812, "learning_rate": 8.198874676165803e-05, "loss": 1.5948, "step": 8900 }, { "epoch": 0.18053756476683938, "grad_norm": 0.26126623153686523, "learning_rate": 8.194826748704664e-05, "loss": 1.6032, "step": 8920 }, { "epoch": 0.18094235751295337, "grad_norm": 0.2602163255214691, "learning_rate": 8.190778821243524e-05, "loss": 1.6004, "step": 8940 }, { "epoch": 0.18134715025906736, "grad_norm": 0.25399208068847656, "learning_rate": 8.186730893782384e-05, "loss": 1.5996, "step": 8960 }, { "epoch": 0.18175194300518135, "grad_norm": 0.2924685478210449, "learning_rate": 8.182682966321244e-05, "loss": 1.6061, "step": 8980 }, { "epoch": 0.18215673575129535, "grad_norm": 0.26434484124183655, "learning_rate": 8.178635038860105e-05, "loss": 1.5985, "step": 9000 }, { "epoch": 0.18256152849740934, "grad_norm": 0.25423070788383484, "learning_rate": 8.174587111398964e-05, "loss": 1.5942, "step": 9020 }, { "epoch": 0.18296632124352333, "grad_norm": 0.2583799660205841, "learning_rate": 8.170539183937824e-05, "loss": 1.5945, "step": 9040 }, { "epoch": 0.18337111398963732, "grad_norm": 0.23799210786819458, "learning_rate": 8.166491256476683e-05, "loss": 1.5921, "step": 9060 }, { "epoch": 0.18377590673575128, "grad_norm": 0.260436087846756, "learning_rate": 8.162443329015544e-05, "loss": 1.6023, "step": 9080 }, { "epoch": 0.18418069948186527, "grad_norm": 0.24803146719932556, "learning_rate": 8.158395401554405e-05, "loss": 1.6034, "step": 9100 }, { "epoch": 0.18458549222797926, "grad_norm": 0.25979915261268616, "learning_rate": 8.154347474093264e-05, "loss": 1.5961, "step": 9120 }, { "epoch": 0.18499028497409326, "grad_norm": 0.2547805905342102, "learning_rate": 8.150299546632125e-05, "loss": 1.5949, "step": 9140 }, { "epoch": 0.18539507772020725, "grad_norm": 0.2539021670818329, "learning_rate": 8.146251619170985e-05, "loss": 1.5961, "step": 9160 }, { "epoch": 0.18579987046632124, "grad_norm": 0.25949668884277344, "learning_rate": 8.142203691709845e-05, "loss": 1.5981, "step": 9180 }, { "epoch": 0.18620466321243523, "grad_norm": 0.25621169805526733, "learning_rate": 8.138155764248705e-05, "loss": 1.5907, "step": 9200 }, { "epoch": 0.18660945595854922, "grad_norm": 0.2772185206413269, "learning_rate": 8.134107836787566e-05, "loss": 1.5928, "step": 9220 }, { "epoch": 0.1870142487046632, "grad_norm": 0.2553258240222931, "learning_rate": 8.130059909326425e-05, "loss": 1.591, "step": 9240 }, { "epoch": 0.1874190414507772, "grad_norm": 0.24664489924907684, "learning_rate": 8.126011981865286e-05, "loss": 1.591, "step": 9260 }, { "epoch": 0.1878238341968912, "grad_norm": 0.2554773688316345, "learning_rate": 8.121964054404146e-05, "loss": 1.5972, "step": 9280 }, { "epoch": 0.18822862694300518, "grad_norm": 0.2606831192970276, "learning_rate": 8.117916126943006e-05, "loss": 1.5977, "step": 9300 }, { "epoch": 0.18863341968911918, "grad_norm": 0.2572855055332184, "learning_rate": 8.113868199481866e-05, "loss": 1.5978, "step": 9320 }, { "epoch": 0.18903821243523317, "grad_norm": 0.27946576476097107, "learning_rate": 8.109820272020727e-05, "loss": 1.6013, "step": 9340 }, { "epoch": 0.18944300518134716, "grad_norm": 0.2458900809288025, "learning_rate": 8.105772344559586e-05, "loss": 1.5906, "step": 9360 }, { "epoch": 0.18984779792746115, "grad_norm": 0.2533491253852844, "learning_rate": 8.101724417098446e-05, "loss": 1.5926, "step": 9380 }, { "epoch": 0.19025259067357514, "grad_norm": 0.2676069438457489, "learning_rate": 8.097676489637305e-05, "loss": 1.5914, "step": 9400 }, { "epoch": 0.19065738341968913, "grad_norm": 0.24140405654907227, "learning_rate": 8.093628562176166e-05, "loss": 1.5967, "step": 9420 }, { "epoch": 0.19106217616580312, "grad_norm": 0.2524943947792053, "learning_rate": 8.089580634715026e-05, "loss": 1.5981, "step": 9440 }, { "epoch": 0.19146696891191708, "grad_norm": 0.2566254138946533, "learning_rate": 8.085532707253886e-05, "loss": 1.5959, "step": 9460 }, { "epoch": 0.19187176165803108, "grad_norm": 0.283772349357605, "learning_rate": 8.081484779792746e-05, "loss": 1.5959, "step": 9480 }, { "epoch": 0.19227655440414507, "grad_norm": 0.2549557089805603, "learning_rate": 8.077436852331607e-05, "loss": 1.5907, "step": 9500 }, { "epoch": 0.19268134715025906, "grad_norm": 0.25546470284461975, "learning_rate": 8.073388924870466e-05, "loss": 1.593, "step": 9520 }, { "epoch": 0.19308613989637305, "grad_norm": 0.2502375841140747, "learning_rate": 8.069340997409327e-05, "loss": 1.5975, "step": 9540 }, { "epoch": 0.19349093264248704, "grad_norm": 0.2540940046310425, "learning_rate": 8.065293069948186e-05, "loss": 1.5911, "step": 9560 }, { "epoch": 0.19389572538860103, "grad_norm": 0.255295991897583, "learning_rate": 8.061245142487047e-05, "loss": 1.6002, "step": 9580 }, { "epoch": 0.19430051813471502, "grad_norm": 0.2643044888973236, "learning_rate": 8.057197215025907e-05, "loss": 1.589, "step": 9600 }, { "epoch": 0.194705310880829, "grad_norm": 0.25527581572532654, "learning_rate": 8.053149287564768e-05, "loss": 1.5929, "step": 9620 }, { "epoch": 0.195110103626943, "grad_norm": 0.2616470754146576, "learning_rate": 8.049101360103627e-05, "loss": 1.5967, "step": 9640 }, { "epoch": 0.195514896373057, "grad_norm": 0.2519463002681732, "learning_rate": 8.045053432642488e-05, "loss": 1.5941, "step": 9660 }, { "epoch": 0.195919689119171, "grad_norm": 0.2523566484451294, "learning_rate": 8.041005505181347e-05, "loss": 1.5917, "step": 9680 }, { "epoch": 0.19632448186528498, "grad_norm": 0.2536655068397522, "learning_rate": 8.036957577720208e-05, "loss": 1.5929, "step": 9700 }, { "epoch": 0.19672927461139897, "grad_norm": 0.24879606068134308, "learning_rate": 8.032909650259068e-05, "loss": 1.6003, "step": 9720 }, { "epoch": 0.19713406735751296, "grad_norm": 0.256606787443161, "learning_rate": 8.028861722797929e-05, "loss": 1.5894, "step": 9740 }, { "epoch": 0.19753886010362695, "grad_norm": 0.25646525621414185, "learning_rate": 8.024813795336788e-05, "loss": 1.5919, "step": 9760 }, { "epoch": 0.19794365284974094, "grad_norm": 0.24462023377418518, "learning_rate": 8.020765867875648e-05, "loss": 1.5891, "step": 9780 }, { "epoch": 0.19834844559585493, "grad_norm": 0.24765078723430634, "learning_rate": 8.016717940414507e-05, "loss": 1.5976, "step": 9800 }, { "epoch": 0.19875323834196892, "grad_norm": 0.25651177763938904, "learning_rate": 8.012670012953368e-05, "loss": 1.5943, "step": 9820 }, { "epoch": 0.19915803108808292, "grad_norm": 0.2580338418483734, "learning_rate": 8.008622085492227e-05, "loss": 1.5915, "step": 9840 }, { "epoch": 0.19956282383419688, "grad_norm": 0.2531592845916748, "learning_rate": 8.004574158031088e-05, "loss": 1.5854, "step": 9860 }, { "epoch": 0.19996761658031087, "grad_norm": 0.25719401240348816, "learning_rate": 8.000526230569949e-05, "loss": 1.5939, "step": 9880 }, { "epoch": 0.20037240932642486, "grad_norm": 0.25002726912498474, "learning_rate": 7.996478303108809e-05, "loss": 1.6027, "step": 9900 }, { "epoch": 0.20077720207253885, "grad_norm": 0.24946655333042145, "learning_rate": 7.99243037564767e-05, "loss": 1.5894, "step": 9920 }, { "epoch": 0.20118199481865284, "grad_norm": 0.2550725042819977, "learning_rate": 7.988382448186529e-05, "loss": 1.5927, "step": 9940 }, { "epoch": 0.20158678756476683, "grad_norm": 0.24692833423614502, "learning_rate": 7.98433452072539e-05, "loss": 1.6007, "step": 9960 }, { "epoch": 0.20199158031088082, "grad_norm": 0.2629249691963196, "learning_rate": 7.980286593264249e-05, "loss": 1.5954, "step": 9980 }, { "epoch": 0.20239637305699482, "grad_norm": 0.25489285588264465, "learning_rate": 7.97623866580311e-05, "loss": 1.6002, "step": 10000 }, { "epoch": 0.2028011658031088, "grad_norm": 0.2512948513031006, "learning_rate": 7.97219073834197e-05, "loss": 1.5969, "step": 10020 }, { "epoch": 0.2032059585492228, "grad_norm": 0.2649076282978058, "learning_rate": 7.96814281088083e-05, "loss": 1.5888, "step": 10040 }, { "epoch": 0.2036107512953368, "grad_norm": 0.2525995373725891, "learning_rate": 7.96409488341969e-05, "loss": 1.5898, "step": 10060 }, { "epoch": 0.20401554404145078, "grad_norm": 0.2511776387691498, "learning_rate": 7.96004695595855e-05, "loss": 1.6033, "step": 10080 }, { "epoch": 0.20442033678756477, "grad_norm": 0.2539236545562744, "learning_rate": 7.95599902849741e-05, "loss": 1.5935, "step": 10100 }, { "epoch": 0.20482512953367876, "grad_norm": 0.25965458154678345, "learning_rate": 7.95195110103627e-05, "loss": 1.5984, "step": 10120 }, { "epoch": 0.20522992227979275, "grad_norm": 0.2601739466190338, "learning_rate": 7.947903173575129e-05, "loss": 1.5909, "step": 10140 }, { "epoch": 0.20563471502590674, "grad_norm": 0.24203066527843475, "learning_rate": 7.94385524611399e-05, "loss": 1.597, "step": 10160 }, { "epoch": 0.20603950777202074, "grad_norm": 0.2545487880706787, "learning_rate": 7.93980731865285e-05, "loss": 1.5904, "step": 10180 }, { "epoch": 0.20644430051813473, "grad_norm": 0.25239482522010803, "learning_rate": 7.93575939119171e-05, "loss": 1.5871, "step": 10200 }, { "epoch": 0.20684909326424872, "grad_norm": 0.2586691677570343, "learning_rate": 7.93171146373057e-05, "loss": 1.594, "step": 10220 }, { "epoch": 0.20725388601036268, "grad_norm": 0.258859783411026, "learning_rate": 7.92766353626943e-05, "loss": 1.595, "step": 10240 }, { "epoch": 0.20765867875647667, "grad_norm": 0.2675469219684601, "learning_rate": 7.92361560880829e-05, "loss": 1.5941, "step": 10260 }, { "epoch": 0.20806347150259066, "grad_norm": 0.26026174426078796, "learning_rate": 7.919567681347151e-05, "loss": 1.5891, "step": 10280 }, { "epoch": 0.20846826424870465, "grad_norm": 0.24982236325740814, "learning_rate": 7.91551975388601e-05, "loss": 1.6015, "step": 10300 }, { "epoch": 0.20887305699481865, "grad_norm": 0.24896402657032013, "learning_rate": 7.911471826424871e-05, "loss": 1.5911, "step": 10320 }, { "epoch": 0.20927784974093264, "grad_norm": 0.2681379020214081, "learning_rate": 7.90742389896373e-05, "loss": 1.5865, "step": 10340 }, { "epoch": 0.20968264248704663, "grad_norm": 0.25318673253059387, "learning_rate": 7.903375971502592e-05, "loss": 1.5868, "step": 10360 }, { "epoch": 0.21008743523316062, "grad_norm": 0.24674703180789948, "learning_rate": 7.899328044041451e-05, "loss": 1.5986, "step": 10380 }, { "epoch": 0.2104922279792746, "grad_norm": 0.2523845434188843, "learning_rate": 7.895280116580312e-05, "loss": 1.5928, "step": 10400 }, { "epoch": 0.2108970207253886, "grad_norm": 0.2503935396671295, "learning_rate": 7.891232189119171e-05, "loss": 1.5906, "step": 10420 }, { "epoch": 0.2113018134715026, "grad_norm": 0.2641115188598633, "learning_rate": 7.887184261658032e-05, "loss": 1.5978, "step": 10440 }, { "epoch": 0.21170660621761658, "grad_norm": 0.26203617453575134, "learning_rate": 7.883136334196892e-05, "loss": 1.5968, "step": 10460 }, { "epoch": 0.21211139896373057, "grad_norm": 0.264484167098999, "learning_rate": 7.879088406735751e-05, "loss": 1.5897, "step": 10480 }, { "epoch": 0.21251619170984457, "grad_norm": 0.24978435039520264, "learning_rate": 7.875040479274612e-05, "loss": 1.5827, "step": 10500 }, { "epoch": 0.21292098445595856, "grad_norm": 0.2524019479751587, "learning_rate": 7.870992551813471e-05, "loss": 1.5915, "step": 10520 }, { "epoch": 0.21332577720207255, "grad_norm": 0.24307753145694733, "learning_rate": 7.866944624352331e-05, "loss": 1.5919, "step": 10540 }, { "epoch": 0.21373056994818654, "grad_norm": 0.26245468854904175, "learning_rate": 7.862896696891192e-05, "loss": 1.5917, "step": 10560 }, { "epoch": 0.21413536269430053, "grad_norm": 0.27964532375335693, "learning_rate": 7.858848769430051e-05, "loss": 1.5892, "step": 10580 }, { "epoch": 0.21454015544041452, "grad_norm": 0.2509090006351471, "learning_rate": 7.854800841968912e-05, "loss": 1.5936, "step": 10600 }, { "epoch": 0.21494494818652848, "grad_norm": 0.25142747163772583, "learning_rate": 7.850752914507773e-05, "loss": 1.5924, "step": 10620 }, { "epoch": 0.21534974093264247, "grad_norm": 0.25443148612976074, "learning_rate": 7.846704987046632e-05, "loss": 1.5859, "step": 10640 }, { "epoch": 0.21575453367875647, "grad_norm": 0.26217466592788696, "learning_rate": 7.842657059585493e-05, "loss": 1.5919, "step": 10660 }, { "epoch": 0.21615932642487046, "grad_norm": 0.2519839107990265, "learning_rate": 7.838609132124353e-05, "loss": 1.6042, "step": 10680 }, { "epoch": 0.21656411917098445, "grad_norm": 0.2656114995479584, "learning_rate": 7.834561204663214e-05, "loss": 1.5939, "step": 10700 }, { "epoch": 0.21696891191709844, "grad_norm": 0.254607230424881, "learning_rate": 7.830513277202073e-05, "loss": 1.5982, "step": 10720 }, { "epoch": 0.21737370466321243, "grad_norm": 0.24768118560314178, "learning_rate": 7.826465349740934e-05, "loss": 1.6, "step": 10740 }, { "epoch": 0.21777849740932642, "grad_norm": 0.2539350688457489, "learning_rate": 7.822417422279793e-05, "loss": 1.6008, "step": 10760 }, { "epoch": 0.2181832901554404, "grad_norm": 0.2522987425327301, "learning_rate": 7.818369494818654e-05, "loss": 1.5951, "step": 10780 }, { "epoch": 0.2185880829015544, "grad_norm": 0.24638299643993378, "learning_rate": 7.814321567357514e-05, "loss": 1.5964, "step": 10800 }, { "epoch": 0.2189928756476684, "grad_norm": 0.24264419078826904, "learning_rate": 7.810273639896375e-05, "loss": 1.6019, "step": 10820 }, { "epoch": 0.21939766839378239, "grad_norm": 0.2462218999862671, "learning_rate": 7.806225712435234e-05, "loss": 1.5829, "step": 10840 }, { "epoch": 0.21980246113989638, "grad_norm": 0.26417502760887146, "learning_rate": 7.802177784974093e-05, "loss": 1.5988, "step": 10860 }, { "epoch": 0.22020725388601037, "grad_norm": 0.24230512976646423, "learning_rate": 7.798129857512953e-05, "loss": 1.6006, "step": 10880 }, { "epoch": 0.22061204663212436, "grad_norm": 0.25028443336486816, "learning_rate": 7.794081930051814e-05, "loss": 1.5975, "step": 10900 }, { "epoch": 0.22101683937823835, "grad_norm": 0.2536027729511261, "learning_rate": 7.790034002590673e-05, "loss": 1.5945, "step": 10920 }, { "epoch": 0.22142163212435234, "grad_norm": 0.2474135011434555, "learning_rate": 7.785986075129534e-05, "loss": 1.5988, "step": 10940 }, { "epoch": 0.22182642487046633, "grad_norm": 0.2442152500152588, "learning_rate": 7.781938147668394e-05, "loss": 1.5924, "step": 10960 }, { "epoch": 0.22223121761658032, "grad_norm": 0.2577674984931946, "learning_rate": 7.777890220207254e-05, "loss": 1.5934, "step": 10980 }, { "epoch": 0.22263601036269431, "grad_norm": 0.24965183436870575, "learning_rate": 7.773842292746114e-05, "loss": 1.5869, "step": 11000 }, { "epoch": 0.22304080310880828, "grad_norm": 0.27292531728744507, "learning_rate": 7.769794365284975e-05, "loss": 1.5905, "step": 11020 }, { "epoch": 0.22344559585492227, "grad_norm": 0.26551222801208496, "learning_rate": 7.765746437823834e-05, "loss": 1.5951, "step": 11040 }, { "epoch": 0.22385038860103626, "grad_norm": 0.2455325573682785, "learning_rate": 7.761698510362695e-05, "loss": 1.5888, "step": 11060 }, { "epoch": 0.22425518134715025, "grad_norm": 0.24571633338928223, "learning_rate": 7.757650582901555e-05, "loss": 1.5879, "step": 11080 }, { "epoch": 0.22465997409326424, "grad_norm": 0.2514854669570923, "learning_rate": 7.753602655440415e-05, "loss": 1.587, "step": 11100 }, { "epoch": 0.22506476683937823, "grad_norm": 0.2476220577955246, "learning_rate": 7.749554727979275e-05, "loss": 1.5988, "step": 11120 }, { "epoch": 0.22546955958549222, "grad_norm": 0.2546100318431854, "learning_rate": 7.745506800518136e-05, "loss": 1.6005, "step": 11140 }, { "epoch": 0.22587435233160622, "grad_norm": 0.2504321038722992, "learning_rate": 7.741458873056995e-05, "loss": 1.5948, "step": 11160 }, { "epoch": 0.2262791450777202, "grad_norm": 0.24210825562477112, "learning_rate": 7.737410945595856e-05, "loss": 1.5854, "step": 11180 }, { "epoch": 0.2266839378238342, "grad_norm": 0.24968461692333221, "learning_rate": 7.733363018134716e-05, "loss": 1.5932, "step": 11200 }, { "epoch": 0.2270887305699482, "grad_norm": 0.26049113273620605, "learning_rate": 7.729315090673575e-05, "loss": 1.5949, "step": 11220 }, { "epoch": 0.22749352331606218, "grad_norm": 0.2488970011472702, "learning_rate": 7.725267163212436e-05, "loss": 1.6005, "step": 11240 }, { "epoch": 0.22789831606217617, "grad_norm": 0.2410506308078766, "learning_rate": 7.721219235751295e-05, "loss": 1.5901, "step": 11260 }, { "epoch": 0.22830310880829016, "grad_norm": 0.24874204397201538, "learning_rate": 7.717171308290155e-05, "loss": 1.5854, "step": 11280 }, { "epoch": 0.22870790155440415, "grad_norm": 0.250304251909256, "learning_rate": 7.713123380829016e-05, "loss": 1.5893, "step": 11300 }, { "epoch": 0.22911269430051814, "grad_norm": 0.23917686939239502, "learning_rate": 7.709075453367875e-05, "loss": 1.5861, "step": 11320 }, { "epoch": 0.22951748704663213, "grad_norm": 0.26293718814849854, "learning_rate": 7.705027525906736e-05, "loss": 1.5859, "step": 11340 }, { "epoch": 0.22992227979274613, "grad_norm": 0.265781044960022, "learning_rate": 7.700979598445595e-05, "loss": 1.5944, "step": 11360 }, { "epoch": 0.23032707253886012, "grad_norm": 0.26201075315475464, "learning_rate": 7.696931670984456e-05, "loss": 1.5994, "step": 11380 }, { "epoch": 0.23073186528497408, "grad_norm": 0.24477145075798035, "learning_rate": 7.692883743523317e-05, "loss": 1.5861, "step": 11400 }, { "epoch": 0.23113665803108807, "grad_norm": 0.26161989569664, "learning_rate": 7.688835816062177e-05, "loss": 1.594, "step": 11420 }, { "epoch": 0.23154145077720206, "grad_norm": 0.24190570414066315, "learning_rate": 7.684787888601037e-05, "loss": 1.5909, "step": 11440 }, { "epoch": 0.23194624352331605, "grad_norm": 0.24429760873317719, "learning_rate": 7.680739961139897e-05, "loss": 1.5879, "step": 11460 }, { "epoch": 0.23235103626943004, "grad_norm": 0.25473883748054504, "learning_rate": 7.676692033678758e-05, "loss": 1.593, "step": 11480 }, { "epoch": 0.23275582901554404, "grad_norm": 0.25918036699295044, "learning_rate": 7.672644106217617e-05, "loss": 1.5873, "step": 11500 }, { "epoch": 0.23316062176165803, "grad_norm": 0.26128101348876953, "learning_rate": 7.668596178756478e-05, "loss": 1.5922, "step": 11520 }, { "epoch": 0.23356541450777202, "grad_norm": 0.25297680497169495, "learning_rate": 7.664548251295338e-05, "loss": 1.595, "step": 11540 }, { "epoch": 0.233970207253886, "grad_norm": 0.25346797704696655, "learning_rate": 7.660500323834197e-05, "loss": 1.5904, "step": 11560 }, { "epoch": 0.234375, "grad_norm": 0.24919146299362183, "learning_rate": 7.656452396373058e-05, "loss": 1.597, "step": 11580 }, { "epoch": 0.234779792746114, "grad_norm": 0.25233861804008484, "learning_rate": 7.652404468911917e-05, "loss": 1.5951, "step": 11600 }, { "epoch": 0.23518458549222798, "grad_norm": 0.2522681951522827, "learning_rate": 7.648356541450777e-05, "loss": 1.5943, "step": 11620 }, { "epoch": 0.23558937823834197, "grad_norm": 0.24918240308761597, "learning_rate": 7.644308613989638e-05, "loss": 1.5908, "step": 11640 }, { "epoch": 0.23599417098445596, "grad_norm": 0.24446940422058105, "learning_rate": 7.640260686528497e-05, "loss": 1.5887, "step": 11660 }, { "epoch": 0.23639896373056996, "grad_norm": 0.25252917408943176, "learning_rate": 7.636212759067358e-05, "loss": 1.5875, "step": 11680 }, { "epoch": 0.23680375647668395, "grad_norm": 0.2411518692970276, "learning_rate": 7.632164831606217e-05, "loss": 1.592, "step": 11700 }, { "epoch": 0.23720854922279794, "grad_norm": 0.25404930114746094, "learning_rate": 7.628116904145078e-05, "loss": 1.5943, "step": 11720 }, { "epoch": 0.23761334196891193, "grad_norm": 0.2620036005973816, "learning_rate": 7.624068976683938e-05, "loss": 1.5943, "step": 11740 }, { "epoch": 0.23801813471502592, "grad_norm": 0.2505120038986206, "learning_rate": 7.620021049222799e-05, "loss": 1.5925, "step": 11760 }, { "epoch": 0.23842292746113988, "grad_norm": 0.25527292490005493, "learning_rate": 7.615973121761658e-05, "loss": 1.5901, "step": 11780 }, { "epoch": 0.23882772020725387, "grad_norm": 0.2500896453857422, "learning_rate": 7.611925194300519e-05, "loss": 1.5955, "step": 11800 }, { "epoch": 0.23923251295336787, "grad_norm": 0.25833678245544434, "learning_rate": 7.607877266839378e-05, "loss": 1.5965, "step": 11820 }, { "epoch": 0.23963730569948186, "grad_norm": 0.2632737457752228, "learning_rate": 7.603829339378239e-05, "loss": 1.5966, "step": 11840 }, { "epoch": 0.24004209844559585, "grad_norm": 0.24234770238399506, "learning_rate": 7.599781411917099e-05, "loss": 1.587, "step": 11860 }, { "epoch": 0.24044689119170984, "grad_norm": 0.2508784532546997, "learning_rate": 7.59573348445596e-05, "loss": 1.5931, "step": 11880 }, { "epoch": 0.24085168393782383, "grad_norm": 0.25040122866630554, "learning_rate": 7.591685556994819e-05, "loss": 1.5842, "step": 11900 }, { "epoch": 0.24125647668393782, "grad_norm": 0.26462480425834656, "learning_rate": 7.58763762953368e-05, "loss": 1.5884, "step": 11920 }, { "epoch": 0.2416612694300518, "grad_norm": 0.26456332206726074, "learning_rate": 7.58358970207254e-05, "loss": 1.5898, "step": 11940 }, { "epoch": 0.2420660621761658, "grad_norm": 0.25766006112098694, "learning_rate": 7.579541774611399e-05, "loss": 1.5903, "step": 11960 }, { "epoch": 0.2424708549222798, "grad_norm": 0.25831377506256104, "learning_rate": 7.575493847150258e-05, "loss": 1.5964, "step": 11980 }, { "epoch": 0.24287564766839378, "grad_norm": 0.2469179481267929, "learning_rate": 7.571445919689119e-05, "loss": 1.5904, "step": 12000 }, { "epoch": 0.24328044041450778, "grad_norm": 0.2461230754852295, "learning_rate": 7.567397992227979e-05, "loss": 1.585, "step": 12020 }, { "epoch": 0.24368523316062177, "grad_norm": 0.24161659181118011, "learning_rate": 7.56335006476684e-05, "loss": 1.5994, "step": 12040 }, { "epoch": 0.24409002590673576, "grad_norm": 0.2427559196949005, "learning_rate": 7.559302137305699e-05, "loss": 1.5912, "step": 12060 }, { "epoch": 0.24449481865284975, "grad_norm": 0.25714871287345886, "learning_rate": 7.55525420984456e-05, "loss": 1.5948, "step": 12080 }, { "epoch": 0.24489961139896374, "grad_norm": 0.2645460069179535, "learning_rate": 7.551206282383419e-05, "loss": 1.5909, "step": 12100 }, { "epoch": 0.24530440414507773, "grad_norm": 0.2536097764968872, "learning_rate": 7.54715835492228e-05, "loss": 1.5924, "step": 12120 }, { "epoch": 0.24570919689119172, "grad_norm": 0.24942956864833832, "learning_rate": 7.54311042746114e-05, "loss": 1.5873, "step": 12140 }, { "epoch": 0.24611398963730569, "grad_norm": 0.25323471426963806, "learning_rate": 7.5390625e-05, "loss": 1.5881, "step": 12160 }, { "epoch": 0.24651878238341968, "grad_norm": 0.2649432122707367, "learning_rate": 7.535014572538861e-05, "loss": 1.5925, "step": 12180 }, { "epoch": 0.24692357512953367, "grad_norm": 0.2702746093273163, "learning_rate": 7.530966645077721e-05, "loss": 1.5998, "step": 12200 }, { "epoch": 0.24732836787564766, "grad_norm": 0.2526603639125824, "learning_rate": 7.526918717616582e-05, "loss": 1.5898, "step": 12220 }, { "epoch": 0.24773316062176165, "grad_norm": 0.24973192811012268, "learning_rate": 7.522870790155441e-05, "loss": 1.5911, "step": 12240 }, { "epoch": 0.24813795336787564, "grad_norm": 0.24923986196517944, "learning_rate": 7.518822862694302e-05, "loss": 1.5933, "step": 12260 }, { "epoch": 0.24854274611398963, "grad_norm": 0.2555299401283264, "learning_rate": 7.514774935233161e-05, "loss": 1.5865, "step": 12280 }, { "epoch": 0.24894753886010362, "grad_norm": 0.2410908043384552, "learning_rate": 7.510727007772021e-05, "loss": 1.5899, "step": 12300 }, { "epoch": 0.24935233160621761, "grad_norm": 0.2441083937883377, "learning_rate": 7.506679080310882e-05, "loss": 1.5919, "step": 12320 }, { "epoch": 0.2497571243523316, "grad_norm": 0.25222593545913696, "learning_rate": 7.502631152849741e-05, "loss": 1.5854, "step": 12340 }, { "epoch": 0.2501619170984456, "grad_norm": 0.24196963012218475, "learning_rate": 7.498583225388601e-05, "loss": 1.5835, "step": 12360 }, { "epoch": 0.2505667098445596, "grad_norm": 0.26270240545272827, "learning_rate": 7.494535297927462e-05, "loss": 1.579, "step": 12380 }, { "epoch": 0.2509715025906736, "grad_norm": 0.24174728989601135, "learning_rate": 7.490487370466321e-05, "loss": 1.5934, "step": 12400 }, { "epoch": 0.25137629533678757, "grad_norm": 0.25630810856819153, "learning_rate": 7.486439443005182e-05, "loss": 1.5925, "step": 12420 }, { "epoch": 0.25178108808290156, "grad_norm": 0.2534923255443573, "learning_rate": 7.482391515544041e-05, "loss": 1.5833, "step": 12440 }, { "epoch": 0.25218588082901555, "grad_norm": 0.2523658871650696, "learning_rate": 7.478343588082902e-05, "loss": 1.5895, "step": 12460 }, { "epoch": 0.25259067357512954, "grad_norm": 0.2542167901992798, "learning_rate": 7.474295660621762e-05, "loss": 1.5908, "step": 12480 }, { "epoch": 0.25299546632124353, "grad_norm": 0.25332310795783997, "learning_rate": 7.470247733160622e-05, "loss": 1.5847, "step": 12500 }, { "epoch": 0.2534002590673575, "grad_norm": 0.24558989703655243, "learning_rate": 7.466199805699482e-05, "loss": 1.592, "step": 12520 }, { "epoch": 0.2538050518134715, "grad_norm": 0.2485477328300476, "learning_rate": 7.462151878238343e-05, "loss": 1.589, "step": 12540 }, { "epoch": 0.2542098445595855, "grad_norm": 0.2606450617313385, "learning_rate": 7.458103950777202e-05, "loss": 1.5954, "step": 12560 }, { "epoch": 0.2546146373056995, "grad_norm": 0.24389731884002686, "learning_rate": 7.454056023316063e-05, "loss": 1.5929, "step": 12580 }, { "epoch": 0.2550194300518135, "grad_norm": 0.24685028195381165, "learning_rate": 7.450008095854923e-05, "loss": 1.5863, "step": 12600 }, { "epoch": 0.2554242227979275, "grad_norm": 0.265568345785141, "learning_rate": 7.445960168393783e-05, "loss": 1.5836, "step": 12620 }, { "epoch": 0.25582901554404147, "grad_norm": 0.24226327240467072, "learning_rate": 7.441912240932643e-05, "loss": 1.5877, "step": 12640 }, { "epoch": 0.25623380829015546, "grad_norm": 0.2536940276622772, "learning_rate": 7.437864313471504e-05, "loss": 1.5874, "step": 12660 }, { "epoch": 0.25663860103626945, "grad_norm": 0.2564062476158142, "learning_rate": 7.433816386010363e-05, "loss": 1.5923, "step": 12680 }, { "epoch": 0.25704339378238344, "grad_norm": 0.24665963649749756, "learning_rate": 7.429768458549223e-05, "loss": 1.5791, "step": 12700 }, { "epoch": 0.25744818652849744, "grad_norm": 0.24691948294639587, "learning_rate": 7.425720531088082e-05, "loss": 1.5826, "step": 12720 }, { "epoch": 0.25785297927461137, "grad_norm": 0.2513175308704376, "learning_rate": 7.421672603626943e-05, "loss": 1.5903, "step": 12740 }, { "epoch": 0.25825777202072536, "grad_norm": 0.2531511187553406, "learning_rate": 7.417624676165803e-05, "loss": 1.5907, "step": 12760 }, { "epoch": 0.25866256476683935, "grad_norm": 0.2446722388267517, "learning_rate": 7.413576748704663e-05, "loss": 1.5808, "step": 12780 }, { "epoch": 0.25906735751295334, "grad_norm": 0.24818077683448792, "learning_rate": 7.409528821243523e-05, "loss": 1.5958, "step": 12800 }, { "epoch": 0.25947215025906734, "grad_norm": 0.25071796774864197, "learning_rate": 7.405480893782384e-05, "loss": 1.5987, "step": 12820 }, { "epoch": 0.2598769430051813, "grad_norm": 0.2561854124069214, "learning_rate": 7.401432966321243e-05, "loss": 1.592, "step": 12840 }, { "epoch": 0.2602817357512953, "grad_norm": 0.25546035170555115, "learning_rate": 7.397385038860104e-05, "loss": 1.5834, "step": 12860 }, { "epoch": 0.2606865284974093, "grad_norm": 0.2649073302745819, "learning_rate": 7.393337111398964e-05, "loss": 1.5858, "step": 12880 }, { "epoch": 0.2610913212435233, "grad_norm": 0.2521732747554779, "learning_rate": 7.389289183937824e-05, "loss": 1.5852, "step": 12900 }, { "epoch": 0.2614961139896373, "grad_norm": 0.2643430531024933, "learning_rate": 7.385241256476684e-05, "loss": 1.5875, "step": 12920 }, { "epoch": 0.2619009067357513, "grad_norm": 0.2532569468021393, "learning_rate": 7.381193329015545e-05, "loss": 1.5919, "step": 12940 }, { "epoch": 0.2623056994818653, "grad_norm": 0.2558540999889374, "learning_rate": 7.377145401554405e-05, "loss": 1.5835, "step": 12960 }, { "epoch": 0.26271049222797926, "grad_norm": 0.2434634268283844, "learning_rate": 7.373097474093265e-05, "loss": 1.5883, "step": 12980 }, { "epoch": 0.26311528497409326, "grad_norm": 0.2713398337364197, "learning_rate": 7.369049546632126e-05, "loss": 1.5932, "step": 13000 }, { "epoch": 0.26352007772020725, "grad_norm": 0.25777509808540344, "learning_rate": 7.365001619170985e-05, "loss": 1.586, "step": 13020 }, { "epoch": 0.26392487046632124, "grad_norm": 0.24941089749336243, "learning_rate": 7.360953691709845e-05, "loss": 1.5946, "step": 13040 }, { "epoch": 0.26432966321243523, "grad_norm": 0.25711044669151306, "learning_rate": 7.356905764248704e-05, "loss": 1.5889, "step": 13060 }, { "epoch": 0.2647344559585492, "grad_norm": 0.2536850869655609, "learning_rate": 7.352857836787565e-05, "loss": 1.5881, "step": 13080 }, { "epoch": 0.2651392487046632, "grad_norm": 0.26024213433265686, "learning_rate": 7.348809909326425e-05, "loss": 1.5936, "step": 13100 }, { "epoch": 0.2655440414507772, "grad_norm": 0.30112138390541077, "learning_rate": 7.344761981865285e-05, "loss": 1.5874, "step": 13120 }, { "epoch": 0.2659488341968912, "grad_norm": 0.25028589367866516, "learning_rate": 7.340714054404145e-05, "loss": 1.5891, "step": 13140 }, { "epoch": 0.2663536269430052, "grad_norm": 0.24709436297416687, "learning_rate": 7.336666126943006e-05, "loss": 1.591, "step": 13160 }, { "epoch": 0.2667584196891192, "grad_norm": 0.2420380860567093, "learning_rate": 7.332618199481865e-05, "loss": 1.5885, "step": 13180 }, { "epoch": 0.26716321243523317, "grad_norm": 0.2557374835014343, "learning_rate": 7.328570272020726e-05, "loss": 1.5811, "step": 13200 }, { "epoch": 0.26756800518134716, "grad_norm": 0.26685893535614014, "learning_rate": 7.324522344559586e-05, "loss": 1.5875, "step": 13220 }, { "epoch": 0.26797279792746115, "grad_norm": 0.2509610056877136, "learning_rate": 7.320474417098446e-05, "loss": 1.5879, "step": 13240 }, { "epoch": 0.26837759067357514, "grad_norm": 0.25539469718933105, "learning_rate": 7.316426489637306e-05, "loss": 1.5912, "step": 13260 }, { "epoch": 0.26878238341968913, "grad_norm": 0.2548438012599945, "learning_rate": 7.312378562176167e-05, "loss": 1.5851, "step": 13280 }, { "epoch": 0.2691871761658031, "grad_norm": 0.24992695450782776, "learning_rate": 7.308330634715026e-05, "loss": 1.589, "step": 13300 }, { "epoch": 0.2695919689119171, "grad_norm": 0.254562646150589, "learning_rate": 7.304282707253887e-05, "loss": 1.5926, "step": 13320 }, { "epoch": 0.2699967616580311, "grad_norm": 0.2518201768398285, "learning_rate": 7.300234779792746e-05, "loss": 1.5924, "step": 13340 }, { "epoch": 0.2704015544041451, "grad_norm": 0.24649441242218018, "learning_rate": 7.296186852331607e-05, "loss": 1.5984, "step": 13360 }, { "epoch": 0.2708063471502591, "grad_norm": 0.2454669177532196, "learning_rate": 7.292138924870467e-05, "loss": 1.5895, "step": 13380 }, { "epoch": 0.2712111398963731, "grad_norm": 0.2537255585193634, "learning_rate": 7.288090997409328e-05, "loss": 1.5913, "step": 13400 }, { "epoch": 0.27161593264248707, "grad_norm": 0.2390950620174408, "learning_rate": 7.284043069948187e-05, "loss": 1.5894, "step": 13420 }, { "epoch": 0.27202072538860106, "grad_norm": 0.25758349895477295, "learning_rate": 7.279995142487047e-05, "loss": 1.5879, "step": 13440 }, { "epoch": 0.27242551813471505, "grad_norm": 0.2485639601945877, "learning_rate": 7.275947215025906e-05, "loss": 1.5857, "step": 13460 }, { "epoch": 0.27283031088082904, "grad_norm": 0.23955310881137848, "learning_rate": 7.271899287564767e-05, "loss": 1.5871, "step": 13480 }, { "epoch": 0.27323510362694303, "grad_norm": 0.25414761900901794, "learning_rate": 7.267851360103626e-05, "loss": 1.5942, "step": 13500 }, { "epoch": 0.27363989637305697, "grad_norm": 0.26145389676094055, "learning_rate": 7.263803432642487e-05, "loss": 1.5901, "step": 13520 }, { "epoch": 0.27404468911917096, "grad_norm": 0.2427663952112198, "learning_rate": 7.259755505181347e-05, "loss": 1.593, "step": 13540 }, { "epoch": 0.27444948186528495, "grad_norm": 0.25872233510017395, "learning_rate": 7.255707577720208e-05, "loss": 1.5844, "step": 13560 }, { "epoch": 0.27485427461139894, "grad_norm": 0.24715811014175415, "learning_rate": 7.251659650259067e-05, "loss": 1.5766, "step": 13580 }, { "epoch": 0.27525906735751293, "grad_norm": 0.23996925354003906, "learning_rate": 7.247611722797928e-05, "loss": 1.5863, "step": 13600 }, { "epoch": 0.2756638601036269, "grad_norm": 0.24570484459400177, "learning_rate": 7.243563795336787e-05, "loss": 1.5873, "step": 13620 }, { "epoch": 0.2760686528497409, "grad_norm": 0.25398343801498413, "learning_rate": 7.239515867875648e-05, "loss": 1.5833, "step": 13640 }, { "epoch": 0.2764734455958549, "grad_norm": 0.23981806635856628, "learning_rate": 7.235467940414508e-05, "loss": 1.5865, "step": 13660 }, { "epoch": 0.2768782383419689, "grad_norm": 0.2522233724594116, "learning_rate": 7.231420012953369e-05, "loss": 1.588, "step": 13680 }, { "epoch": 0.2772830310880829, "grad_norm": 0.24912554025650024, "learning_rate": 7.227372085492228e-05, "loss": 1.5885, "step": 13700 }, { "epoch": 0.2776878238341969, "grad_norm": 0.25113922357559204, "learning_rate": 7.223324158031089e-05, "loss": 1.592, "step": 13720 }, { "epoch": 0.27809261658031087, "grad_norm": 0.2483854442834854, "learning_rate": 7.21927623056995e-05, "loss": 1.5908, "step": 13740 }, { "epoch": 0.27849740932642486, "grad_norm": 0.25776833295822144, "learning_rate": 7.215228303108809e-05, "loss": 1.5916, "step": 13760 }, { "epoch": 0.27890220207253885, "grad_norm": 0.25033384561538696, "learning_rate": 7.211180375647669e-05, "loss": 1.5862, "step": 13780 }, { "epoch": 0.27930699481865284, "grad_norm": 0.24455024302005768, "learning_rate": 7.207132448186528e-05, "loss": 1.5848, "step": 13800 }, { "epoch": 0.27971178756476683, "grad_norm": 0.26531246304512024, "learning_rate": 7.203084520725389e-05, "loss": 1.5893, "step": 13820 }, { "epoch": 0.2801165803108808, "grad_norm": 0.2397710382938385, "learning_rate": 7.199036593264248e-05, "loss": 1.585, "step": 13840 }, { "epoch": 0.2805213730569948, "grad_norm": 0.26416775584220886, "learning_rate": 7.194988665803109e-05, "loss": 1.5802, "step": 13860 }, { "epoch": 0.2809261658031088, "grad_norm": 0.24373915791511536, "learning_rate": 7.190940738341969e-05, "loss": 1.5842, "step": 13880 }, { "epoch": 0.2813309585492228, "grad_norm": 0.25859859585762024, "learning_rate": 7.18689281088083e-05, "loss": 1.5881, "step": 13900 }, { "epoch": 0.2817357512953368, "grad_norm": 0.25132840871810913, "learning_rate": 7.182844883419689e-05, "loss": 1.59, "step": 13920 }, { "epoch": 0.2821405440414508, "grad_norm": 0.25717970728874207, "learning_rate": 7.17879695595855e-05, "loss": 1.5878, "step": 13940 }, { "epoch": 0.28254533678756477, "grad_norm": 0.24735580384731293, "learning_rate": 7.17474902849741e-05, "loss": 1.5938, "step": 13960 }, { "epoch": 0.28295012953367876, "grad_norm": 0.24012312293052673, "learning_rate": 7.17070110103627e-05, "loss": 1.5829, "step": 13980 }, { "epoch": 0.28335492227979275, "grad_norm": 0.25788387656211853, "learning_rate": 7.16665317357513e-05, "loss": 1.5855, "step": 14000 }, { "epoch": 0.28375971502590674, "grad_norm": 0.2538747787475586, "learning_rate": 7.16260524611399e-05, "loss": 1.583, "step": 14020 }, { "epoch": 0.28416450777202074, "grad_norm": 0.2840125560760498, "learning_rate": 7.15855731865285e-05, "loss": 1.5874, "step": 14040 }, { "epoch": 0.2845693005181347, "grad_norm": 0.25742262601852417, "learning_rate": 7.154509391191711e-05, "loss": 1.593, "step": 14060 }, { "epoch": 0.2849740932642487, "grad_norm": 0.2524792551994324, "learning_rate": 7.15046146373057e-05, "loss": 1.5747, "step": 14080 }, { "epoch": 0.2853788860103627, "grad_norm": 0.23668603599071503, "learning_rate": 7.146413536269431e-05, "loss": 1.5913, "step": 14100 }, { "epoch": 0.2857836787564767, "grad_norm": 0.2544792890548706, "learning_rate": 7.142365608808291e-05, "loss": 1.5813, "step": 14120 }, { "epoch": 0.2861884715025907, "grad_norm": 0.240151509642601, "learning_rate": 7.13831768134715e-05, "loss": 1.5858, "step": 14140 }, { "epoch": 0.2865932642487047, "grad_norm": 0.24934273958206177, "learning_rate": 7.134269753886011e-05, "loss": 1.5876, "step": 14160 }, { "epoch": 0.2869980569948187, "grad_norm": 0.2466878592967987, "learning_rate": 7.13022182642487e-05, "loss": 1.5869, "step": 14180 }, { "epoch": 0.28740284974093266, "grad_norm": 0.25210094451904297, "learning_rate": 7.12617389896373e-05, "loss": 1.5888, "step": 14200 }, { "epoch": 0.28780764248704666, "grad_norm": 0.2662893831729889, "learning_rate": 7.122125971502591e-05, "loss": 1.5838, "step": 14220 }, { "epoch": 0.28821243523316065, "grad_norm": 0.2559667229652405, "learning_rate": 7.11807804404145e-05, "loss": 1.5934, "step": 14240 }, { "epoch": 0.28861722797927464, "grad_norm": 0.2535194456577301, "learning_rate": 7.114030116580311e-05, "loss": 1.5892, "step": 14260 }, { "epoch": 0.28902202072538863, "grad_norm": 0.24408379197120667, "learning_rate": 7.10998218911917e-05, "loss": 1.5987, "step": 14280 }, { "epoch": 0.28942681347150256, "grad_norm": 0.24821212887763977, "learning_rate": 7.105934261658031e-05, "loss": 1.592, "step": 14300 }, { "epoch": 0.28983160621761656, "grad_norm": 0.24799767136573792, "learning_rate": 7.101886334196891e-05, "loss": 1.5816, "step": 14320 }, { "epoch": 0.29023639896373055, "grad_norm": 0.24958227574825287, "learning_rate": 7.097838406735752e-05, "loss": 1.5873, "step": 14340 }, { "epoch": 0.29064119170984454, "grad_norm": 0.23985649645328522, "learning_rate": 7.093790479274611e-05, "loss": 1.5853, "step": 14360 }, { "epoch": 0.29104598445595853, "grad_norm": 0.2610493004322052, "learning_rate": 7.089742551813472e-05, "loss": 1.5865, "step": 14380 }, { "epoch": 0.2914507772020725, "grad_norm": 0.24761810898780823, "learning_rate": 7.085694624352332e-05, "loss": 1.5867, "step": 14400 }, { "epoch": 0.2918555699481865, "grad_norm": 0.25687283277511597, "learning_rate": 7.081646696891192e-05, "loss": 1.5781, "step": 14420 }, { "epoch": 0.2922603626943005, "grad_norm": 0.24463453888893127, "learning_rate": 7.077598769430052e-05, "loss": 1.5923, "step": 14440 }, { "epoch": 0.2926651554404145, "grad_norm": 0.24728640913963318, "learning_rate": 7.073550841968913e-05, "loss": 1.5858, "step": 14460 }, { "epoch": 0.2930699481865285, "grad_norm": 0.2287147492170334, "learning_rate": 7.069502914507774e-05, "loss": 1.5812, "step": 14480 }, { "epoch": 0.2934747409326425, "grad_norm": 0.2467418611049652, "learning_rate": 7.065454987046633e-05, "loss": 1.5887, "step": 14500 }, { "epoch": 0.29387953367875647, "grad_norm": 0.24195000529289246, "learning_rate": 7.061407059585493e-05, "loss": 1.5761, "step": 14520 }, { "epoch": 0.29428432642487046, "grad_norm": 0.2450076788663864, "learning_rate": 7.057359132124352e-05, "loss": 1.5923, "step": 14540 }, { "epoch": 0.29468911917098445, "grad_norm": 0.24909541010856628, "learning_rate": 7.053311204663213e-05, "loss": 1.5921, "step": 14560 }, { "epoch": 0.29509391191709844, "grad_norm": 0.25438040494918823, "learning_rate": 7.049263277202072e-05, "loss": 1.59, "step": 14580 }, { "epoch": 0.29549870466321243, "grad_norm": 0.2520698606967926, "learning_rate": 7.045215349740933e-05, "loss": 1.5831, "step": 14600 }, { "epoch": 0.2959034974093264, "grad_norm": 0.249233216047287, "learning_rate": 7.041167422279793e-05, "loss": 1.5793, "step": 14620 }, { "epoch": 0.2963082901554404, "grad_norm": 0.25018006563186646, "learning_rate": 7.037119494818653e-05, "loss": 1.5846, "step": 14640 }, { "epoch": 0.2967130829015544, "grad_norm": 0.2645190954208374, "learning_rate": 7.033071567357513e-05, "loss": 1.5869, "step": 14660 }, { "epoch": 0.2971178756476684, "grad_norm": 0.2691304385662079, "learning_rate": 7.029023639896374e-05, "loss": 1.5836, "step": 14680 }, { "epoch": 0.2975226683937824, "grad_norm": 0.2494927942752838, "learning_rate": 7.024975712435233e-05, "loss": 1.5905, "step": 14700 }, { "epoch": 0.2979274611398964, "grad_norm": 0.2438342273235321, "learning_rate": 7.020927784974094e-05, "loss": 1.5909, "step": 14720 }, { "epoch": 0.29833225388601037, "grad_norm": 0.25513431429862976, "learning_rate": 7.016879857512954e-05, "loss": 1.5871, "step": 14740 }, { "epoch": 0.29873704663212436, "grad_norm": 0.2429569810628891, "learning_rate": 7.012831930051814e-05, "loss": 1.5883, "step": 14760 }, { "epoch": 0.29914183937823835, "grad_norm": 0.2394915670156479, "learning_rate": 7.008784002590674e-05, "loss": 1.5826, "step": 14780 }, { "epoch": 0.29954663212435234, "grad_norm": 0.2459208220243454, "learning_rate": 7.004736075129535e-05, "loss": 1.5888, "step": 14800 }, { "epoch": 0.29995142487046633, "grad_norm": 0.24765531718730927, "learning_rate": 7.000688147668394e-05, "loss": 1.5838, "step": 14820 }, { "epoch": 0.3003562176165803, "grad_norm": 0.24874268472194672, "learning_rate": 6.996640220207255e-05, "loss": 1.587, "step": 14840 }, { "epoch": 0.3007610103626943, "grad_norm": 0.24012985825538635, "learning_rate": 6.992592292746115e-05, "loss": 1.5867, "step": 14860 }, { "epoch": 0.3011658031088083, "grad_norm": 0.24804703891277313, "learning_rate": 6.988544365284974e-05, "loss": 1.5865, "step": 14880 }, { "epoch": 0.3015705958549223, "grad_norm": 0.2529352605342865, "learning_rate": 6.984496437823835e-05, "loss": 1.5948, "step": 14900 }, { "epoch": 0.3019753886010363, "grad_norm": 0.24283573031425476, "learning_rate": 6.980448510362694e-05, "loss": 1.5849, "step": 14920 }, { "epoch": 0.3023801813471503, "grad_norm": 0.25809407234191895, "learning_rate": 6.976400582901554e-05, "loss": 1.5809, "step": 14940 }, { "epoch": 0.30278497409326427, "grad_norm": 0.2503294050693512, "learning_rate": 6.972352655440415e-05, "loss": 1.583, "step": 14960 }, { "epoch": 0.30318976683937826, "grad_norm": 0.24834328889846802, "learning_rate": 6.968304727979274e-05, "loss": 1.5883, "step": 14980 }, { "epoch": 0.30359455958549225, "grad_norm": 0.24048243463039398, "learning_rate": 6.964256800518135e-05, "loss": 1.5863, "step": 15000 }, { "epoch": 0.30399935233160624, "grad_norm": 0.24172011017799377, "learning_rate": 6.960208873056994e-05, "loss": 1.579, "step": 15020 }, { "epoch": 0.30440414507772023, "grad_norm": 0.26162466406822205, "learning_rate": 6.956160945595855e-05, "loss": 1.5848, "step": 15040 }, { "epoch": 0.30480893782383417, "grad_norm": 0.2626149654388428, "learning_rate": 6.952113018134715e-05, "loss": 1.5875, "step": 15060 }, { "epoch": 0.30521373056994816, "grad_norm": 0.2432282567024231, "learning_rate": 6.948065090673576e-05, "loss": 1.5912, "step": 15080 }, { "epoch": 0.30561852331606215, "grad_norm": 0.2455858290195465, "learning_rate": 6.944017163212435e-05, "loss": 1.589, "step": 15100 }, { "epoch": 0.30602331606217614, "grad_norm": 0.250660240650177, "learning_rate": 6.939969235751296e-05, "loss": 1.5876, "step": 15120 }, { "epoch": 0.30642810880829013, "grad_norm": 0.24949415028095245, "learning_rate": 6.935921308290155e-05, "loss": 1.5814, "step": 15140 }, { "epoch": 0.3068329015544041, "grad_norm": 0.24110478162765503, "learning_rate": 6.931873380829016e-05, "loss": 1.5842, "step": 15160 }, { "epoch": 0.3072376943005181, "grad_norm": 0.25477948784828186, "learning_rate": 6.927825453367876e-05, "loss": 1.5916, "step": 15180 }, { "epoch": 0.3076424870466321, "grad_norm": 0.2550623416900635, "learning_rate": 6.923777525906737e-05, "loss": 1.5892, "step": 15200 }, { "epoch": 0.3080472797927461, "grad_norm": 0.2648305892944336, "learning_rate": 6.919729598445596e-05, "loss": 1.5877, "step": 15220 }, { "epoch": 0.3084520725388601, "grad_norm": 0.2501918077468872, "learning_rate": 6.915681670984457e-05, "loss": 1.5843, "step": 15240 }, { "epoch": 0.3088568652849741, "grad_norm": 0.25525906682014465, "learning_rate": 6.911633743523316e-05, "loss": 1.5888, "step": 15260 }, { "epoch": 0.30926165803108807, "grad_norm": 0.2414066642522812, "learning_rate": 6.907585816062176e-05, "loss": 1.5858, "step": 15280 }, { "epoch": 0.30966645077720206, "grad_norm": 0.24922241270542145, "learning_rate": 6.903537888601037e-05, "loss": 1.5807, "step": 15300 }, { "epoch": 0.31007124352331605, "grad_norm": 0.24168728291988373, "learning_rate": 6.899489961139896e-05, "loss": 1.5874, "step": 15320 }, { "epoch": 0.31047603626943004, "grad_norm": 0.24871079623699188, "learning_rate": 6.895442033678757e-05, "loss": 1.579, "step": 15340 }, { "epoch": 0.31088082901554404, "grad_norm": 0.24255718290805817, "learning_rate": 6.891394106217617e-05, "loss": 1.5832, "step": 15360 }, { "epoch": 0.311285621761658, "grad_norm": 0.2550848722457886, "learning_rate": 6.887346178756477e-05, "loss": 1.5912, "step": 15380 }, { "epoch": 0.311690414507772, "grad_norm": 0.2409135103225708, "learning_rate": 6.883298251295337e-05, "loss": 1.5799, "step": 15400 }, { "epoch": 0.312095207253886, "grad_norm": 0.2614773213863373, "learning_rate": 6.879250323834198e-05, "loss": 1.5858, "step": 15420 }, { "epoch": 0.3125, "grad_norm": 0.24690207839012146, "learning_rate": 6.875202396373057e-05, "loss": 1.5837, "step": 15440 }, { "epoch": 0.312904792746114, "grad_norm": 0.2487402856349945, "learning_rate": 6.871154468911918e-05, "loss": 1.5769, "step": 15460 }, { "epoch": 0.313309585492228, "grad_norm": 0.2449761927127838, "learning_rate": 6.867106541450777e-05, "loss": 1.585, "step": 15480 }, { "epoch": 0.313714378238342, "grad_norm": 0.2417188584804535, "learning_rate": 6.863058613989638e-05, "loss": 1.5881, "step": 15500 }, { "epoch": 0.31411917098445596, "grad_norm": 0.26612526178359985, "learning_rate": 6.859010686528498e-05, "loss": 1.5848, "step": 15520 }, { "epoch": 0.31452396373056996, "grad_norm": 0.2397749274969101, "learning_rate": 6.854962759067359e-05, "loss": 1.5891, "step": 15540 }, { "epoch": 0.31492875647668395, "grad_norm": 0.23989450931549072, "learning_rate": 6.850914831606218e-05, "loss": 1.5773, "step": 15560 }, { "epoch": 0.31533354922279794, "grad_norm": 0.23932969570159912, "learning_rate": 6.846866904145079e-05, "loss": 1.5889, "step": 15580 }, { "epoch": 0.31573834196891193, "grad_norm": 0.2363348752260208, "learning_rate": 6.842818976683938e-05, "loss": 1.5824, "step": 15600 }, { "epoch": 0.3161431347150259, "grad_norm": 0.26289528608322144, "learning_rate": 6.838771049222798e-05, "loss": 1.5824, "step": 15620 }, { "epoch": 0.3165479274611399, "grad_norm": 0.23674596846103668, "learning_rate": 6.834723121761657e-05, "loss": 1.5851, "step": 15640 }, { "epoch": 0.3169527202072539, "grad_norm": 0.24239905178546906, "learning_rate": 6.830675194300518e-05, "loss": 1.5884, "step": 15660 }, { "epoch": 0.3173575129533679, "grad_norm": 0.25282105803489685, "learning_rate": 6.826627266839378e-05, "loss": 1.5806, "step": 15680 }, { "epoch": 0.3177623056994819, "grad_norm": 0.25046810507774353, "learning_rate": 6.822579339378239e-05, "loss": 1.5815, "step": 15700 }, { "epoch": 0.3181670984455959, "grad_norm": 0.24764792621135712, "learning_rate": 6.818531411917098e-05, "loss": 1.58, "step": 15720 }, { "epoch": 0.31857189119170987, "grad_norm": 0.26262742280960083, "learning_rate": 6.814483484455959e-05, "loss": 1.5821, "step": 15740 }, { "epoch": 0.31897668393782386, "grad_norm": 0.24907901883125305, "learning_rate": 6.810435556994818e-05, "loss": 1.5907, "step": 15760 }, { "epoch": 0.31938147668393785, "grad_norm": 0.2536579668521881, "learning_rate": 6.806387629533679e-05, "loss": 1.5862, "step": 15780 }, { "epoch": 0.31978626943005184, "grad_norm": 0.24011904001235962, "learning_rate": 6.802339702072539e-05, "loss": 1.5861, "step": 15800 }, { "epoch": 0.32019106217616583, "grad_norm": 0.24442733824253082, "learning_rate": 6.7982917746114e-05, "loss": 1.5816, "step": 15820 }, { "epoch": 0.32059585492227977, "grad_norm": 0.25342857837677, "learning_rate": 6.794243847150259e-05, "loss": 1.5861, "step": 15840 }, { "epoch": 0.32100064766839376, "grad_norm": 0.25119903683662415, "learning_rate": 6.79019591968912e-05, "loss": 1.5819, "step": 15860 }, { "epoch": 0.32140544041450775, "grad_norm": 0.23586039245128632, "learning_rate": 6.78614799222798e-05, "loss": 1.5844, "step": 15880 }, { "epoch": 0.32181023316062174, "grad_norm": 0.2522569000720978, "learning_rate": 6.78210006476684e-05, "loss": 1.5849, "step": 15900 }, { "epoch": 0.32221502590673573, "grad_norm": 0.2512165606021881, "learning_rate": 6.7780521373057e-05, "loss": 1.5861, "step": 15920 }, { "epoch": 0.3226198186528497, "grad_norm": 0.24650046229362488, "learning_rate": 6.77400420984456e-05, "loss": 1.5864, "step": 15940 }, { "epoch": 0.3230246113989637, "grad_norm": 0.26559415459632874, "learning_rate": 6.76995628238342e-05, "loss": 1.5913, "step": 15960 }, { "epoch": 0.3234294041450777, "grad_norm": 0.24630406498908997, "learning_rate": 6.765908354922281e-05, "loss": 1.5829, "step": 15980 }, { "epoch": 0.3238341968911917, "grad_norm": 0.25118187069892883, "learning_rate": 6.76186042746114e-05, "loss": 1.5863, "step": 16000 }, { "epoch": 0.3242389896373057, "grad_norm": 0.24772891402244568, "learning_rate": 6.7578125e-05, "loss": 1.5789, "step": 16020 }, { "epoch": 0.3246437823834197, "grad_norm": 0.24637706577777863, "learning_rate": 6.75376457253886e-05, "loss": 1.5795, "step": 16040 }, { "epoch": 0.32504857512953367, "grad_norm": 0.2432299256324768, "learning_rate": 6.74971664507772e-05, "loss": 1.5859, "step": 16060 }, { "epoch": 0.32545336787564766, "grad_norm": 0.2433583289384842, "learning_rate": 6.745668717616581e-05, "loss": 1.5768, "step": 16080 }, { "epoch": 0.32585816062176165, "grad_norm": 0.2578054368495941, "learning_rate": 6.74162079015544e-05, "loss": 1.5834, "step": 16100 }, { "epoch": 0.32626295336787564, "grad_norm": 0.2513803243637085, "learning_rate": 6.737572862694301e-05, "loss": 1.5876, "step": 16120 }, { "epoch": 0.32666774611398963, "grad_norm": 0.24841587245464325, "learning_rate": 6.733524935233161e-05, "loss": 1.5768, "step": 16140 }, { "epoch": 0.3270725388601036, "grad_norm": 0.24936723709106445, "learning_rate": 6.729477007772022e-05, "loss": 1.5831, "step": 16160 }, { "epoch": 0.3274773316062176, "grad_norm": 0.24816541373729706, "learning_rate": 6.725429080310881e-05, "loss": 1.5875, "step": 16180 }, { "epoch": 0.3278821243523316, "grad_norm": 0.24738910794258118, "learning_rate": 6.721381152849742e-05, "loss": 1.5799, "step": 16200 }, { "epoch": 0.3282869170984456, "grad_norm": 0.24897895753383636, "learning_rate": 6.717333225388601e-05, "loss": 1.5842, "step": 16220 }, { "epoch": 0.3286917098445596, "grad_norm": 0.23734472692012787, "learning_rate": 6.713285297927462e-05, "loss": 1.5854, "step": 16240 }, { "epoch": 0.3290965025906736, "grad_norm": 0.2519455552101135, "learning_rate": 6.709237370466322e-05, "loss": 1.5836, "step": 16260 }, { "epoch": 0.32950129533678757, "grad_norm": 0.23885029554367065, "learning_rate": 6.705189443005183e-05, "loss": 1.5837, "step": 16280 }, { "epoch": 0.32990608808290156, "grad_norm": 0.24657045304775238, "learning_rate": 6.701141515544042e-05, "loss": 1.5873, "step": 16300 }, { "epoch": 0.33031088082901555, "grad_norm": 0.2508114278316498, "learning_rate": 6.697093588082903e-05, "loss": 1.5902, "step": 16320 }, { "epoch": 0.33071567357512954, "grad_norm": 0.4304337799549103, "learning_rate": 6.693045660621762e-05, "loss": 1.5861, "step": 16340 }, { "epoch": 0.33112046632124353, "grad_norm": 0.24405381083488464, "learning_rate": 6.688997733160622e-05, "loss": 1.5801, "step": 16360 }, { "epoch": 0.3315252590673575, "grad_norm": 0.24140587449073792, "learning_rate": 6.684949805699481e-05, "loss": 1.5813, "step": 16380 }, { "epoch": 0.3319300518134715, "grad_norm": 0.2539791762828827, "learning_rate": 6.680901878238342e-05, "loss": 1.5893, "step": 16400 }, { "epoch": 0.3323348445595855, "grad_norm": 0.24591654539108276, "learning_rate": 6.676853950777202e-05, "loss": 1.5816, "step": 16420 }, { "epoch": 0.3327396373056995, "grad_norm": 0.24630692601203918, "learning_rate": 6.672806023316062e-05, "loss": 1.5793, "step": 16440 }, { "epoch": 0.3331444300518135, "grad_norm": 0.25175702571868896, "learning_rate": 6.668758095854922e-05, "loss": 1.5816, "step": 16460 }, { "epoch": 0.3335492227979275, "grad_norm": 0.24350403249263763, "learning_rate": 6.664710168393783e-05, "loss": 1.5837, "step": 16480 }, { "epoch": 0.33395401554404147, "grad_norm": 0.25533396005630493, "learning_rate": 6.660662240932642e-05, "loss": 1.5841, "step": 16500 }, { "epoch": 0.33435880829015546, "grad_norm": 0.2518693804740906, "learning_rate": 6.656614313471503e-05, "loss": 1.5805, "step": 16520 }, { "epoch": 0.33476360103626945, "grad_norm": 0.25560125708580017, "learning_rate": 6.652566386010363e-05, "loss": 1.5901, "step": 16540 }, { "epoch": 0.33516839378238344, "grad_norm": 0.24111028015613556, "learning_rate": 6.648518458549223e-05, "loss": 1.5852, "step": 16560 }, { "epoch": 0.33557318652849744, "grad_norm": 0.2662928104400635, "learning_rate": 6.644470531088083e-05, "loss": 1.5831, "step": 16580 }, { "epoch": 0.33597797927461137, "grad_norm": 0.24652336537837982, "learning_rate": 6.640422603626944e-05, "loss": 1.5771, "step": 16600 }, { "epoch": 0.33638277202072536, "grad_norm": 0.24208605289459229, "learning_rate": 6.636374676165803e-05, "loss": 1.5878, "step": 16620 }, { "epoch": 0.33678756476683935, "grad_norm": 0.24391984939575195, "learning_rate": 6.632326748704664e-05, "loss": 1.5824, "step": 16640 }, { "epoch": 0.33719235751295334, "grad_norm": 0.2502712309360504, "learning_rate": 6.628278821243524e-05, "loss": 1.5855, "step": 16660 }, { "epoch": 0.33759715025906734, "grad_norm": 0.25470396876335144, "learning_rate": 6.624230893782384e-05, "loss": 1.5908, "step": 16680 }, { "epoch": 0.3380019430051813, "grad_norm": 0.25602418184280396, "learning_rate": 6.620182966321244e-05, "loss": 1.5872, "step": 16700 }, { "epoch": 0.3384067357512953, "grad_norm": 0.25245505571365356, "learning_rate": 6.616135038860103e-05, "loss": 1.5742, "step": 16720 }, { "epoch": 0.3388115284974093, "grad_norm": 0.24720321595668793, "learning_rate": 6.612087111398964e-05, "loss": 1.5918, "step": 16740 }, { "epoch": 0.3392163212435233, "grad_norm": 0.2769855558872223, "learning_rate": 6.608039183937824e-05, "loss": 1.5923, "step": 16760 }, { "epoch": 0.3396211139896373, "grad_norm": 0.2473434954881668, "learning_rate": 6.603991256476683e-05, "loss": 1.5899, "step": 16780 }, { "epoch": 0.3400259067357513, "grad_norm": 0.2437729835510254, "learning_rate": 6.599943329015544e-05, "loss": 1.5884, "step": 16800 }, { "epoch": 0.3404306994818653, "grad_norm": 0.24158480763435364, "learning_rate": 6.595895401554405e-05, "loss": 1.5779, "step": 16820 }, { "epoch": 0.34083549222797926, "grad_norm": 0.24786756932735443, "learning_rate": 6.591847474093264e-05, "loss": 1.5905, "step": 16840 }, { "epoch": 0.34124028497409326, "grad_norm": 0.24496886134147644, "learning_rate": 6.587799546632125e-05, "loss": 1.5861, "step": 16860 }, { "epoch": 0.34164507772020725, "grad_norm": 0.24919195473194122, "learning_rate": 6.583751619170985e-05, "loss": 1.5788, "step": 16880 }, { "epoch": 0.34204987046632124, "grad_norm": 0.23779380321502686, "learning_rate": 6.579703691709845e-05, "loss": 1.58, "step": 16900 }, { "epoch": 0.34245466321243523, "grad_norm": 0.24397283792495728, "learning_rate": 6.575655764248705e-05, "loss": 1.5859, "step": 16920 }, { "epoch": 0.3428594559585492, "grad_norm": 0.25981587171554565, "learning_rate": 6.571607836787566e-05, "loss": 1.585, "step": 16940 }, { "epoch": 0.3432642487046632, "grad_norm": 0.23750704526901245, "learning_rate": 6.567559909326425e-05, "loss": 1.5831, "step": 16960 }, { "epoch": 0.3436690414507772, "grad_norm": 0.2525272071361542, "learning_rate": 6.563511981865286e-05, "loss": 1.5844, "step": 16980 }, { "epoch": 0.3440738341968912, "grad_norm": 0.24770081043243408, "learning_rate": 6.559464054404146e-05, "loss": 1.5781, "step": 17000 }, { "epoch": 0.3444786269430052, "grad_norm": 0.24769261479377747, "learning_rate": 6.555416126943006e-05, "loss": 1.5872, "step": 17020 }, { "epoch": 0.3448834196891192, "grad_norm": 0.2667384445667267, "learning_rate": 6.551368199481866e-05, "loss": 1.5845, "step": 17040 }, { "epoch": 0.34528821243523317, "grad_norm": 0.2595469057559967, "learning_rate": 6.547320272020727e-05, "loss": 1.5916, "step": 17060 }, { "epoch": 0.34569300518134716, "grad_norm": 0.2431216835975647, "learning_rate": 6.543272344559586e-05, "loss": 1.5856, "step": 17080 }, { "epoch": 0.34609779792746115, "grad_norm": 0.2448737919330597, "learning_rate": 6.539224417098446e-05, "loss": 1.5839, "step": 17100 }, { "epoch": 0.34650259067357514, "grad_norm": 0.2503395080566406, "learning_rate": 6.535176489637305e-05, "loss": 1.5817, "step": 17120 }, { "epoch": 0.34690738341968913, "grad_norm": 0.25436753034591675, "learning_rate": 6.531128562176166e-05, "loss": 1.5866, "step": 17140 }, { "epoch": 0.3473121761658031, "grad_norm": 0.24557563662528992, "learning_rate": 6.527080634715025e-05, "loss": 1.5868, "step": 17160 }, { "epoch": 0.3477169689119171, "grad_norm": 0.25886863470077515, "learning_rate": 6.523032707253886e-05, "loss": 1.5929, "step": 17180 }, { "epoch": 0.3481217616580311, "grad_norm": 0.23841175436973572, "learning_rate": 6.518984779792746e-05, "loss": 1.5878, "step": 17200 }, { "epoch": 0.3485265544041451, "grad_norm": 0.252944678068161, "learning_rate": 6.514936852331607e-05, "loss": 1.5809, "step": 17220 }, { "epoch": 0.3489313471502591, "grad_norm": 0.23432187736034393, "learning_rate": 6.510888924870466e-05, "loss": 1.5792, "step": 17240 }, { "epoch": 0.3493361398963731, "grad_norm": 0.24210631847381592, "learning_rate": 6.506840997409327e-05, "loss": 1.5842, "step": 17260 }, { "epoch": 0.34974093264248707, "grad_norm": 0.2402646541595459, "learning_rate": 6.502793069948186e-05, "loss": 1.5859, "step": 17280 }, { "epoch": 0.35014572538860106, "grad_norm": 0.24938997626304626, "learning_rate": 6.498745142487047e-05, "loss": 1.5921, "step": 17300 }, { "epoch": 0.35055051813471505, "grad_norm": 0.24891935288906097, "learning_rate": 6.494697215025907e-05, "loss": 1.5843, "step": 17320 }, { "epoch": 0.35095531088082904, "grad_norm": 0.24175982177257538, "learning_rate": 6.490649287564768e-05, "loss": 1.5789, "step": 17340 }, { "epoch": 0.35136010362694303, "grad_norm": 0.24067789316177368, "learning_rate": 6.486601360103627e-05, "loss": 1.5843, "step": 17360 }, { "epoch": 0.35176489637305697, "grad_norm": 0.24425911903381348, "learning_rate": 6.482553432642488e-05, "loss": 1.5787, "step": 17380 }, { "epoch": 0.35216968911917096, "grad_norm": 0.24763159453868866, "learning_rate": 6.478505505181347e-05, "loss": 1.5907, "step": 17400 }, { "epoch": 0.35257448186528495, "grad_norm": 0.25963354110717773, "learning_rate": 6.474457577720208e-05, "loss": 1.5837, "step": 17420 }, { "epoch": 0.35297927461139894, "grad_norm": 0.2555966377258301, "learning_rate": 6.470409650259068e-05, "loss": 1.5828, "step": 17440 }, { "epoch": 0.35338406735751293, "grad_norm": 0.24680909514427185, "learning_rate": 6.466361722797927e-05, "loss": 1.5919, "step": 17460 }, { "epoch": 0.3537888601036269, "grad_norm": 0.24828499555587769, "learning_rate": 6.462313795336788e-05, "loss": 1.5819, "step": 17480 }, { "epoch": 0.3541936528497409, "grad_norm": 0.25551852583885193, "learning_rate": 6.458265867875648e-05, "loss": 1.5807, "step": 17500 }, { "epoch": 0.3545984455958549, "grad_norm": 0.24701139330863953, "learning_rate": 6.454217940414507e-05, "loss": 1.5887, "step": 17520 }, { "epoch": 0.3550032383419689, "grad_norm": 0.2554754614830017, "learning_rate": 6.450170012953368e-05, "loss": 1.5892, "step": 17540 }, { "epoch": 0.3554080310880829, "grad_norm": 0.23969683051109314, "learning_rate": 6.446122085492227e-05, "loss": 1.5739, "step": 17560 }, { "epoch": 0.3558128238341969, "grad_norm": 0.2561412751674652, "learning_rate": 6.442074158031088e-05, "loss": 1.5806, "step": 17580 }, { "epoch": 0.35621761658031087, "grad_norm": 0.24652843177318573, "learning_rate": 6.438026230569949e-05, "loss": 1.593, "step": 17600 }, { "epoch": 0.35662240932642486, "grad_norm": 0.24892421066761017, "learning_rate": 6.433978303108808e-05, "loss": 1.5834, "step": 17620 }, { "epoch": 0.35702720207253885, "grad_norm": 0.24280443787574768, "learning_rate": 6.429930375647669e-05, "loss": 1.5838, "step": 17640 }, { "epoch": 0.35743199481865284, "grad_norm": 0.25388795137405396, "learning_rate": 6.425882448186529e-05, "loss": 1.59, "step": 17660 }, { "epoch": 0.35783678756476683, "grad_norm": 0.25656792521476746, "learning_rate": 6.42183452072539e-05, "loss": 1.5816, "step": 17680 }, { "epoch": 0.3582415803108808, "grad_norm": 0.2431263029575348, "learning_rate": 6.417786593264249e-05, "loss": 1.5823, "step": 17700 }, { "epoch": 0.3586463730569948, "grad_norm": 0.24698467552661896, "learning_rate": 6.41373866580311e-05, "loss": 1.5756, "step": 17720 }, { "epoch": 0.3590511658031088, "grad_norm": 0.24756886065006256, "learning_rate": 6.40969073834197e-05, "loss": 1.5886, "step": 17740 }, { "epoch": 0.3594559585492228, "grad_norm": 0.251569002866745, "learning_rate": 6.40564281088083e-05, "loss": 1.5853, "step": 17760 }, { "epoch": 0.3598607512953368, "grad_norm": 0.24534490704536438, "learning_rate": 6.40159488341969e-05, "loss": 1.5804, "step": 17780 }, { "epoch": 0.3602655440414508, "grad_norm": 0.2370930165052414, "learning_rate": 6.397546955958549e-05, "loss": 1.5799, "step": 17800 }, { "epoch": 0.36067033678756477, "grad_norm": 0.2554572522640228, "learning_rate": 6.39349902849741e-05, "loss": 1.5801, "step": 17820 }, { "epoch": 0.36107512953367876, "grad_norm": 0.23955076932907104, "learning_rate": 6.38945110103627e-05, "loss": 1.5845, "step": 17840 }, { "epoch": 0.36147992227979275, "grad_norm": 0.24873732030391693, "learning_rate": 6.385403173575129e-05, "loss": 1.5854, "step": 17860 }, { "epoch": 0.36188471502590674, "grad_norm": 0.2509513795375824, "learning_rate": 6.38135524611399e-05, "loss": 1.588, "step": 17880 }, { "epoch": 0.36228950777202074, "grad_norm": 0.2500058710575104, "learning_rate": 6.37730731865285e-05, "loss": 1.5831, "step": 17900 }, { "epoch": 0.3626943005181347, "grad_norm": 0.25541752576828003, "learning_rate": 6.37325939119171e-05, "loss": 1.5727, "step": 17920 }, { "epoch": 0.3630990932642487, "grad_norm": 0.2410811483860016, "learning_rate": 6.36921146373057e-05, "loss": 1.5788, "step": 17940 }, { "epoch": 0.3635038860103627, "grad_norm": 0.24759268760681152, "learning_rate": 6.36516353626943e-05, "loss": 1.5793, "step": 17960 }, { "epoch": 0.3639086787564767, "grad_norm": 0.2572713792324066, "learning_rate": 6.36111560880829e-05, "loss": 1.5859, "step": 17980 }, { "epoch": 0.3643134715025907, "grad_norm": 0.24675361812114716, "learning_rate": 6.357067681347151e-05, "loss": 1.5874, "step": 18000 }, { "epoch": 0.3647182642487047, "grad_norm": 0.24932213127613068, "learning_rate": 6.35301975388601e-05, "loss": 1.5787, "step": 18020 }, { "epoch": 0.3651230569948187, "grad_norm": 0.23247256875038147, "learning_rate": 6.348971826424871e-05, "loss": 1.5891, "step": 18040 }, { "epoch": 0.36552784974093266, "grad_norm": 0.2456364780664444, "learning_rate": 6.34492389896373e-05, "loss": 1.5831, "step": 18060 }, { "epoch": 0.36593264248704666, "grad_norm": 0.24777454137802124, "learning_rate": 6.340875971502591e-05, "loss": 1.5865, "step": 18080 }, { "epoch": 0.36633743523316065, "grad_norm": 0.2401486486196518, "learning_rate": 6.336828044041451e-05, "loss": 1.5763, "step": 18100 }, { "epoch": 0.36674222797927464, "grad_norm": 0.2676100432872772, "learning_rate": 6.332780116580312e-05, "loss": 1.5815, "step": 18120 }, { "epoch": 0.36714702072538863, "grad_norm": 0.24729053676128387, "learning_rate": 6.328732189119171e-05, "loss": 1.5809, "step": 18140 }, { "epoch": 0.36755181347150256, "grad_norm": 0.24506054818630219, "learning_rate": 6.324684261658032e-05, "loss": 1.583, "step": 18160 }, { "epoch": 0.36795660621761656, "grad_norm": 0.24271951615810394, "learning_rate": 6.320636334196892e-05, "loss": 1.5843, "step": 18180 }, { "epoch": 0.36836139896373055, "grad_norm": 0.24985666573047638, "learning_rate": 6.316588406735751e-05, "loss": 1.5808, "step": 18200 }, { "epoch": 0.36876619170984454, "grad_norm": 0.24815523624420166, "learning_rate": 6.31254047927461e-05, "loss": 1.5704, "step": 18220 }, { "epoch": 0.36917098445595853, "grad_norm": 0.2408040165901184, "learning_rate": 6.308492551813471e-05, "loss": 1.5738, "step": 18240 }, { "epoch": 0.3695757772020725, "grad_norm": 0.24704653024673462, "learning_rate": 6.304444624352331e-05, "loss": 1.5837, "step": 18260 }, { "epoch": 0.3699805699481865, "grad_norm": 0.23697836697101593, "learning_rate": 6.300396696891192e-05, "loss": 1.5811, "step": 18280 }, { "epoch": 0.3703853626943005, "grad_norm": 0.243811696767807, "learning_rate": 6.296348769430051e-05, "loss": 1.5854, "step": 18300 }, { "epoch": 0.3707901554404145, "grad_norm": 0.24969127774238586, "learning_rate": 6.292300841968912e-05, "loss": 1.5844, "step": 18320 }, { "epoch": 0.3711949481865285, "grad_norm": 0.26196226477622986, "learning_rate": 6.288252914507773e-05, "loss": 1.5843, "step": 18340 }, { "epoch": 0.3715997409326425, "grad_norm": 0.2553781270980835, "learning_rate": 6.284204987046632e-05, "loss": 1.5794, "step": 18360 }, { "epoch": 0.37200453367875647, "grad_norm": 0.24024589359760284, "learning_rate": 6.280157059585493e-05, "loss": 1.5754, "step": 18380 }, { "epoch": 0.37240932642487046, "grad_norm": 0.24230243265628815, "learning_rate": 6.276109132124353e-05, "loss": 1.5834, "step": 18400 }, { "epoch": 0.37281411917098445, "grad_norm": 0.23252740502357483, "learning_rate": 6.272061204663214e-05, "loss": 1.5827, "step": 18420 }, { "epoch": 0.37321891191709844, "grad_norm": 0.2537935972213745, "learning_rate": 6.268013277202073e-05, "loss": 1.5897, "step": 18440 }, { "epoch": 0.37362370466321243, "grad_norm": 0.2460363656282425, "learning_rate": 6.263965349740934e-05, "loss": 1.5845, "step": 18460 }, { "epoch": 0.3740284974093264, "grad_norm": 0.2424551397562027, "learning_rate": 6.259917422279793e-05, "loss": 1.5837, "step": 18480 }, { "epoch": 0.3744332901554404, "grad_norm": 0.25361236929893494, "learning_rate": 6.255869494818654e-05, "loss": 1.5796, "step": 18500 }, { "epoch": 0.3748380829015544, "grad_norm": 0.27217602729797363, "learning_rate": 6.251821567357514e-05, "loss": 1.5812, "step": 18520 }, { "epoch": 0.3752428756476684, "grad_norm": 0.23510481417179108, "learning_rate": 6.247773639896373e-05, "loss": 1.5754, "step": 18540 }, { "epoch": 0.3756476683937824, "grad_norm": 0.23853644728660583, "learning_rate": 6.243725712435234e-05, "loss": 1.5802, "step": 18560 }, { "epoch": 0.3760524611398964, "grad_norm": 0.23858334124088287, "learning_rate": 6.239677784974093e-05, "loss": 1.5892, "step": 18580 }, { "epoch": 0.37645725388601037, "grad_norm": 0.23926375806331635, "learning_rate": 6.235629857512953e-05, "loss": 1.5854, "step": 18600 }, { "epoch": 0.37686204663212436, "grad_norm": 0.26723161339759827, "learning_rate": 6.231581930051814e-05, "loss": 1.5864, "step": 18620 }, { "epoch": 0.37726683937823835, "grad_norm": 0.23559319972991943, "learning_rate": 6.227534002590673e-05, "loss": 1.5804, "step": 18640 }, { "epoch": 0.37767163212435234, "grad_norm": 0.2563915252685547, "learning_rate": 6.223486075129534e-05, "loss": 1.584, "step": 18660 }, { "epoch": 0.37807642487046633, "grad_norm": 0.24648962914943695, "learning_rate": 6.219438147668394e-05, "loss": 1.5819, "step": 18680 }, { "epoch": 0.3784812176165803, "grad_norm": 0.2422814667224884, "learning_rate": 6.215390220207254e-05, "loss": 1.577, "step": 18700 }, { "epoch": 0.3788860103626943, "grad_norm": 0.25073567032814026, "learning_rate": 6.211342292746114e-05, "loss": 1.5782, "step": 18720 }, { "epoch": 0.3792908031088083, "grad_norm": 0.25149184465408325, "learning_rate": 6.207294365284975e-05, "loss": 1.578, "step": 18740 }, { "epoch": 0.3796955958549223, "grad_norm": 0.2399628460407257, "learning_rate": 6.203246437823834e-05, "loss": 1.5864, "step": 18760 }, { "epoch": 0.3801003886010363, "grad_norm": 0.2580591142177582, "learning_rate": 6.199198510362695e-05, "loss": 1.5866, "step": 18780 }, { "epoch": 0.3805051813471503, "grad_norm": 0.23749858140945435, "learning_rate": 6.195150582901555e-05, "loss": 1.5797, "step": 18800 }, { "epoch": 0.38090997409326427, "grad_norm": 0.2402142733335495, "learning_rate": 6.191102655440415e-05, "loss": 1.5818, "step": 18820 }, { "epoch": 0.38131476683937826, "grad_norm": 0.2343897819519043, "learning_rate": 6.187054727979275e-05, "loss": 1.5803, "step": 18840 }, { "epoch": 0.38171955958549225, "grad_norm": 0.2467813491821289, "learning_rate": 6.183006800518136e-05, "loss": 1.5797, "step": 18860 }, { "epoch": 0.38212435233160624, "grad_norm": 0.24918517470359802, "learning_rate": 6.178958873056995e-05, "loss": 1.5797, "step": 18880 }, { "epoch": 0.38252914507772023, "grad_norm": 0.24628393352031708, "learning_rate": 6.174910945595856e-05, "loss": 1.5812, "step": 18900 }, { "epoch": 0.38293393782383417, "grad_norm": 0.27033117413520813, "learning_rate": 6.170863018134715e-05, "loss": 1.5815, "step": 18920 }, { "epoch": 0.38333873056994816, "grad_norm": 0.24196670949459076, "learning_rate": 6.166815090673575e-05, "loss": 1.5871, "step": 18940 }, { "epoch": 0.38374352331606215, "grad_norm": 0.2414121776819229, "learning_rate": 6.162767163212434e-05, "loss": 1.5857, "step": 18960 }, { "epoch": 0.38414831606217614, "grad_norm": 0.23683834075927734, "learning_rate": 6.158719235751295e-05, "loss": 1.5855, "step": 18980 }, { "epoch": 0.38455310880829013, "grad_norm": 0.24126407504081726, "learning_rate": 6.154671308290155e-05, "loss": 1.5729, "step": 19000 }, { "epoch": 0.3849579015544041, "grad_norm": 0.25900810956954956, "learning_rate": 6.150623380829016e-05, "loss": 1.5869, "step": 19020 }, { "epoch": 0.3853626943005181, "grad_norm": 0.23714515566825867, "learning_rate": 6.146575453367875e-05, "loss": 1.5855, "step": 19040 }, { "epoch": 0.3857674870466321, "grad_norm": 0.24371197819709778, "learning_rate": 6.142527525906736e-05, "loss": 1.5813, "step": 19060 }, { "epoch": 0.3861722797927461, "grad_norm": 0.24776619672775269, "learning_rate": 6.138479598445595e-05, "loss": 1.5775, "step": 19080 }, { "epoch": 0.3865770725388601, "grad_norm": 0.23617494106292725, "learning_rate": 6.134431670984456e-05, "loss": 1.5795, "step": 19100 }, { "epoch": 0.3869818652849741, "grad_norm": 0.24063126742839813, "learning_rate": 6.130383743523317e-05, "loss": 1.5883, "step": 19120 }, { "epoch": 0.38738665803108807, "grad_norm": 0.25122806429862976, "learning_rate": 6.126335816062177e-05, "loss": 1.5836, "step": 19140 }, { "epoch": 0.38779145077720206, "grad_norm": 0.260867178440094, "learning_rate": 6.122287888601037e-05, "loss": 1.5772, "step": 19160 }, { "epoch": 0.38819624352331605, "grad_norm": 0.25371524691581726, "learning_rate": 6.118239961139897e-05, "loss": 1.5806, "step": 19180 }, { "epoch": 0.38860103626943004, "grad_norm": 0.2414660006761551, "learning_rate": 6.114192033678758e-05, "loss": 1.5786, "step": 19200 }, { "epoch": 0.38900582901554404, "grad_norm": 0.25280171632766724, "learning_rate": 6.110144106217617e-05, "loss": 1.5812, "step": 19220 }, { "epoch": 0.389410621761658, "grad_norm": 0.2529459297657013, "learning_rate": 6.106096178756478e-05, "loss": 1.5732, "step": 19240 }, { "epoch": 0.389815414507772, "grad_norm": 0.2489601969718933, "learning_rate": 6.102048251295337e-05, "loss": 1.5877, "step": 19260 }, { "epoch": 0.390220207253886, "grad_norm": 0.24018102884292603, "learning_rate": 6.098000323834198e-05, "loss": 1.5804, "step": 19280 }, { "epoch": 0.390625, "grad_norm": 0.2491941750049591, "learning_rate": 6.093952396373057e-05, "loss": 1.5868, "step": 19300 }, { "epoch": 0.391029792746114, "grad_norm": 0.2602066099643707, "learning_rate": 6.089904468911918e-05, "loss": 1.5735, "step": 19320 }, { "epoch": 0.391434585492228, "grad_norm": 0.23758846521377563, "learning_rate": 6.0858565414507775e-05, "loss": 1.578, "step": 19340 }, { "epoch": 0.391839378238342, "grad_norm": 0.24107594788074493, "learning_rate": 6.081808613989638e-05, "loss": 1.5813, "step": 19360 }, { "epoch": 0.39224417098445596, "grad_norm": 0.246624156832695, "learning_rate": 6.077760686528498e-05, "loss": 1.5779, "step": 19380 }, { "epoch": 0.39264896373056996, "grad_norm": 0.24543163180351257, "learning_rate": 6.073712759067358e-05, "loss": 1.5768, "step": 19400 }, { "epoch": 0.39305375647668395, "grad_norm": 0.24981147050857544, "learning_rate": 6.0696648316062174e-05, "loss": 1.5771, "step": 19420 }, { "epoch": 0.39345854922279794, "grad_norm": 0.255155473947525, "learning_rate": 6.065616904145078e-05, "loss": 1.5753, "step": 19440 }, { "epoch": 0.39386334196891193, "grad_norm": 0.24615484476089478, "learning_rate": 6.061568976683938e-05, "loss": 1.5925, "step": 19460 }, { "epoch": 0.3942681347150259, "grad_norm": 0.24118971824645996, "learning_rate": 6.0575210492227986e-05, "loss": 1.5835, "step": 19480 }, { "epoch": 0.3946729274611399, "grad_norm": 0.2553645968437195, "learning_rate": 6.053473121761658e-05, "loss": 1.5778, "step": 19500 }, { "epoch": 0.3950777202072539, "grad_norm": 0.246222585439682, "learning_rate": 6.049425194300519e-05, "loss": 1.582, "step": 19520 }, { "epoch": 0.3954825129533679, "grad_norm": 0.2432546317577362, "learning_rate": 6.0453772668393784e-05, "loss": 1.5754, "step": 19540 }, { "epoch": 0.3958873056994819, "grad_norm": 0.2417793869972229, "learning_rate": 6.0413293393782386e-05, "loss": 1.5821, "step": 19560 }, { "epoch": 0.3962920984455959, "grad_norm": 0.27505579590797424, "learning_rate": 6.037281411917099e-05, "loss": 1.5773, "step": 19580 }, { "epoch": 0.39669689119170987, "grad_norm": 0.2470622956752777, "learning_rate": 6.033233484455959e-05, "loss": 1.5808, "step": 19600 }, { "epoch": 0.39710168393782386, "grad_norm": 0.2491145133972168, "learning_rate": 6.0291855569948184e-05, "loss": 1.5768, "step": 19620 }, { "epoch": 0.39750647668393785, "grad_norm": 0.25042808055877686, "learning_rate": 6.025137629533679e-05, "loss": 1.5821, "step": 19640 }, { "epoch": 0.39791126943005184, "grad_norm": 0.25375276803970337, "learning_rate": 6.021089702072539e-05, "loss": 1.5769, "step": 19660 }, { "epoch": 0.39831606217616583, "grad_norm": 0.25480642914772034, "learning_rate": 6.0170417746113995e-05, "loss": 1.5826, "step": 19680 }, { "epoch": 0.39872085492227977, "grad_norm": 0.25960665941238403, "learning_rate": 6.012993847150259e-05, "loss": 1.5901, "step": 19700 }, { "epoch": 0.39912564766839376, "grad_norm": 0.24707049131393433, "learning_rate": 6.00894591968912e-05, "loss": 1.5787, "step": 19720 }, { "epoch": 0.39953044041450775, "grad_norm": 0.2617241144180298, "learning_rate": 6.004897992227979e-05, "loss": 1.5821, "step": 19740 }, { "epoch": 0.39993523316062174, "grad_norm": 0.24066516757011414, "learning_rate": 6.0008500647668395e-05, "loss": 1.577, "step": 19760 }, { "epoch": 0.40034002590673573, "grad_norm": 0.2523469030857086, "learning_rate": 5.996802137305699e-05, "loss": 1.5865, "step": 19780 }, { "epoch": 0.4007448186528497, "grad_norm": 0.24109965562820435, "learning_rate": 5.99275420984456e-05, "loss": 1.5863, "step": 19800 }, { "epoch": 0.4011496113989637, "grad_norm": 0.2575005888938904, "learning_rate": 5.988706282383419e-05, "loss": 1.5862, "step": 19820 }, { "epoch": 0.4015544041450777, "grad_norm": 0.24747398495674133, "learning_rate": 5.98465835492228e-05, "loss": 1.5776, "step": 19840 }, { "epoch": 0.4019591968911917, "grad_norm": 0.243538960814476, "learning_rate": 5.9806104274611396e-05, "loss": 1.5739, "step": 19860 }, { "epoch": 0.4023639896373057, "grad_norm": 0.24632398784160614, "learning_rate": 5.9765625000000004e-05, "loss": 1.5715, "step": 19880 }, { "epoch": 0.4027687823834197, "grad_norm": 0.2460109144449234, "learning_rate": 5.972514572538861e-05, "loss": 1.5753, "step": 19900 }, { "epoch": 0.40317357512953367, "grad_norm": 0.242745041847229, "learning_rate": 5.968466645077721e-05, "loss": 1.5763, "step": 19920 }, { "epoch": 0.40357836787564766, "grad_norm": 0.2384742647409439, "learning_rate": 5.964418717616581e-05, "loss": 1.5801, "step": 19940 }, { "epoch": 0.40398316062176165, "grad_norm": 0.26010578870773315, "learning_rate": 5.9603707901554404e-05, "loss": 1.586, "step": 19960 }, { "epoch": 0.40438795336787564, "grad_norm": 0.2514192759990692, "learning_rate": 5.956322862694301e-05, "loss": 1.5864, "step": 19980 }, { "epoch": 0.40479274611398963, "grad_norm": 0.251191109418869, "learning_rate": 5.952274935233161e-05, "loss": 1.5858, "step": 20000 }, { "epoch": 0.4051975388601036, "grad_norm": 0.2459646612405777, "learning_rate": 5.9482270077720215e-05, "loss": 1.5862, "step": 20020 }, { "epoch": 0.4056023316062176, "grad_norm": 0.24906879663467407, "learning_rate": 5.944179080310881e-05, "loss": 1.5773, "step": 20040 }, { "epoch": 0.4060071243523316, "grad_norm": 0.25234705209732056, "learning_rate": 5.940131152849742e-05, "loss": 1.585, "step": 20060 }, { "epoch": 0.4064119170984456, "grad_norm": 0.2533915936946869, "learning_rate": 5.9360832253886013e-05, "loss": 1.5782, "step": 20080 }, { "epoch": 0.4068167098445596, "grad_norm": 0.2521636188030243, "learning_rate": 5.9320352979274615e-05, "loss": 1.5798, "step": 20100 }, { "epoch": 0.4072215025906736, "grad_norm": 0.2462073415517807, "learning_rate": 5.927987370466322e-05, "loss": 1.5781, "step": 20120 }, { "epoch": 0.40762629533678757, "grad_norm": 0.23441433906555176, "learning_rate": 5.923939443005182e-05, "loss": 1.5823, "step": 20140 }, { "epoch": 0.40803108808290156, "grad_norm": 0.2570268213748932, "learning_rate": 5.919891515544041e-05, "loss": 1.585, "step": 20160 }, { "epoch": 0.40843588082901555, "grad_norm": 0.23781001567840576, "learning_rate": 5.915843588082902e-05, "loss": 1.5822, "step": 20180 }, { "epoch": 0.40884067357512954, "grad_norm": 0.24744266271591187, "learning_rate": 5.9117956606217616e-05, "loss": 1.5829, "step": 20200 }, { "epoch": 0.40924546632124353, "grad_norm": 0.2499535232782364, "learning_rate": 5.9077477331606225e-05, "loss": 1.5836, "step": 20220 }, { "epoch": 0.4096502590673575, "grad_norm": 0.24637281894683838, "learning_rate": 5.903699805699482e-05, "loss": 1.5832, "step": 20240 }, { "epoch": 0.4100550518134715, "grad_norm": 0.26862719655036926, "learning_rate": 5.899651878238343e-05, "loss": 1.5849, "step": 20260 }, { "epoch": 0.4104598445595855, "grad_norm": 0.25509053468704224, "learning_rate": 5.895603950777202e-05, "loss": 1.589, "step": 20280 }, { "epoch": 0.4108646373056995, "grad_norm": 0.2569196820259094, "learning_rate": 5.8915560233160624e-05, "loss": 1.5843, "step": 20300 }, { "epoch": 0.4112694300518135, "grad_norm": 0.24363401532173157, "learning_rate": 5.887508095854922e-05, "loss": 1.5799, "step": 20320 }, { "epoch": 0.4116742227979275, "grad_norm": 0.2461979240179062, "learning_rate": 5.883460168393783e-05, "loss": 1.5804, "step": 20340 }, { "epoch": 0.41207901554404147, "grad_norm": 0.2458895444869995, "learning_rate": 5.879412240932642e-05, "loss": 1.5785, "step": 20360 }, { "epoch": 0.41248380829015546, "grad_norm": 0.2490442544221878, "learning_rate": 5.875364313471503e-05, "loss": 1.5825, "step": 20380 }, { "epoch": 0.41288860103626945, "grad_norm": 0.2461615949869156, "learning_rate": 5.8713163860103625e-05, "loss": 1.5763, "step": 20400 }, { "epoch": 0.41329339378238344, "grad_norm": 0.24871477484703064, "learning_rate": 5.8672684585492234e-05, "loss": 1.5823, "step": 20420 }, { "epoch": 0.41369818652849744, "grad_norm": 0.24982692301273346, "learning_rate": 5.863220531088083e-05, "loss": 1.5797, "step": 20440 }, { "epoch": 0.41410297927461137, "grad_norm": 0.2501813769340515, "learning_rate": 5.859172603626944e-05, "loss": 1.5769, "step": 20460 }, { "epoch": 0.41450777202072536, "grad_norm": 0.2521221339702606, "learning_rate": 5.855124676165803e-05, "loss": 1.5864, "step": 20480 }, { "epoch": 0.41491256476683935, "grad_norm": 0.23976652324199677, "learning_rate": 5.8510767487046633e-05, "loss": 1.588, "step": 20500 }, { "epoch": 0.41531735751295334, "grad_norm": 0.2517457902431488, "learning_rate": 5.847028821243523e-05, "loss": 1.5843, "step": 20520 }, { "epoch": 0.41572215025906734, "grad_norm": 0.24530543386936188, "learning_rate": 5.842980893782384e-05, "loss": 1.577, "step": 20540 }, { "epoch": 0.4161269430051813, "grad_norm": 0.24865570664405823, "learning_rate": 5.838932966321243e-05, "loss": 1.5812, "step": 20560 }, { "epoch": 0.4165317357512953, "grad_norm": 0.2549337148666382, "learning_rate": 5.834885038860104e-05, "loss": 1.5744, "step": 20580 }, { "epoch": 0.4169365284974093, "grad_norm": 0.25526952743530273, "learning_rate": 5.8308371113989635e-05, "loss": 1.58, "step": 20600 }, { "epoch": 0.4173413212435233, "grad_norm": 0.2334432154893875, "learning_rate": 5.826789183937824e-05, "loss": 1.5838, "step": 20620 }, { "epoch": 0.4177461139896373, "grad_norm": 0.24034000933170319, "learning_rate": 5.822741256476684e-05, "loss": 1.5796, "step": 20640 }, { "epoch": 0.4181509067357513, "grad_norm": 0.2580803334712982, "learning_rate": 5.8186933290155446e-05, "loss": 1.5883, "step": 20660 }, { "epoch": 0.4185556994818653, "grad_norm": 0.23873770236968994, "learning_rate": 5.814645401554405e-05, "loss": 1.5761, "step": 20680 }, { "epoch": 0.41896049222797926, "grad_norm": 0.24756355583667755, "learning_rate": 5.810597474093264e-05, "loss": 1.577, "step": 20700 }, { "epoch": 0.41936528497409326, "grad_norm": 0.2489444464445114, "learning_rate": 5.806549546632125e-05, "loss": 1.5833, "step": 20720 }, { "epoch": 0.41977007772020725, "grad_norm": 0.24590247869491577, "learning_rate": 5.8025016191709846e-05, "loss": 1.576, "step": 20740 }, { "epoch": 0.42017487046632124, "grad_norm": 0.2543652653694153, "learning_rate": 5.7984536917098454e-05, "loss": 1.5865, "step": 20760 }, { "epoch": 0.42057966321243523, "grad_norm": 0.24202346801757812, "learning_rate": 5.794405764248705e-05, "loss": 1.5829, "step": 20780 }, { "epoch": 0.4209844559585492, "grad_norm": 0.24817276000976562, "learning_rate": 5.790357836787566e-05, "loss": 1.5743, "step": 20800 }, { "epoch": 0.4213892487046632, "grad_norm": 0.24931757152080536, "learning_rate": 5.786309909326425e-05, "loss": 1.5771, "step": 20820 }, { "epoch": 0.4217940414507772, "grad_norm": 0.251913845539093, "learning_rate": 5.7822619818652854e-05, "loss": 1.5839, "step": 20840 }, { "epoch": 0.4221988341968912, "grad_norm": 0.25857794284820557, "learning_rate": 5.778214054404145e-05, "loss": 1.5835, "step": 20860 }, { "epoch": 0.4226036269430052, "grad_norm": 0.2661075294017792, "learning_rate": 5.774166126943006e-05, "loss": 1.5869, "step": 20880 }, { "epoch": 0.4230084196891192, "grad_norm": 0.2429809719324112, "learning_rate": 5.770118199481865e-05, "loss": 1.5712, "step": 20900 }, { "epoch": 0.42341321243523317, "grad_norm": 0.26113590598106384, "learning_rate": 5.766070272020726e-05, "loss": 1.5806, "step": 20920 }, { "epoch": 0.42381800518134716, "grad_norm": 0.24728338420391083, "learning_rate": 5.7620223445595855e-05, "loss": 1.5877, "step": 20940 }, { "epoch": 0.42422279792746115, "grad_norm": 0.24681703746318817, "learning_rate": 5.7579744170984463e-05, "loss": 1.5713, "step": 20960 }, { "epoch": 0.42462759067357514, "grad_norm": 0.24867713451385498, "learning_rate": 5.753926489637306e-05, "loss": 1.5851, "step": 20980 }, { "epoch": 0.42503238341968913, "grad_norm": 0.24595098197460175, "learning_rate": 5.749878562176167e-05, "loss": 1.584, "step": 21000 }, { "epoch": 0.4254371761658031, "grad_norm": 0.2380772978067398, "learning_rate": 5.745830634715026e-05, "loss": 1.5791, "step": 21020 }, { "epoch": 0.4258419689119171, "grad_norm": 0.2467007339000702, "learning_rate": 5.741782707253886e-05, "loss": 1.5764, "step": 21040 }, { "epoch": 0.4262467616580311, "grad_norm": 0.2519536316394806, "learning_rate": 5.737734779792746e-05, "loss": 1.5748, "step": 21060 }, { "epoch": 0.4266515544041451, "grad_norm": 0.25651559233665466, "learning_rate": 5.7336868523316066e-05, "loss": 1.5766, "step": 21080 }, { "epoch": 0.4270563471502591, "grad_norm": 0.2506333887577057, "learning_rate": 5.729638924870466e-05, "loss": 1.5794, "step": 21100 }, { "epoch": 0.4274611398963731, "grad_norm": 0.24100102484226227, "learning_rate": 5.725590997409327e-05, "loss": 1.5853, "step": 21120 }, { "epoch": 0.42786593264248707, "grad_norm": 0.24229608476161957, "learning_rate": 5.7215430699481864e-05, "loss": 1.5712, "step": 21140 }, { "epoch": 0.42827072538860106, "grad_norm": 0.25491997599601746, "learning_rate": 5.717495142487047e-05, "loss": 1.5818, "step": 21160 }, { "epoch": 0.42867551813471505, "grad_norm": 0.24251653254032135, "learning_rate": 5.713447215025907e-05, "loss": 1.5769, "step": 21180 }, { "epoch": 0.42908031088082904, "grad_norm": 0.24870935082435608, "learning_rate": 5.7093992875647676e-05, "loss": 1.5766, "step": 21200 }, { "epoch": 0.42948510362694303, "grad_norm": 0.24288956820964813, "learning_rate": 5.705351360103627e-05, "loss": 1.5753, "step": 21220 }, { "epoch": 0.42988989637305697, "grad_norm": 0.2404078096151352, "learning_rate": 5.701303432642487e-05, "loss": 1.5879, "step": 21240 }, { "epoch": 0.43029468911917096, "grad_norm": 0.2513575851917267, "learning_rate": 5.697255505181347e-05, "loss": 1.5785, "step": 21260 }, { "epoch": 0.43069948186528495, "grad_norm": 0.24376849830150604, "learning_rate": 5.6932075777202075e-05, "loss": 1.5889, "step": 21280 }, { "epoch": 0.43110427461139894, "grad_norm": 0.24590036273002625, "learning_rate": 5.689159650259067e-05, "loss": 1.5772, "step": 21300 }, { "epoch": 0.43150906735751293, "grad_norm": 0.2622210681438446, "learning_rate": 5.685111722797928e-05, "loss": 1.5832, "step": 21320 }, { "epoch": 0.4319138601036269, "grad_norm": 0.2427460253238678, "learning_rate": 5.6810637953367873e-05, "loss": 1.5786, "step": 21340 }, { "epoch": 0.4323186528497409, "grad_norm": 0.2481246292591095, "learning_rate": 5.677015867875648e-05, "loss": 1.5773, "step": 21360 }, { "epoch": 0.4327234455958549, "grad_norm": 0.2460995465517044, "learning_rate": 5.672967940414508e-05, "loss": 1.5808, "step": 21380 }, { "epoch": 0.4331282383419689, "grad_norm": 0.24575719237327576, "learning_rate": 5.6689200129533685e-05, "loss": 1.5739, "step": 21400 }, { "epoch": 0.4335330310880829, "grad_norm": 0.2622489631175995, "learning_rate": 5.664872085492228e-05, "loss": 1.5758, "step": 21420 }, { "epoch": 0.4339378238341969, "grad_norm": 0.24803383648395538, "learning_rate": 5.660824158031088e-05, "loss": 1.5744, "step": 21440 }, { "epoch": 0.43434261658031087, "grad_norm": 0.2456912398338318, "learning_rate": 5.656776230569949e-05, "loss": 1.5937, "step": 21460 }, { "epoch": 0.43474740932642486, "grad_norm": 0.24504289031028748, "learning_rate": 5.6527283031088085e-05, "loss": 1.5737, "step": 21480 }, { "epoch": 0.43515220207253885, "grad_norm": 0.24220041930675507, "learning_rate": 5.648680375647669e-05, "loss": 1.5776, "step": 21500 }, { "epoch": 0.43555699481865284, "grad_norm": 0.24564076960086823, "learning_rate": 5.644632448186529e-05, "loss": 1.5819, "step": 21520 }, { "epoch": 0.43596178756476683, "grad_norm": 0.2491656094789505, "learning_rate": 5.6405845207253896e-05, "loss": 1.5823, "step": 21540 }, { "epoch": 0.4363665803108808, "grad_norm": 0.25434157252311707, "learning_rate": 5.636536593264249e-05, "loss": 1.5747, "step": 21560 }, { "epoch": 0.4367713730569948, "grad_norm": 0.24141888320446014, "learning_rate": 5.632488665803109e-05, "loss": 1.5832, "step": 21580 }, { "epoch": 0.4371761658031088, "grad_norm": 0.24144545197486877, "learning_rate": 5.628440738341969e-05, "loss": 1.5825, "step": 21600 }, { "epoch": 0.4375809585492228, "grad_norm": 0.23808708786964417, "learning_rate": 5.6243928108808296e-05, "loss": 1.5776, "step": 21620 }, { "epoch": 0.4379857512953368, "grad_norm": 0.24379181861877441, "learning_rate": 5.620344883419689e-05, "loss": 1.5768, "step": 21640 }, { "epoch": 0.4383905440414508, "grad_norm": 0.25400081276893616, "learning_rate": 5.61629695595855e-05, "loss": 1.5814, "step": 21660 }, { "epoch": 0.43879533678756477, "grad_norm": 0.258651465177536, "learning_rate": 5.6122490284974094e-05, "loss": 1.5776, "step": 21680 }, { "epoch": 0.43920012953367876, "grad_norm": 0.2372705042362213, "learning_rate": 5.60820110103627e-05, "loss": 1.5748, "step": 21700 }, { "epoch": 0.43960492227979275, "grad_norm": 0.24959991872310638, "learning_rate": 5.60415317357513e-05, "loss": 1.5776, "step": 21720 }, { "epoch": 0.44000971502590674, "grad_norm": 0.2398136705160141, "learning_rate": 5.6001052461139905e-05, "loss": 1.5784, "step": 21740 }, { "epoch": 0.44041450777202074, "grad_norm": 0.2493603229522705, "learning_rate": 5.59605731865285e-05, "loss": 1.5823, "step": 21760 }, { "epoch": 0.4408193005181347, "grad_norm": 0.25303173065185547, "learning_rate": 5.59200939119171e-05, "loss": 1.5814, "step": 21780 }, { "epoch": 0.4412240932642487, "grad_norm": 0.2565922439098358, "learning_rate": 5.58796146373057e-05, "loss": 1.5775, "step": 21800 }, { "epoch": 0.4416288860103627, "grad_norm": 0.2370874583721161, "learning_rate": 5.5839135362694305e-05, "loss": 1.5743, "step": 21820 }, { "epoch": 0.4420336787564767, "grad_norm": 0.25969821214675903, "learning_rate": 5.57986560880829e-05, "loss": 1.5768, "step": 21840 }, { "epoch": 0.4424384715025907, "grad_norm": 0.24763131141662598, "learning_rate": 5.575817681347151e-05, "loss": 1.5853, "step": 21860 }, { "epoch": 0.4428432642487047, "grad_norm": 0.23896072804927826, "learning_rate": 5.57176975388601e-05, "loss": 1.5787, "step": 21880 }, { "epoch": 0.4432480569948187, "grad_norm": 0.25860148668289185, "learning_rate": 5.567721826424871e-05, "loss": 1.583, "step": 21900 }, { "epoch": 0.44365284974093266, "grad_norm": 0.2405397891998291, "learning_rate": 5.5636738989637306e-05, "loss": 1.5774, "step": 21920 }, { "epoch": 0.44405764248704666, "grad_norm": 0.2514347434043884, "learning_rate": 5.5596259715025915e-05, "loss": 1.5753, "step": 21940 }, { "epoch": 0.44446243523316065, "grad_norm": 0.25299617648124695, "learning_rate": 5.555578044041451e-05, "loss": 1.5746, "step": 21960 }, { "epoch": 0.44486722797927464, "grad_norm": 0.2502700984477997, "learning_rate": 5.551530116580311e-05, "loss": 1.5783, "step": 21980 }, { "epoch": 0.44527202072538863, "grad_norm": 0.23508520424365997, "learning_rate": 5.5474821891191706e-05, "loss": 1.5796, "step": 22000 }, { "epoch": 0.44567681347150256, "grad_norm": 0.2410711944103241, "learning_rate": 5.5434342616580314e-05, "loss": 1.5758, "step": 22020 }, { "epoch": 0.44608160621761656, "grad_norm": 0.2589190900325775, "learning_rate": 5.539386334196891e-05, "loss": 1.5727, "step": 22040 }, { "epoch": 0.44648639896373055, "grad_norm": 0.26362332701683044, "learning_rate": 5.535338406735752e-05, "loss": 1.5847, "step": 22060 }, { "epoch": 0.44689119170984454, "grad_norm": 0.24742160737514496, "learning_rate": 5.531290479274611e-05, "loss": 1.5741, "step": 22080 }, { "epoch": 0.44729598445595853, "grad_norm": 0.2454523742198944, "learning_rate": 5.527242551813472e-05, "loss": 1.5804, "step": 22100 }, { "epoch": 0.4477007772020725, "grad_norm": 0.2404516637325287, "learning_rate": 5.5231946243523315e-05, "loss": 1.5793, "step": 22120 }, { "epoch": 0.4481055699481865, "grad_norm": 0.24256962537765503, "learning_rate": 5.519146696891192e-05, "loss": 1.5754, "step": 22140 }, { "epoch": 0.4485103626943005, "grad_norm": 0.23618970811367035, "learning_rate": 5.515098769430052e-05, "loss": 1.5725, "step": 22160 }, { "epoch": 0.4489151554404145, "grad_norm": 0.23993033170700073, "learning_rate": 5.511050841968912e-05, "loss": 1.576, "step": 22180 }, { "epoch": 0.4493199481865285, "grad_norm": 0.25129324197769165, "learning_rate": 5.507002914507773e-05, "loss": 1.5878, "step": 22200 }, { "epoch": 0.4497247409326425, "grad_norm": 0.24356697499752045, "learning_rate": 5.5029549870466323e-05, "loss": 1.5784, "step": 22220 }, { "epoch": 0.45012953367875647, "grad_norm": 0.24752739071846008, "learning_rate": 5.498907059585493e-05, "loss": 1.5841, "step": 22240 }, { "epoch": 0.45053432642487046, "grad_norm": 0.24744142591953278, "learning_rate": 5.4948591321243527e-05, "loss": 1.5799, "step": 22260 }, { "epoch": 0.45093911917098445, "grad_norm": 0.26096445322036743, "learning_rate": 5.4908112046632135e-05, "loss": 1.5769, "step": 22280 }, { "epoch": 0.45134391191709844, "grad_norm": 0.24349306523799896, "learning_rate": 5.486763277202073e-05, "loss": 1.5803, "step": 22300 }, { "epoch": 0.45174870466321243, "grad_norm": 0.2466139793395996, "learning_rate": 5.482715349740933e-05, "loss": 1.5712, "step": 22320 }, { "epoch": 0.4521534974093264, "grad_norm": 0.24382758140563965, "learning_rate": 5.4786674222797926e-05, "loss": 1.5762, "step": 22340 }, { "epoch": 0.4525582901554404, "grad_norm": 0.25856950879096985, "learning_rate": 5.4746194948186535e-05, "loss": 1.5824, "step": 22360 }, { "epoch": 0.4529630829015544, "grad_norm": 0.2511434853076935, "learning_rate": 5.470571567357513e-05, "loss": 1.5678, "step": 22380 }, { "epoch": 0.4533678756476684, "grad_norm": 0.2630114257335663, "learning_rate": 5.466523639896374e-05, "loss": 1.5843, "step": 22400 }, { "epoch": 0.4537726683937824, "grad_norm": 0.24505408108234406, "learning_rate": 5.462475712435233e-05, "loss": 1.577, "step": 22420 }, { "epoch": 0.4541774611398964, "grad_norm": 0.25258609652519226, "learning_rate": 5.458427784974094e-05, "loss": 1.5817, "step": 22440 }, { "epoch": 0.45458225388601037, "grad_norm": 0.24487236142158508, "learning_rate": 5.4543798575129536e-05, "loss": 1.5723, "step": 22460 }, { "epoch": 0.45498704663212436, "grad_norm": 0.250759482383728, "learning_rate": 5.4503319300518144e-05, "loss": 1.5754, "step": 22480 }, { "epoch": 0.45539183937823835, "grad_norm": 0.24549604952335358, "learning_rate": 5.446284002590674e-05, "loss": 1.5866, "step": 22500 }, { "epoch": 0.45579663212435234, "grad_norm": 0.23796741664409637, "learning_rate": 5.442236075129534e-05, "loss": 1.5774, "step": 22520 }, { "epoch": 0.45620142487046633, "grad_norm": 0.2658981382846832, "learning_rate": 5.4381881476683935e-05, "loss": 1.5713, "step": 22540 }, { "epoch": 0.4566062176165803, "grad_norm": 0.24504336714744568, "learning_rate": 5.4341402202072544e-05, "loss": 1.5842, "step": 22560 }, { "epoch": 0.4570110103626943, "grad_norm": 0.24717310070991516, "learning_rate": 5.430092292746114e-05, "loss": 1.5821, "step": 22580 }, { "epoch": 0.4574158031088083, "grad_norm": 0.23919735848903656, "learning_rate": 5.426044365284975e-05, "loss": 1.5764, "step": 22600 }, { "epoch": 0.4578205958549223, "grad_norm": 0.236130490899086, "learning_rate": 5.421996437823834e-05, "loss": 1.5837, "step": 22620 }, { "epoch": 0.4582253886010363, "grad_norm": 0.25422632694244385, "learning_rate": 5.417948510362695e-05, "loss": 1.5771, "step": 22640 }, { "epoch": 0.4586301813471503, "grad_norm": 0.25385910272598267, "learning_rate": 5.4139005829015545e-05, "loss": 1.5739, "step": 22660 }, { "epoch": 0.45903497409326427, "grad_norm": 0.24151968955993652, "learning_rate": 5.4098526554404147e-05, "loss": 1.5682, "step": 22680 }, { "epoch": 0.45943976683937826, "grad_norm": 0.24316099286079407, "learning_rate": 5.405804727979275e-05, "loss": 1.5841, "step": 22700 }, { "epoch": 0.45984455958549225, "grad_norm": 0.2554795742034912, "learning_rate": 5.401756800518135e-05, "loss": 1.5777, "step": 22720 }, { "epoch": 0.46024935233160624, "grad_norm": 0.24739374220371246, "learning_rate": 5.3977088730569945e-05, "loss": 1.576, "step": 22740 }, { "epoch": 0.46065414507772023, "grad_norm": 0.24370045959949493, "learning_rate": 5.393660945595855e-05, "loss": 1.5714, "step": 22760 }, { "epoch": 0.46105893782383417, "grad_norm": 0.23917914927005768, "learning_rate": 5.389613018134715e-05, "loss": 1.5839, "step": 22780 }, { "epoch": 0.46146373056994816, "grad_norm": 0.2589251399040222, "learning_rate": 5.3855650906735756e-05, "loss": 1.5793, "step": 22800 }, { "epoch": 0.46186852331606215, "grad_norm": 0.24770036339759827, "learning_rate": 5.381517163212435e-05, "loss": 1.5759, "step": 22820 }, { "epoch": 0.46227331606217614, "grad_norm": 0.2605452835559845, "learning_rate": 5.377469235751296e-05, "loss": 1.5801, "step": 22840 }, { "epoch": 0.46267810880829013, "grad_norm": 0.2473832219839096, "learning_rate": 5.3734213082901554e-05, "loss": 1.5769, "step": 22860 }, { "epoch": 0.4630829015544041, "grad_norm": 0.241891548037529, "learning_rate": 5.3693733808290156e-05, "loss": 1.5788, "step": 22880 }, { "epoch": 0.4634876943005181, "grad_norm": 0.23434248566627502, "learning_rate": 5.365325453367875e-05, "loss": 1.5867, "step": 22900 }, { "epoch": 0.4638924870466321, "grad_norm": 0.24268528819084167, "learning_rate": 5.361277525906736e-05, "loss": 1.5812, "step": 22920 }, { "epoch": 0.4642972797927461, "grad_norm": 0.24057017266750336, "learning_rate": 5.3572295984455954e-05, "loss": 1.5797, "step": 22940 }, { "epoch": 0.4647020725388601, "grad_norm": 0.25855252146720886, "learning_rate": 5.353181670984456e-05, "loss": 1.5786, "step": 22960 }, { "epoch": 0.4651068652849741, "grad_norm": 0.23885799944400787, "learning_rate": 5.349133743523317e-05, "loss": 1.5714, "step": 22980 }, { "epoch": 0.46551165803108807, "grad_norm": 0.24406635761260986, "learning_rate": 5.3450858160621765e-05, "loss": 1.5802, "step": 23000 }, { "epoch": 0.46591645077720206, "grad_norm": 0.25215572118759155, "learning_rate": 5.3410378886010374e-05, "loss": 1.5733, "step": 23020 }, { "epoch": 0.46632124352331605, "grad_norm": 0.24372753500938416, "learning_rate": 5.336989961139897e-05, "loss": 1.585, "step": 23040 }, { "epoch": 0.46672603626943004, "grad_norm": 0.25348028540611267, "learning_rate": 5.332942033678757e-05, "loss": 1.583, "step": 23060 }, { "epoch": 0.46713082901554404, "grad_norm": 0.2472178339958191, "learning_rate": 5.3288941062176165e-05, "loss": 1.5832, "step": 23080 }, { "epoch": 0.467535621761658, "grad_norm": 0.24889615178108215, "learning_rate": 5.324846178756477e-05, "loss": 1.5769, "step": 23100 }, { "epoch": 0.467940414507772, "grad_norm": 0.2583886981010437, "learning_rate": 5.320798251295337e-05, "loss": 1.5806, "step": 23120 }, { "epoch": 0.468345207253886, "grad_norm": 0.25065723061561584, "learning_rate": 5.3167503238341977e-05, "loss": 1.5782, "step": 23140 }, { "epoch": 0.46875, "grad_norm": 0.25068336725234985, "learning_rate": 5.312702396373057e-05, "loss": 1.5777, "step": 23160 }, { "epoch": 0.469154792746114, "grad_norm": 0.24238821864128113, "learning_rate": 5.308654468911918e-05, "loss": 1.5719, "step": 23180 }, { "epoch": 0.469559585492228, "grad_norm": 0.25946658849716187, "learning_rate": 5.3046065414507775e-05, "loss": 1.5827, "step": 23200 }, { "epoch": 0.469964378238342, "grad_norm": 0.25044023990631104, "learning_rate": 5.3005586139896376e-05, "loss": 1.5852, "step": 23220 }, { "epoch": 0.47036917098445596, "grad_norm": 0.24602341651916504, "learning_rate": 5.296510686528498e-05, "loss": 1.592, "step": 23240 }, { "epoch": 0.47077396373056996, "grad_norm": 0.24163663387298584, "learning_rate": 5.292462759067358e-05, "loss": 1.5743, "step": 23260 }, { "epoch": 0.47117875647668395, "grad_norm": 0.2581930160522461, "learning_rate": 5.2884148316062174e-05, "loss": 1.5836, "step": 23280 }, { "epoch": 0.47158354922279794, "grad_norm": 0.2380174696445465, "learning_rate": 5.284366904145078e-05, "loss": 1.5842, "step": 23300 }, { "epoch": 0.47198834196891193, "grad_norm": 0.24762296676635742, "learning_rate": 5.280318976683938e-05, "loss": 1.5822, "step": 23320 }, { "epoch": 0.4723931347150259, "grad_norm": 0.24455010890960693, "learning_rate": 5.2762710492227986e-05, "loss": 1.5744, "step": 23340 }, { "epoch": 0.4727979274611399, "grad_norm": 0.25161269307136536, "learning_rate": 5.272223121761658e-05, "loss": 1.5854, "step": 23360 }, { "epoch": 0.4732027202072539, "grad_norm": 0.2503201365470886, "learning_rate": 5.268175194300519e-05, "loss": 1.5788, "step": 23380 }, { "epoch": 0.4736075129533679, "grad_norm": 0.2520042955875397, "learning_rate": 5.2641272668393784e-05, "loss": 1.5801, "step": 23400 }, { "epoch": 0.4740123056994819, "grad_norm": 0.27100247144699097, "learning_rate": 5.2600793393782385e-05, "loss": 1.5741, "step": 23420 }, { "epoch": 0.4744170984455959, "grad_norm": 0.246733158826828, "learning_rate": 5.256031411917099e-05, "loss": 1.5814, "step": 23440 }, { "epoch": 0.47482189119170987, "grad_norm": 0.24831706285476685, "learning_rate": 5.251983484455959e-05, "loss": 1.5743, "step": 23460 }, { "epoch": 0.47522668393782386, "grad_norm": 0.2432733029127121, "learning_rate": 5.247935556994818e-05, "loss": 1.5747, "step": 23480 }, { "epoch": 0.47563147668393785, "grad_norm": 0.24692484736442566, "learning_rate": 5.243887629533679e-05, "loss": 1.573, "step": 23500 }, { "epoch": 0.47603626943005184, "grad_norm": 0.2464066594839096, "learning_rate": 5.2398397020725387e-05, "loss": 1.5733, "step": 23520 }, { "epoch": 0.47644106217616583, "grad_norm": 0.2494385987520218, "learning_rate": 5.2357917746113995e-05, "loss": 1.5632, "step": 23540 }, { "epoch": 0.47684585492227977, "grad_norm": 0.2580525875091553, "learning_rate": 5.231743847150259e-05, "loss": 1.5775, "step": 23560 }, { "epoch": 0.47725064766839376, "grad_norm": 0.25116607546806335, "learning_rate": 5.22769591968912e-05, "loss": 1.5795, "step": 23580 }, { "epoch": 0.47765544041450775, "grad_norm": 0.24967427551746368, "learning_rate": 5.223647992227979e-05, "loss": 1.5858, "step": 23600 }, { "epoch": 0.47806023316062174, "grad_norm": 0.2454247623682022, "learning_rate": 5.2196000647668395e-05, "loss": 1.575, "step": 23620 }, { "epoch": 0.47846502590673573, "grad_norm": 0.2419043928384781, "learning_rate": 5.215552137305699e-05, "loss": 1.5847, "step": 23640 }, { "epoch": 0.4788698186528497, "grad_norm": 0.2583029568195343, "learning_rate": 5.21150420984456e-05, "loss": 1.5753, "step": 23660 }, { "epoch": 0.4792746113989637, "grad_norm": 0.24209900200366974, "learning_rate": 5.207456282383419e-05, "loss": 1.5902, "step": 23680 }, { "epoch": 0.4796794041450777, "grad_norm": 0.24333138763904572, "learning_rate": 5.20340835492228e-05, "loss": 1.5853, "step": 23700 }, { "epoch": 0.4800841968911917, "grad_norm": 0.2539040446281433, "learning_rate": 5.1993604274611396e-05, "loss": 1.5781, "step": 23720 }, { "epoch": 0.4804889896373057, "grad_norm": 0.24744194746017456, "learning_rate": 5.1953125000000004e-05, "loss": 1.582, "step": 23740 }, { "epoch": 0.4808937823834197, "grad_norm": 0.2569595277309418, "learning_rate": 5.1912645725388606e-05, "loss": 1.5853, "step": 23760 }, { "epoch": 0.48129857512953367, "grad_norm": 0.24229907989501953, "learning_rate": 5.187216645077721e-05, "loss": 1.5715, "step": 23780 }, { "epoch": 0.48170336787564766, "grad_norm": 0.2564829885959625, "learning_rate": 5.183168717616581e-05, "loss": 1.5801, "step": 23800 }, { "epoch": 0.48210816062176165, "grad_norm": 0.25123292207717896, "learning_rate": 5.1791207901554404e-05, "loss": 1.5748, "step": 23820 }, { "epoch": 0.48251295336787564, "grad_norm": 0.25094032287597656, "learning_rate": 5.175072862694301e-05, "loss": 1.5817, "step": 23840 }, { "epoch": 0.48291774611398963, "grad_norm": 0.24289348721504211, "learning_rate": 5.171024935233161e-05, "loss": 1.5787, "step": 23860 }, { "epoch": 0.4833225388601036, "grad_norm": 0.25674450397491455, "learning_rate": 5.1669770077720215e-05, "loss": 1.5787, "step": 23880 }, { "epoch": 0.4837273316062176, "grad_norm": 0.24390515685081482, "learning_rate": 5.162929080310881e-05, "loss": 1.5835, "step": 23900 }, { "epoch": 0.4841321243523316, "grad_norm": 0.25798797607421875, "learning_rate": 5.158881152849742e-05, "loss": 1.5684, "step": 23920 }, { "epoch": 0.4845369170984456, "grad_norm": 0.24052247405052185, "learning_rate": 5.154833225388601e-05, "loss": 1.5862, "step": 23940 }, { "epoch": 0.4849417098445596, "grad_norm": 0.24717338383197784, "learning_rate": 5.1507852979274615e-05, "loss": 1.5833, "step": 23960 }, { "epoch": 0.4853465025906736, "grad_norm": 0.2509436309337616, "learning_rate": 5.1467373704663217e-05, "loss": 1.583, "step": 23980 }, { "epoch": 0.48575129533678757, "grad_norm": 0.38470444083213806, "learning_rate": 5.142689443005182e-05, "loss": 1.573, "step": 24000 }, { "epoch": 0.48615608808290156, "grad_norm": 0.2504602372646332, "learning_rate": 5.138641515544041e-05, "loss": 1.5783, "step": 24020 }, { "epoch": 0.48656088082901555, "grad_norm": 0.24124808609485626, "learning_rate": 5.134593588082902e-05, "loss": 1.5808, "step": 24040 }, { "epoch": 0.48696567357512954, "grad_norm": 0.23642854392528534, "learning_rate": 5.1305456606217616e-05, "loss": 1.5763, "step": 24060 }, { "epoch": 0.48737046632124353, "grad_norm": 0.24842064082622528, "learning_rate": 5.1264977331606225e-05, "loss": 1.5717, "step": 24080 }, { "epoch": 0.4877752590673575, "grad_norm": 0.2563605308532715, "learning_rate": 5.122449805699482e-05, "loss": 1.5801, "step": 24100 }, { "epoch": 0.4881800518134715, "grad_norm": 0.24562868475914001, "learning_rate": 5.118401878238343e-05, "loss": 1.5765, "step": 24120 }, { "epoch": 0.4885848445595855, "grad_norm": 0.2645089030265808, "learning_rate": 5.114353950777202e-05, "loss": 1.5845, "step": 24140 }, { "epoch": 0.4889896373056995, "grad_norm": 0.23864537477493286, "learning_rate": 5.1103060233160624e-05, "loss": 1.5788, "step": 24160 }, { "epoch": 0.4893944300518135, "grad_norm": 0.24127276241779327, "learning_rate": 5.106258095854922e-05, "loss": 1.5783, "step": 24180 }, { "epoch": 0.4897992227979275, "grad_norm": 0.24852131307125092, "learning_rate": 5.102210168393783e-05, "loss": 1.5729, "step": 24200 }, { "epoch": 0.49020401554404147, "grad_norm": 0.24345511198043823, "learning_rate": 5.098162240932642e-05, "loss": 1.5844, "step": 24220 }, { "epoch": 0.49060880829015546, "grad_norm": 0.2450394332408905, "learning_rate": 5.094114313471503e-05, "loss": 1.5832, "step": 24240 }, { "epoch": 0.49101360103626945, "grad_norm": 0.23919980227947235, "learning_rate": 5.0900663860103625e-05, "loss": 1.5766, "step": 24260 }, { "epoch": 0.49141839378238344, "grad_norm": 0.24199815094470978, "learning_rate": 5.0860184585492234e-05, "loss": 1.5708, "step": 24280 }, { "epoch": 0.49182318652849744, "grad_norm": 0.2552061676979065, "learning_rate": 5.081970531088083e-05, "loss": 1.5755, "step": 24300 }, { "epoch": 0.49222797927461137, "grad_norm": 0.24190489947795868, "learning_rate": 5.077922603626944e-05, "loss": 1.5811, "step": 24320 }, { "epoch": 0.49263277202072536, "grad_norm": 0.25832515954971313, "learning_rate": 5.073874676165803e-05, "loss": 1.5834, "step": 24340 }, { "epoch": 0.49303756476683935, "grad_norm": 0.2520054280757904, "learning_rate": 5.069826748704663e-05, "loss": 1.5821, "step": 24360 }, { "epoch": 0.49344235751295334, "grad_norm": 0.23544861376285553, "learning_rate": 5.065778821243523e-05, "loss": 1.5803, "step": 24380 }, { "epoch": 0.49384715025906734, "grad_norm": 0.2454274594783783, "learning_rate": 5.0617308937823837e-05, "loss": 1.5793, "step": 24400 }, { "epoch": 0.4942519430051813, "grad_norm": 0.24478788673877716, "learning_rate": 5.057682966321243e-05, "loss": 1.5801, "step": 24420 }, { "epoch": 0.4946567357512953, "grad_norm": 0.25029295682907104, "learning_rate": 5.053635038860104e-05, "loss": 1.5783, "step": 24440 }, { "epoch": 0.4950615284974093, "grad_norm": 0.24456411600112915, "learning_rate": 5.0495871113989635e-05, "loss": 1.5825, "step": 24460 }, { "epoch": 0.4954663212435233, "grad_norm": 0.24189241230487823, "learning_rate": 5.045539183937824e-05, "loss": 1.5811, "step": 24480 }, { "epoch": 0.4958711139896373, "grad_norm": 0.2593652606010437, "learning_rate": 5.041491256476684e-05, "loss": 1.5805, "step": 24500 }, { "epoch": 0.4962759067357513, "grad_norm": 0.24172408878803253, "learning_rate": 5.0374433290155446e-05, "loss": 1.5841, "step": 24520 }, { "epoch": 0.4966806994818653, "grad_norm": 0.23994328081607819, "learning_rate": 5.033395401554405e-05, "loss": 1.5827, "step": 24540 }, { "epoch": 0.49708549222797926, "grad_norm": 0.2519098222255707, "learning_rate": 5.029347474093264e-05, "loss": 1.5817, "step": 24560 }, { "epoch": 0.49749028497409326, "grad_norm": 0.25192949175834656, "learning_rate": 5.025299546632125e-05, "loss": 1.5795, "step": 24580 }, { "epoch": 0.49789507772020725, "grad_norm": 0.2477131187915802, "learning_rate": 5.0212516191709846e-05, "loss": 1.573, "step": 24600 }, { "epoch": 0.49829987046632124, "grad_norm": 0.23865529894828796, "learning_rate": 5.0172036917098454e-05, "loss": 1.5811, "step": 24620 }, { "epoch": 0.49870466321243523, "grad_norm": 0.24538937211036682, "learning_rate": 5.013155764248705e-05, "loss": 1.5814, "step": 24640 }, { "epoch": 0.4991094559585492, "grad_norm": 0.26017871499061584, "learning_rate": 5.009107836787566e-05, "loss": 1.5746, "step": 24660 }, { "epoch": 0.4995142487046632, "grad_norm": 0.2426372468471527, "learning_rate": 5.005059909326425e-05, "loss": 1.5789, "step": 24680 }, { "epoch": 0.4999190414507772, "grad_norm": 0.2468426674604416, "learning_rate": 5.0010119818652854e-05, "loss": 1.5784, "step": 24700 }, { "epoch": 0.5003238341968912, "grad_norm": 0.2450643628835678, "learning_rate": 4.996964054404145e-05, "loss": 1.5823, "step": 24720 }, { "epoch": 0.5007286269430051, "grad_norm": 0.23434686660766602, "learning_rate": 4.992916126943005e-05, "loss": 1.5757, "step": 24740 }, { "epoch": 0.5011334196891192, "grad_norm": 0.253591924905777, "learning_rate": 4.988868199481865e-05, "loss": 1.5726, "step": 24760 }, { "epoch": 0.5015382124352331, "grad_norm": 0.24490448832511902, "learning_rate": 4.984820272020725e-05, "loss": 1.5817, "step": 24780 }, { "epoch": 0.5019430051813472, "grad_norm": 0.25187695026397705, "learning_rate": 4.9807723445595855e-05, "loss": 1.579, "step": 24800 }, { "epoch": 0.5023477979274611, "grad_norm": 0.249691903591156, "learning_rate": 4.9767244170984456e-05, "loss": 1.5797, "step": 24820 }, { "epoch": 0.5027525906735751, "grad_norm": 0.24403421580791473, "learning_rate": 4.972676489637306e-05, "loss": 1.5769, "step": 24840 }, { "epoch": 0.5031573834196891, "grad_norm": 0.23787952959537506, "learning_rate": 4.968628562176166e-05, "loss": 1.5707, "step": 24860 }, { "epoch": 0.5035621761658031, "grad_norm": 0.2436736822128296, "learning_rate": 4.964580634715026e-05, "loss": 1.5793, "step": 24880 }, { "epoch": 0.5039669689119171, "grad_norm": 0.25199687480926514, "learning_rate": 4.960532707253886e-05, "loss": 1.5784, "step": 24900 }, { "epoch": 0.5043717616580311, "grad_norm": 0.2547791004180908, "learning_rate": 4.9564847797927464e-05, "loss": 1.5748, "step": 24920 }, { "epoch": 0.504776554404145, "grad_norm": 0.23858702182769775, "learning_rate": 4.9524368523316066e-05, "loss": 1.5749, "step": 24940 }, { "epoch": 0.5051813471502591, "grad_norm": 0.2450331300497055, "learning_rate": 4.948388924870467e-05, "loss": 1.5851, "step": 24960 }, { "epoch": 0.505586139896373, "grad_norm": 0.25886282324790955, "learning_rate": 4.944340997409327e-05, "loss": 1.5849, "step": 24980 }, { "epoch": 0.5059909326424871, "grad_norm": 0.24725942313671112, "learning_rate": 4.940293069948187e-05, "loss": 1.5737, "step": 25000 }, { "epoch": 0.506395725388601, "grad_norm": 0.24251313507556915, "learning_rate": 4.936245142487047e-05, "loss": 1.5783, "step": 25020 }, { "epoch": 0.506800518134715, "grad_norm": 0.24619217216968536, "learning_rate": 4.9321972150259074e-05, "loss": 1.5796, "step": 25040 }, { "epoch": 0.507205310880829, "grad_norm": 0.2532688081264496, "learning_rate": 4.9281492875647676e-05, "loss": 1.575, "step": 25060 }, { "epoch": 0.507610103626943, "grad_norm": 0.2544226348400116, "learning_rate": 4.924101360103627e-05, "loss": 1.5743, "step": 25080 }, { "epoch": 0.508014896373057, "grad_norm": 0.2437015026807785, "learning_rate": 4.920053432642487e-05, "loss": 1.5742, "step": 25100 }, { "epoch": 0.508419689119171, "grad_norm": 0.2431849092245102, "learning_rate": 4.9160055051813474e-05, "loss": 1.5762, "step": 25120 }, { "epoch": 0.508824481865285, "grad_norm": 0.2514197528362274, "learning_rate": 4.9119575777202075e-05, "loss": 1.5783, "step": 25140 }, { "epoch": 0.509229274611399, "grad_norm": 0.243146613240242, "learning_rate": 4.907909650259068e-05, "loss": 1.577, "step": 25160 }, { "epoch": 0.5096340673575129, "grad_norm": 0.2571418583393097, "learning_rate": 4.903861722797928e-05, "loss": 1.583, "step": 25180 }, { "epoch": 0.510038860103627, "grad_norm": 0.2512859106063843, "learning_rate": 4.899813795336788e-05, "loss": 1.5833, "step": 25200 }, { "epoch": 0.5104436528497409, "grad_norm": 0.24603405594825745, "learning_rate": 4.895765867875648e-05, "loss": 1.5694, "step": 25220 }, { "epoch": 0.510848445595855, "grad_norm": 0.24326656758785248, "learning_rate": 4.891717940414508e-05, "loss": 1.5854, "step": 25240 }, { "epoch": 0.5112532383419689, "grad_norm": 0.24897156655788422, "learning_rate": 4.887670012953368e-05, "loss": 1.5798, "step": 25260 }, { "epoch": 0.5116580310880829, "grad_norm": 0.24572646617889404, "learning_rate": 4.883622085492228e-05, "loss": 1.5747, "step": 25280 }, { "epoch": 0.5120628238341969, "grad_norm": 0.25053149461746216, "learning_rate": 4.879574158031088e-05, "loss": 1.5772, "step": 25300 }, { "epoch": 0.5124676165803109, "grad_norm": 0.26112374663352966, "learning_rate": 4.875526230569948e-05, "loss": 1.5784, "step": 25320 }, { "epoch": 0.5128724093264249, "grad_norm": 0.25159648060798645, "learning_rate": 4.8714783031088084e-05, "loss": 1.577, "step": 25340 }, { "epoch": 0.5132772020725389, "grad_norm": 0.24899369478225708, "learning_rate": 4.8674303756476686e-05, "loss": 1.5757, "step": 25360 }, { "epoch": 0.5136819948186528, "grad_norm": 0.2519722282886505, "learning_rate": 4.863382448186529e-05, "loss": 1.5818, "step": 25380 }, { "epoch": 0.5140867875647669, "grad_norm": 0.2588302791118622, "learning_rate": 4.859334520725389e-05, "loss": 1.5764, "step": 25400 }, { "epoch": 0.5144915803108808, "grad_norm": 0.25766751170158386, "learning_rate": 4.855286593264249e-05, "loss": 1.5814, "step": 25420 }, { "epoch": 0.5148963730569949, "grad_norm": 0.2474324256181717, "learning_rate": 4.851238665803109e-05, "loss": 1.5754, "step": 25440 }, { "epoch": 0.5153011658031088, "grad_norm": 0.24520163238048553, "learning_rate": 4.847190738341969e-05, "loss": 1.5782, "step": 25460 }, { "epoch": 0.5157059585492227, "grad_norm": 0.25748249888420105, "learning_rate": 4.843142810880829e-05, "loss": 1.5793, "step": 25480 }, { "epoch": 0.5161107512953368, "grad_norm": 0.2577782869338989, "learning_rate": 4.839094883419689e-05, "loss": 1.5828, "step": 25500 }, { "epoch": 0.5165155440414507, "grad_norm": 0.2427847981452942, "learning_rate": 4.835046955958549e-05, "loss": 1.5767, "step": 25520 }, { "epoch": 0.5169203367875648, "grad_norm": 0.2464686781167984, "learning_rate": 4.8309990284974094e-05, "loss": 1.5777, "step": 25540 }, { "epoch": 0.5173251295336787, "grad_norm": 0.24446095526218414, "learning_rate": 4.8269511010362695e-05, "loss": 1.5725, "step": 25560 }, { "epoch": 0.5177299222797928, "grad_norm": 0.2630501389503479, "learning_rate": 4.82290317357513e-05, "loss": 1.5769, "step": 25580 }, { "epoch": 0.5181347150259067, "grad_norm": 0.24367742240428925, "learning_rate": 4.81885524611399e-05, "loss": 1.5765, "step": 25600 }, { "epoch": 0.5185395077720207, "grad_norm": 0.24906082451343536, "learning_rate": 4.81480731865285e-05, "loss": 1.581, "step": 25620 }, { "epoch": 0.5189443005181347, "grad_norm": 0.2511107623577118, "learning_rate": 4.8107593911917095e-05, "loss": 1.5743, "step": 25640 }, { "epoch": 0.5193490932642487, "grad_norm": 0.23770417273044586, "learning_rate": 4.8067114637305696e-05, "loss": 1.5828, "step": 25660 }, { "epoch": 0.5197538860103627, "grad_norm": 0.24414324760437012, "learning_rate": 4.8026635362694305e-05, "loss": 1.5722, "step": 25680 }, { "epoch": 0.5201586787564767, "grad_norm": 0.25923070311546326, "learning_rate": 4.7986156088082906e-05, "loss": 1.5817, "step": 25700 }, { "epoch": 0.5205634715025906, "grad_norm": 0.24711006879806519, "learning_rate": 4.794567681347151e-05, "loss": 1.5683, "step": 25720 }, { "epoch": 0.5209682642487047, "grad_norm": 0.24478749930858612, "learning_rate": 4.790519753886011e-05, "loss": 1.5721, "step": 25740 }, { "epoch": 0.5213730569948186, "grad_norm": 0.2445567101240158, "learning_rate": 4.786471826424871e-05, "loss": 1.5755, "step": 25760 }, { "epoch": 0.5217778497409327, "grad_norm": 0.24971835315227509, "learning_rate": 4.782423898963731e-05, "loss": 1.5756, "step": 25780 }, { "epoch": 0.5221826424870466, "grad_norm": 0.24110785126686096, "learning_rate": 4.778375971502591e-05, "loss": 1.5809, "step": 25800 }, { "epoch": 0.5225874352331606, "grad_norm": 0.24619248509407043, "learning_rate": 4.774328044041451e-05, "loss": 1.5747, "step": 25820 }, { "epoch": 0.5229922279792746, "grad_norm": 0.24404242634773254, "learning_rate": 4.770280116580311e-05, "loss": 1.5746, "step": 25840 }, { "epoch": 0.5233970207253886, "grad_norm": 0.24865297973155975, "learning_rate": 4.766232189119171e-05, "loss": 1.5777, "step": 25860 }, { "epoch": 0.5238018134715026, "grad_norm": 0.25221002101898193, "learning_rate": 4.7621842616580314e-05, "loss": 1.5803, "step": 25880 }, { "epoch": 0.5242066062176166, "grad_norm": 0.24077841639518738, "learning_rate": 4.7581363341968916e-05, "loss": 1.5795, "step": 25900 }, { "epoch": 0.5246113989637305, "grad_norm": 0.24999049305915833, "learning_rate": 4.754088406735752e-05, "loss": 1.5743, "step": 25920 }, { "epoch": 0.5250161917098446, "grad_norm": 0.23617279529571533, "learning_rate": 4.750040479274612e-05, "loss": 1.5729, "step": 25940 }, { "epoch": 0.5254209844559585, "grad_norm": 0.2422950714826584, "learning_rate": 4.745992551813472e-05, "loss": 1.5816, "step": 25960 }, { "epoch": 0.5258257772020726, "grad_norm": 0.2582608163356781, "learning_rate": 4.741944624352332e-05, "loss": 1.589, "step": 25980 }, { "epoch": 0.5262305699481865, "grad_norm": 0.24811051785945892, "learning_rate": 4.737896696891192e-05, "loss": 1.5782, "step": 26000 }, { "epoch": 0.5266353626943006, "grad_norm": 0.27666935324668884, "learning_rate": 4.733848769430052e-05, "loss": 1.5789, "step": 26020 }, { "epoch": 0.5270401554404145, "grad_norm": 0.24608214199543, "learning_rate": 4.729800841968912e-05, "loss": 1.5756, "step": 26040 }, { "epoch": 0.5274449481865285, "grad_norm": 0.2632744312286377, "learning_rate": 4.725752914507772e-05, "loss": 1.5715, "step": 26060 }, { "epoch": 0.5278497409326425, "grad_norm": 0.2810420095920563, "learning_rate": 4.721704987046632e-05, "loss": 1.5761, "step": 26080 }, { "epoch": 0.5282545336787565, "grad_norm": 0.2552616000175476, "learning_rate": 4.7176570595854925e-05, "loss": 1.5777, "step": 26100 }, { "epoch": 0.5286593264248705, "grad_norm": 0.24986442923545837, "learning_rate": 4.7136091321243526e-05, "loss": 1.579, "step": 26120 }, { "epoch": 0.5290641191709845, "grad_norm": 0.2487795650959015, "learning_rate": 4.709561204663213e-05, "loss": 1.5739, "step": 26140 }, { "epoch": 0.5294689119170984, "grad_norm": 0.2430865615606308, "learning_rate": 4.705513277202073e-05, "loss": 1.5733, "step": 26160 }, { "epoch": 0.5298737046632125, "grad_norm": 0.2541719675064087, "learning_rate": 4.7014653497409324e-05, "loss": 1.5837, "step": 26180 }, { "epoch": 0.5302784974093264, "grad_norm": 0.2468152791261673, "learning_rate": 4.6974174222797926e-05, "loss": 1.583, "step": 26200 }, { "epoch": 0.5306832901554405, "grad_norm": 0.2389470636844635, "learning_rate": 4.693369494818653e-05, "loss": 1.5774, "step": 26220 }, { "epoch": 0.5310880829015544, "grad_norm": 0.24157211184501648, "learning_rate": 4.689321567357513e-05, "loss": 1.5753, "step": 26240 }, { "epoch": 0.5314928756476683, "grad_norm": 0.259877473115921, "learning_rate": 4.685273639896373e-05, "loss": 1.5735, "step": 26260 }, { "epoch": 0.5318976683937824, "grad_norm": 0.252538800239563, "learning_rate": 4.681225712435233e-05, "loss": 1.5781, "step": 26280 }, { "epoch": 0.5323024611398963, "grad_norm": 0.24980610609054565, "learning_rate": 4.6771777849740934e-05, "loss": 1.5841, "step": 26300 }, { "epoch": 0.5327072538860104, "grad_norm": 0.24372923374176025, "learning_rate": 4.6731298575129536e-05, "loss": 1.572, "step": 26320 }, { "epoch": 0.5331120466321243, "grad_norm": 0.25396573543548584, "learning_rate": 4.669081930051814e-05, "loss": 1.5784, "step": 26340 }, { "epoch": 0.5335168393782384, "grad_norm": 0.24497637152671814, "learning_rate": 4.665034002590674e-05, "loss": 1.5758, "step": 26360 }, { "epoch": 0.5339216321243523, "grad_norm": 0.24501706659793854, "learning_rate": 4.6609860751295334e-05, "loss": 1.5746, "step": 26380 }, { "epoch": 0.5343264248704663, "grad_norm": 0.24592188000679016, "learning_rate": 4.6569381476683935e-05, "loss": 1.5834, "step": 26400 }, { "epoch": 0.5347312176165803, "grad_norm": 0.24027900397777557, "learning_rate": 4.652890220207254e-05, "loss": 1.5786, "step": 26420 }, { "epoch": 0.5351360103626943, "grad_norm": 0.2622084617614746, "learning_rate": 4.648842292746114e-05, "loss": 1.5714, "step": 26440 }, { "epoch": 0.5355408031088082, "grad_norm": 0.24835343658924103, "learning_rate": 4.644794365284975e-05, "loss": 1.5856, "step": 26460 }, { "epoch": 0.5359455958549223, "grad_norm": 0.2647353708744049, "learning_rate": 4.640746437823835e-05, "loss": 1.5736, "step": 26480 }, { "epoch": 0.5363503886010362, "grad_norm": 0.2492465227842331, "learning_rate": 4.636698510362695e-05, "loss": 1.5679, "step": 26500 }, { "epoch": 0.5367551813471503, "grad_norm": 0.25600072741508484, "learning_rate": 4.632650582901555e-05, "loss": 1.5746, "step": 26520 }, { "epoch": 0.5371599740932642, "grad_norm": 0.2557211220264435, "learning_rate": 4.6286026554404146e-05, "loss": 1.5737, "step": 26540 }, { "epoch": 0.5375647668393783, "grad_norm": 0.25615766644477844, "learning_rate": 4.624554727979275e-05, "loss": 1.5847, "step": 26560 }, { "epoch": 0.5379695595854922, "grad_norm": 0.2541336119174957, "learning_rate": 4.620506800518135e-05, "loss": 1.5767, "step": 26580 }, { "epoch": 0.5383743523316062, "grad_norm": 0.2521383464336395, "learning_rate": 4.616458873056995e-05, "loss": 1.5766, "step": 26600 }, { "epoch": 0.5387791450777202, "grad_norm": 0.24747107923030853, "learning_rate": 4.612410945595855e-05, "loss": 1.5829, "step": 26620 }, { "epoch": 0.5391839378238342, "grad_norm": 0.2570858299732208, "learning_rate": 4.6083630181347154e-05, "loss": 1.5778, "step": 26640 }, { "epoch": 0.5395887305699482, "grad_norm": 0.23550689220428467, "learning_rate": 4.6043150906735756e-05, "loss": 1.577, "step": 26660 }, { "epoch": 0.5399935233160622, "grad_norm": 0.255535751581192, "learning_rate": 4.600267163212436e-05, "loss": 1.5735, "step": 26680 }, { "epoch": 0.5403983160621761, "grad_norm": 0.23784370720386505, "learning_rate": 4.596219235751296e-05, "loss": 1.5775, "step": 26700 }, { "epoch": 0.5408031088082902, "grad_norm": 0.2587854862213135, "learning_rate": 4.5921713082901554e-05, "loss": 1.5885, "step": 26720 }, { "epoch": 0.5412079015544041, "grad_norm": 0.250474750995636, "learning_rate": 4.5881233808290156e-05, "loss": 1.5819, "step": 26740 }, { "epoch": 0.5416126943005182, "grad_norm": 0.2470523715019226, "learning_rate": 4.584075453367876e-05, "loss": 1.5742, "step": 26760 }, { "epoch": 0.5420174870466321, "grad_norm": 0.24264921247959137, "learning_rate": 4.580027525906736e-05, "loss": 1.5732, "step": 26780 }, { "epoch": 0.5424222797927462, "grad_norm": 0.2485053986310959, "learning_rate": 4.575979598445596e-05, "loss": 1.5781, "step": 26800 }, { "epoch": 0.5428270725388601, "grad_norm": 0.2515389025211334, "learning_rate": 4.571931670984456e-05, "loss": 1.5748, "step": 26820 }, { "epoch": 0.5432318652849741, "grad_norm": 0.24164317548274994, "learning_rate": 4.5678837435233164e-05, "loss": 1.5787, "step": 26840 }, { "epoch": 0.5436366580310881, "grad_norm": 0.2493463009595871, "learning_rate": 4.5638358160621765e-05, "loss": 1.571, "step": 26860 }, { "epoch": 0.5440414507772021, "grad_norm": 0.24559026956558228, "learning_rate": 4.559787888601037e-05, "loss": 1.5824, "step": 26880 }, { "epoch": 0.544446243523316, "grad_norm": 0.23529697954654694, "learning_rate": 4.555739961139897e-05, "loss": 1.5769, "step": 26900 }, { "epoch": 0.5448510362694301, "grad_norm": 0.23835629224777222, "learning_rate": 4.551692033678756e-05, "loss": 1.5719, "step": 26920 }, { "epoch": 0.545255829015544, "grad_norm": 0.24895823001861572, "learning_rate": 4.5476441062176165e-05, "loss": 1.578, "step": 26940 }, { "epoch": 0.5456606217616581, "grad_norm": 0.2723689377307892, "learning_rate": 4.5435961787564766e-05, "loss": 1.5691, "step": 26960 }, { "epoch": 0.546065414507772, "grad_norm": 0.27683934569358826, "learning_rate": 4.539548251295337e-05, "loss": 1.5768, "step": 26980 }, { "epoch": 0.5464702072538861, "grad_norm": 0.24492651224136353, "learning_rate": 4.535500323834197e-05, "loss": 1.5786, "step": 27000 }, { "epoch": 0.546875, "grad_norm": 0.23444582521915436, "learning_rate": 4.531452396373057e-05, "loss": 1.5718, "step": 27020 }, { "epoch": 0.5472797927461139, "grad_norm": 0.2427140325307846, "learning_rate": 4.527404468911917e-05, "loss": 1.5778, "step": 27040 }, { "epoch": 0.547684585492228, "grad_norm": 0.24684864282608032, "learning_rate": 4.5233565414507774e-05, "loss": 1.5805, "step": 27060 }, { "epoch": 0.5480893782383419, "grad_norm": 0.2557682394981384, "learning_rate": 4.5193086139896376e-05, "loss": 1.5854, "step": 27080 }, { "epoch": 0.548494170984456, "grad_norm": 0.2521653473377228, "learning_rate": 4.515260686528498e-05, "loss": 1.5756, "step": 27100 }, { "epoch": 0.5488989637305699, "grad_norm": 0.25644028186798096, "learning_rate": 4.511212759067357e-05, "loss": 1.5766, "step": 27120 }, { "epoch": 0.549303756476684, "grad_norm": 0.2427394688129425, "learning_rate": 4.5071648316062174e-05, "loss": 1.5723, "step": 27140 }, { "epoch": 0.5497085492227979, "grad_norm": 0.25224652886390686, "learning_rate": 4.5031169041450776e-05, "loss": 1.569, "step": 27160 }, { "epoch": 0.5501133419689119, "grad_norm": 0.2507936358451843, "learning_rate": 4.499068976683938e-05, "loss": 1.5739, "step": 27180 }, { "epoch": 0.5505181347150259, "grad_norm": 0.24373170733451843, "learning_rate": 4.495021049222798e-05, "loss": 1.578, "step": 27200 }, { "epoch": 0.5509229274611399, "grad_norm": 0.2559431195259094, "learning_rate": 4.490973121761659e-05, "loss": 1.578, "step": 27220 }, { "epoch": 0.5513277202072538, "grad_norm": 0.2675119936466217, "learning_rate": 4.486925194300519e-05, "loss": 1.58, "step": 27240 }, { "epoch": 0.5517325129533679, "grad_norm": 0.23623517155647278, "learning_rate": 4.4828772668393784e-05, "loss": 1.5771, "step": 27260 }, { "epoch": 0.5521373056994818, "grad_norm": 0.25080692768096924, "learning_rate": 4.4788293393782385e-05, "loss": 1.5813, "step": 27280 }, { "epoch": 0.5525420984455959, "grad_norm": 0.2448391616344452, "learning_rate": 4.474781411917099e-05, "loss": 1.57, "step": 27300 }, { "epoch": 0.5529468911917098, "grad_norm": 0.24628101289272308, "learning_rate": 4.470733484455959e-05, "loss": 1.5767, "step": 27320 }, { "epoch": 0.5533516839378239, "grad_norm": 0.2511506974697113, "learning_rate": 4.466685556994819e-05, "loss": 1.5674, "step": 27340 }, { "epoch": 0.5537564766839378, "grad_norm": 0.23988975584506989, "learning_rate": 4.462637629533679e-05, "loss": 1.5847, "step": 27360 }, { "epoch": 0.5541612694300518, "grad_norm": 0.23415102064609528, "learning_rate": 4.458589702072539e-05, "loss": 1.5827, "step": 27380 }, { "epoch": 0.5545660621761658, "grad_norm": 0.25428295135498047, "learning_rate": 4.4545417746113995e-05, "loss": 1.5763, "step": 27400 }, { "epoch": 0.5549708549222798, "grad_norm": 0.24063487350940704, "learning_rate": 4.4504938471502596e-05, "loss": 1.5708, "step": 27420 }, { "epoch": 0.5553756476683938, "grad_norm": 0.25658825039863586, "learning_rate": 4.44644591968912e-05, "loss": 1.5741, "step": 27440 }, { "epoch": 0.5557804404145078, "grad_norm": 0.24409863352775574, "learning_rate": 4.442397992227979e-05, "loss": 1.5828, "step": 27460 }, { "epoch": 0.5561852331606217, "grad_norm": 0.2456551492214203, "learning_rate": 4.4383500647668394e-05, "loss": 1.5758, "step": 27480 }, { "epoch": 0.5565900259067358, "grad_norm": 0.2531892657279968, "learning_rate": 4.4343021373056996e-05, "loss": 1.5725, "step": 27500 }, { "epoch": 0.5569948186528497, "grad_norm": 0.2560547888278961, "learning_rate": 4.43025420984456e-05, "loss": 1.5705, "step": 27520 }, { "epoch": 0.5573996113989638, "grad_norm": 0.2510719895362854, "learning_rate": 4.42620628238342e-05, "loss": 1.5736, "step": 27540 }, { "epoch": 0.5578044041450777, "grad_norm": 0.25160086154937744, "learning_rate": 4.42215835492228e-05, "loss": 1.5804, "step": 27560 }, { "epoch": 0.5582091968911918, "grad_norm": 0.2495107650756836, "learning_rate": 4.41811042746114e-05, "loss": 1.5727, "step": 27580 }, { "epoch": 0.5586139896373057, "grad_norm": 0.25935500860214233, "learning_rate": 4.4140625000000004e-05, "loss": 1.5699, "step": 27600 }, { "epoch": 0.5590187823834197, "grad_norm": 0.2415279895067215, "learning_rate": 4.4100145725388606e-05, "loss": 1.5783, "step": 27620 }, { "epoch": 0.5594235751295337, "grad_norm": 0.24605855345726013, "learning_rate": 4.405966645077721e-05, "loss": 1.575, "step": 27640 }, { "epoch": 0.5598283678756477, "grad_norm": 0.2420377880334854, "learning_rate": 4.40191871761658e-05, "loss": 1.5764, "step": 27660 }, { "epoch": 0.5602331606217616, "grad_norm": 0.25794553756713867, "learning_rate": 4.3978707901554404e-05, "loss": 1.5758, "step": 27680 }, { "epoch": 0.5606379533678757, "grad_norm": 0.2595760226249695, "learning_rate": 4.3938228626943005e-05, "loss": 1.5796, "step": 27700 }, { "epoch": 0.5610427461139896, "grad_norm": 0.24343647062778473, "learning_rate": 4.389774935233161e-05, "loss": 1.5855, "step": 27720 }, { "epoch": 0.5614475388601037, "grad_norm": 0.2568787932395935, "learning_rate": 4.385727007772021e-05, "loss": 1.5645, "step": 27740 }, { "epoch": 0.5618523316062176, "grad_norm": 0.2740406095981598, "learning_rate": 4.381679080310881e-05, "loss": 1.5766, "step": 27760 }, { "epoch": 0.5622571243523317, "grad_norm": 0.2507692575454712, "learning_rate": 4.377631152849741e-05, "loss": 1.5735, "step": 27780 }, { "epoch": 0.5626619170984456, "grad_norm": 0.25449472665786743, "learning_rate": 4.373583225388601e-05, "loss": 1.5775, "step": 27800 }, { "epoch": 0.5630667098445595, "grad_norm": 0.2573375403881073, "learning_rate": 4.3695352979274615e-05, "loss": 1.5692, "step": 27820 }, { "epoch": 0.5634715025906736, "grad_norm": 0.2517239451408386, "learning_rate": 4.365487370466321e-05, "loss": 1.5719, "step": 27840 }, { "epoch": 0.5638762953367875, "grad_norm": 0.2670283019542694, "learning_rate": 4.361439443005181e-05, "loss": 1.5675, "step": 27860 }, { "epoch": 0.5642810880829016, "grad_norm": 0.24845652282238007, "learning_rate": 4.357391515544041e-05, "loss": 1.5744, "step": 27880 }, { "epoch": 0.5646858808290155, "grad_norm": 0.2603214383125305, "learning_rate": 4.3533435880829014e-05, "loss": 1.5736, "step": 27900 }, { "epoch": 0.5650906735751295, "grad_norm": 0.24786892533302307, "learning_rate": 4.3492956606217616e-05, "loss": 1.5822, "step": 27920 }, { "epoch": 0.5654954663212435, "grad_norm": 0.25323328375816345, "learning_rate": 4.345247733160622e-05, "loss": 1.5817, "step": 27940 }, { "epoch": 0.5659002590673575, "grad_norm": 0.25558632612228394, "learning_rate": 4.341199805699482e-05, "loss": 1.5843, "step": 27960 }, { "epoch": 0.5663050518134715, "grad_norm": 0.2442002147436142, "learning_rate": 4.337151878238342e-05, "loss": 1.5711, "step": 27980 }, { "epoch": 0.5667098445595855, "grad_norm": 0.24709028005599976, "learning_rate": 4.333103950777202e-05, "loss": 1.5847, "step": 28000 }, { "epoch": 0.5671146373056994, "grad_norm": 0.23754726350307465, "learning_rate": 4.3290560233160624e-05, "loss": 1.5751, "step": 28020 }, { "epoch": 0.5675194300518135, "grad_norm": 0.261986643075943, "learning_rate": 4.3250080958549226e-05, "loss": 1.5722, "step": 28040 }, { "epoch": 0.5679242227979274, "grad_norm": 0.25125253200531006, "learning_rate": 4.320960168393783e-05, "loss": 1.5805, "step": 28060 }, { "epoch": 0.5683290155440415, "grad_norm": 0.24576328694820404, "learning_rate": 4.316912240932643e-05, "loss": 1.5705, "step": 28080 }, { "epoch": 0.5687338082901554, "grad_norm": 0.2455444633960724, "learning_rate": 4.312864313471503e-05, "loss": 1.5757, "step": 28100 }, { "epoch": 0.5691386010362695, "grad_norm": 0.2670045793056488, "learning_rate": 4.308816386010363e-05, "loss": 1.5758, "step": 28120 }, { "epoch": 0.5695433937823834, "grad_norm": 0.25476908683776855, "learning_rate": 4.3047684585492234e-05, "loss": 1.5762, "step": 28140 }, { "epoch": 0.5699481865284974, "grad_norm": 0.25058963894844055, "learning_rate": 4.3007205310880835e-05, "loss": 1.5738, "step": 28160 }, { "epoch": 0.5703529792746114, "grad_norm": 0.23909297585487366, "learning_rate": 4.296672603626944e-05, "loss": 1.5759, "step": 28180 }, { "epoch": 0.5707577720207254, "grad_norm": 0.25474146008491516, "learning_rate": 4.292624676165803e-05, "loss": 1.5816, "step": 28200 }, { "epoch": 0.5711625647668394, "grad_norm": 0.24288970232009888, "learning_rate": 4.288576748704663e-05, "loss": 1.5785, "step": 28220 }, { "epoch": 0.5715673575129534, "grad_norm": 0.24538229405879974, "learning_rate": 4.2845288212435235e-05, "loss": 1.5683, "step": 28240 }, { "epoch": 0.5719721502590673, "grad_norm": 0.2516593337059021, "learning_rate": 4.2804808937823836e-05, "loss": 1.5758, "step": 28260 }, { "epoch": 0.5723769430051814, "grad_norm": 0.2479175627231598, "learning_rate": 4.276432966321244e-05, "loss": 1.5788, "step": 28280 }, { "epoch": 0.5727817357512953, "grad_norm": 0.26313814520835876, "learning_rate": 4.272385038860104e-05, "loss": 1.5666, "step": 28300 }, { "epoch": 0.5731865284974094, "grad_norm": 0.2614689767360687, "learning_rate": 4.268337111398964e-05, "loss": 1.5755, "step": 28320 }, { "epoch": 0.5735913212435233, "grad_norm": 0.24811792373657227, "learning_rate": 4.264289183937824e-05, "loss": 1.5733, "step": 28340 }, { "epoch": 0.5739961139896373, "grad_norm": 0.24781277775764465, "learning_rate": 4.2602412564766844e-05, "loss": 1.5815, "step": 28360 }, { "epoch": 0.5744009067357513, "grad_norm": 0.24626316130161285, "learning_rate": 4.256193329015544e-05, "loss": 1.5746, "step": 28380 }, { "epoch": 0.5748056994818653, "grad_norm": 0.24195964634418488, "learning_rate": 4.252145401554404e-05, "loss": 1.5756, "step": 28400 }, { "epoch": 0.5752104922279793, "grad_norm": 0.25192534923553467, "learning_rate": 4.248097474093264e-05, "loss": 1.5733, "step": 28420 }, { "epoch": 0.5756152849740933, "grad_norm": 0.25554537773132324, "learning_rate": 4.2440495466321244e-05, "loss": 1.5786, "step": 28440 }, { "epoch": 0.5760200777202072, "grad_norm": 0.24462340772151947, "learning_rate": 4.2400016191709846e-05, "loss": 1.5805, "step": 28460 }, { "epoch": 0.5764248704663213, "grad_norm": 0.25285804271698, "learning_rate": 4.235953691709845e-05, "loss": 1.5763, "step": 28480 }, { "epoch": 0.5768296632124352, "grad_norm": 0.25259068608283997, "learning_rate": 4.231905764248705e-05, "loss": 1.575, "step": 28500 }, { "epoch": 0.5772344559585493, "grad_norm": 0.2638585567474365, "learning_rate": 4.227857836787565e-05, "loss": 1.5753, "step": 28520 }, { "epoch": 0.5776392487046632, "grad_norm": 0.2519742548465729, "learning_rate": 4.223809909326425e-05, "loss": 1.5719, "step": 28540 }, { "epoch": 0.5780440414507773, "grad_norm": 0.24417388439178467, "learning_rate": 4.2197619818652854e-05, "loss": 1.5676, "step": 28560 }, { "epoch": 0.5784488341968912, "grad_norm": 0.2448301464319229, "learning_rate": 4.215714054404145e-05, "loss": 1.5764, "step": 28580 }, { "epoch": 0.5788536269430051, "grad_norm": 0.27442774176597595, "learning_rate": 4.211666126943005e-05, "loss": 1.581, "step": 28600 }, { "epoch": 0.5792584196891192, "grad_norm": 0.24819692969322205, "learning_rate": 4.207618199481865e-05, "loss": 1.5769, "step": 28620 }, { "epoch": 0.5796632124352331, "grad_norm": 0.2525196969509125, "learning_rate": 4.203570272020725e-05, "loss": 1.5833, "step": 28640 }, { "epoch": 0.5800680051813472, "grad_norm": 0.24566295742988586, "learning_rate": 4.1995223445595855e-05, "loss": 1.5833, "step": 28660 }, { "epoch": 0.5804727979274611, "grad_norm": 0.24645259976387024, "learning_rate": 4.1954744170984456e-05, "loss": 1.5743, "step": 28680 }, { "epoch": 0.5808775906735751, "grad_norm": 0.24392972886562347, "learning_rate": 4.191426489637306e-05, "loss": 1.5654, "step": 28700 }, { "epoch": 0.5812823834196891, "grad_norm": 0.2385888248682022, "learning_rate": 4.187378562176166e-05, "loss": 1.5759, "step": 28720 }, { "epoch": 0.5816871761658031, "grad_norm": 0.2419498711824417, "learning_rate": 4.183330634715026e-05, "loss": 1.5731, "step": 28740 }, { "epoch": 0.5820919689119171, "grad_norm": 0.2589682638645172, "learning_rate": 4.179282707253886e-05, "loss": 1.5785, "step": 28760 }, { "epoch": 0.5824967616580311, "grad_norm": 0.27007555961608887, "learning_rate": 4.1752347797927464e-05, "loss": 1.5739, "step": 28780 }, { "epoch": 0.582901554404145, "grad_norm": 0.25139927864074707, "learning_rate": 4.1711868523316066e-05, "loss": 1.5751, "step": 28800 }, { "epoch": 0.5833063471502591, "grad_norm": 0.24956010282039642, "learning_rate": 4.167138924870467e-05, "loss": 1.5661, "step": 28820 }, { "epoch": 0.583711139896373, "grad_norm": 0.2451709359884262, "learning_rate": 4.163090997409327e-05, "loss": 1.5717, "step": 28840 }, { "epoch": 0.5841159326424871, "grad_norm": 0.24961628019809723, "learning_rate": 4.159043069948187e-05, "loss": 1.5814, "step": 28860 }, { "epoch": 0.584520725388601, "grad_norm": 0.23765291273593903, "learning_rate": 4.154995142487047e-05, "loss": 1.5673, "step": 28880 }, { "epoch": 0.584925518134715, "grad_norm": 0.2598576843738556, "learning_rate": 4.1509472150259074e-05, "loss": 1.5829, "step": 28900 }, { "epoch": 0.585330310880829, "grad_norm": 0.24838699400424957, "learning_rate": 4.146899287564767e-05, "loss": 1.576, "step": 28920 }, { "epoch": 0.585735103626943, "grad_norm": 0.25361859798431396, "learning_rate": 4.142851360103627e-05, "loss": 1.5793, "step": 28940 }, { "epoch": 0.586139896373057, "grad_norm": 0.25748369097709656, "learning_rate": 4.138803432642487e-05, "loss": 1.5778, "step": 28960 }, { "epoch": 0.586544689119171, "grad_norm": 0.24620041251182556, "learning_rate": 4.1347555051813474e-05, "loss": 1.5762, "step": 28980 }, { "epoch": 0.586949481865285, "grad_norm": 0.24041464924812317, "learning_rate": 4.1307075777202075e-05, "loss": 1.5729, "step": 29000 }, { "epoch": 0.587354274611399, "grad_norm": 0.28080734610557556, "learning_rate": 4.126659650259068e-05, "loss": 1.5768, "step": 29020 }, { "epoch": 0.5877590673575129, "grad_norm": 0.25217050313949585, "learning_rate": 4.122611722797928e-05, "loss": 1.579, "step": 29040 }, { "epoch": 0.588163860103627, "grad_norm": 0.24903340637683868, "learning_rate": 4.118563795336788e-05, "loss": 1.5741, "step": 29060 }, { "epoch": 0.5885686528497409, "grad_norm": 0.25236716866493225, "learning_rate": 4.114515867875648e-05, "loss": 1.5704, "step": 29080 }, { "epoch": 0.588973445595855, "grad_norm": 0.2478567361831665, "learning_rate": 4.110467940414508e-05, "loss": 1.5759, "step": 29100 }, { "epoch": 0.5893782383419689, "grad_norm": 0.23734483122825623, "learning_rate": 4.106420012953368e-05, "loss": 1.5707, "step": 29120 }, { "epoch": 0.5897830310880829, "grad_norm": 0.24406881630420685, "learning_rate": 4.102372085492228e-05, "loss": 1.5799, "step": 29140 }, { "epoch": 0.5901878238341969, "grad_norm": 0.2480667531490326, "learning_rate": 4.098324158031088e-05, "loss": 1.5812, "step": 29160 }, { "epoch": 0.5905926165803109, "grad_norm": 0.25957611203193665, "learning_rate": 4.094276230569948e-05, "loss": 1.5835, "step": 29180 }, { "epoch": 0.5909974093264249, "grad_norm": 0.24241919815540314, "learning_rate": 4.0902283031088084e-05, "loss": 1.572, "step": 29200 }, { "epoch": 0.5914022020725389, "grad_norm": 0.2518638074398041, "learning_rate": 4.0861803756476686e-05, "loss": 1.5685, "step": 29220 }, { "epoch": 0.5918069948186528, "grad_norm": 0.25352829694747925, "learning_rate": 4.082132448186529e-05, "loss": 1.5752, "step": 29240 }, { "epoch": 0.5922117875647669, "grad_norm": 0.2487095296382904, "learning_rate": 4.078084520725389e-05, "loss": 1.5677, "step": 29260 }, { "epoch": 0.5926165803108808, "grad_norm": 0.2385735809803009, "learning_rate": 4.074036593264249e-05, "loss": 1.5778, "step": 29280 }, { "epoch": 0.5930213730569949, "grad_norm": 0.24923932552337646, "learning_rate": 4.069988665803109e-05, "loss": 1.5755, "step": 29300 }, { "epoch": 0.5934261658031088, "grad_norm": 0.25011467933654785, "learning_rate": 4.065940738341969e-05, "loss": 1.5768, "step": 29320 }, { "epoch": 0.5938309585492227, "grad_norm": 0.24440476298332214, "learning_rate": 4.061892810880829e-05, "loss": 1.575, "step": 29340 }, { "epoch": 0.5942357512953368, "grad_norm": 0.2593095898628235, "learning_rate": 4.057844883419689e-05, "loss": 1.5754, "step": 29360 }, { "epoch": 0.5946405440414507, "grad_norm": 0.2539571225643158, "learning_rate": 4.053796955958549e-05, "loss": 1.5723, "step": 29380 }, { "epoch": 0.5950453367875648, "grad_norm": 0.24505826830863953, "learning_rate": 4.0497490284974094e-05, "loss": 1.5737, "step": 29400 }, { "epoch": 0.5954501295336787, "grad_norm": 0.24120710790157318, "learning_rate": 4.0457011010362695e-05, "loss": 1.5831, "step": 29420 }, { "epoch": 0.5958549222797928, "grad_norm": 0.2603512406349182, "learning_rate": 4.04165317357513e-05, "loss": 1.5778, "step": 29440 }, { "epoch": 0.5962597150259067, "grad_norm": 0.24887946248054504, "learning_rate": 4.03760524611399e-05, "loss": 1.5701, "step": 29460 }, { "epoch": 0.5966645077720207, "grad_norm": 0.24498353898525238, "learning_rate": 4.03355731865285e-05, "loss": 1.5786, "step": 29480 }, { "epoch": 0.5970693005181347, "grad_norm": 0.24439983069896698, "learning_rate": 4.0295093911917095e-05, "loss": 1.5733, "step": 29500 }, { "epoch": 0.5974740932642487, "grad_norm": 0.2523341774940491, "learning_rate": 4.0254614637305696e-05, "loss": 1.5705, "step": 29520 }, { "epoch": 0.5978788860103627, "grad_norm": 0.2488023340702057, "learning_rate": 4.0214135362694305e-05, "loss": 1.5677, "step": 29540 }, { "epoch": 0.5982836787564767, "grad_norm": 0.24207501113414764, "learning_rate": 4.0173656088082906e-05, "loss": 1.5711, "step": 29560 }, { "epoch": 0.5986884715025906, "grad_norm": 0.2552051842212677, "learning_rate": 4.013317681347151e-05, "loss": 1.5769, "step": 29580 }, { "epoch": 0.5990932642487047, "grad_norm": 0.2527637183666229, "learning_rate": 4.009269753886011e-05, "loss": 1.5715, "step": 29600 }, { "epoch": 0.5994980569948186, "grad_norm": 0.2519965171813965, "learning_rate": 4.005221826424871e-05, "loss": 1.572, "step": 29620 }, { "epoch": 0.5999028497409327, "grad_norm": 0.24613921344280243, "learning_rate": 4.001173898963731e-05, "loss": 1.5774, "step": 29640 }, { "epoch": 0.6003076424870466, "grad_norm": 0.24614927172660828, "learning_rate": 3.997125971502591e-05, "loss": 1.5758, "step": 29660 }, { "epoch": 0.6007124352331606, "grad_norm": 0.23924462497234344, "learning_rate": 3.993078044041451e-05, "loss": 1.5737, "step": 29680 }, { "epoch": 0.6011172279792746, "grad_norm": 0.25098779797554016, "learning_rate": 3.989030116580311e-05, "loss": 1.5753, "step": 29700 }, { "epoch": 0.6015220207253886, "grad_norm": 0.24626727402210236, "learning_rate": 3.984982189119171e-05, "loss": 1.5797, "step": 29720 }, { "epoch": 0.6019268134715026, "grad_norm": 0.26206517219543457, "learning_rate": 3.9809342616580314e-05, "loss": 1.578, "step": 29740 }, { "epoch": 0.6023316062176166, "grad_norm": 0.23892979323863983, "learning_rate": 3.9768863341968915e-05, "loss": 1.5752, "step": 29760 }, { "epoch": 0.6027363989637305, "grad_norm": 0.2501845955848694, "learning_rate": 3.972838406735752e-05, "loss": 1.5731, "step": 29780 }, { "epoch": 0.6031411917098446, "grad_norm": 0.26110973954200745, "learning_rate": 3.968790479274612e-05, "loss": 1.5756, "step": 29800 }, { "epoch": 0.6035459844559585, "grad_norm": 0.2524327337741852, "learning_rate": 3.964742551813472e-05, "loss": 1.5764, "step": 29820 }, { "epoch": 0.6039507772020726, "grad_norm": 0.24868561327457428, "learning_rate": 3.960694624352332e-05, "loss": 1.5753, "step": 29840 }, { "epoch": 0.6043555699481865, "grad_norm": 0.24784155189990997, "learning_rate": 3.956646696891192e-05, "loss": 1.5727, "step": 29860 }, { "epoch": 0.6047603626943006, "grad_norm": 0.33210286498069763, "learning_rate": 3.952598769430052e-05, "loss": 1.5725, "step": 29880 }, { "epoch": 0.6051651554404145, "grad_norm": 0.25179848074913025, "learning_rate": 3.948550841968912e-05, "loss": 1.5712, "step": 29900 }, { "epoch": 0.6055699481865285, "grad_norm": 0.26595398783683777, "learning_rate": 3.944502914507772e-05, "loss": 1.5691, "step": 29920 }, { "epoch": 0.6059747409326425, "grad_norm": 0.24291539192199707, "learning_rate": 3.940454987046632e-05, "loss": 1.568, "step": 29940 }, { "epoch": 0.6063795336787565, "grad_norm": 0.24679183959960938, "learning_rate": 3.9364070595854925e-05, "loss": 1.584, "step": 29960 }, { "epoch": 0.6067843264248705, "grad_norm": 0.2545383870601654, "learning_rate": 3.9323591321243526e-05, "loss": 1.5799, "step": 29980 }, { "epoch": 0.6071891191709845, "grad_norm": 0.25054121017456055, "learning_rate": 3.928311204663213e-05, "loss": 1.5686, "step": 30000 }, { "epoch": 0.6075939119170984, "grad_norm": 0.25255411863327026, "learning_rate": 3.924263277202073e-05, "loss": 1.5726, "step": 30020 }, { "epoch": 0.6079987046632125, "grad_norm": 0.2520902752876282, "learning_rate": 3.9202153497409324e-05, "loss": 1.5747, "step": 30040 }, { "epoch": 0.6084034974093264, "grad_norm": 0.2409278154373169, "learning_rate": 3.9161674222797926e-05, "loss": 1.575, "step": 30060 }, { "epoch": 0.6088082901554405, "grad_norm": 0.25416630506515503, "learning_rate": 3.912119494818653e-05, "loss": 1.574, "step": 30080 }, { "epoch": 0.6092130829015544, "grad_norm": 0.2438848316669464, "learning_rate": 3.908071567357513e-05, "loss": 1.5784, "step": 30100 }, { "epoch": 0.6096178756476683, "grad_norm": 0.2498348206281662, "learning_rate": 3.904023639896373e-05, "loss": 1.574, "step": 30120 }, { "epoch": 0.6100226683937824, "grad_norm": 0.26180803775787354, "learning_rate": 3.899975712435233e-05, "loss": 1.5688, "step": 30140 }, { "epoch": 0.6104274611398963, "grad_norm": 0.24193161725997925, "learning_rate": 3.8959277849740934e-05, "loss": 1.5654, "step": 30160 }, { "epoch": 0.6108322538860104, "grad_norm": 0.2516796886920929, "learning_rate": 3.8918798575129535e-05, "loss": 1.5813, "step": 30180 }, { "epoch": 0.6112370466321243, "grad_norm": 0.24526935815811157, "learning_rate": 3.887831930051814e-05, "loss": 1.5719, "step": 30200 }, { "epoch": 0.6116418393782384, "grad_norm": 0.24142298102378845, "learning_rate": 3.883784002590674e-05, "loss": 1.5723, "step": 30220 }, { "epoch": 0.6120466321243523, "grad_norm": 0.25189441442489624, "learning_rate": 3.8797360751295333e-05, "loss": 1.5711, "step": 30240 }, { "epoch": 0.6124514248704663, "grad_norm": 0.24498246610164642, "learning_rate": 3.8756881476683935e-05, "loss": 1.5703, "step": 30260 }, { "epoch": 0.6128562176165803, "grad_norm": 0.25633877515792847, "learning_rate": 3.871640220207254e-05, "loss": 1.5754, "step": 30280 }, { "epoch": 0.6132610103626943, "grad_norm": 0.2357192039489746, "learning_rate": 3.867592292746114e-05, "loss": 1.5786, "step": 30300 }, { "epoch": 0.6136658031088082, "grad_norm": 0.25252485275268555, "learning_rate": 3.863544365284975e-05, "loss": 1.5717, "step": 30320 }, { "epoch": 0.6140705958549223, "grad_norm": 0.24811439216136932, "learning_rate": 3.859496437823835e-05, "loss": 1.5844, "step": 30340 }, { "epoch": 0.6144753886010362, "grad_norm": 0.25469183921813965, "learning_rate": 3.855448510362695e-05, "loss": 1.5814, "step": 30360 }, { "epoch": 0.6148801813471503, "grad_norm": 0.24572643637657166, "learning_rate": 3.851400582901555e-05, "loss": 1.5759, "step": 30380 }, { "epoch": 0.6152849740932642, "grad_norm": 0.24223372340202332, "learning_rate": 3.8473526554404146e-05, "loss": 1.5784, "step": 30400 }, { "epoch": 0.6156897668393783, "grad_norm": 0.24972647428512573, "learning_rate": 3.843304727979275e-05, "loss": 1.5727, "step": 30420 }, { "epoch": 0.6160945595854922, "grad_norm": 0.2543584108352661, "learning_rate": 3.839256800518135e-05, "loss": 1.5723, "step": 30440 }, { "epoch": 0.6164993523316062, "grad_norm": 0.2575310170650482, "learning_rate": 3.835208873056995e-05, "loss": 1.5784, "step": 30460 }, { "epoch": 0.6169041450777202, "grad_norm": 0.27129507064819336, "learning_rate": 3.831160945595855e-05, "loss": 1.5684, "step": 30480 }, { "epoch": 0.6173089378238342, "grad_norm": 0.2375326305627823, "learning_rate": 3.8271130181347154e-05, "loss": 1.5773, "step": 30500 }, { "epoch": 0.6177137305699482, "grad_norm": 0.24362146854400635, "learning_rate": 3.8230650906735756e-05, "loss": 1.5763, "step": 30520 }, { "epoch": 0.6181185233160622, "grad_norm": 0.2551186978816986, "learning_rate": 3.819017163212436e-05, "loss": 1.5803, "step": 30540 }, { "epoch": 0.6185233160621761, "grad_norm": 0.2513081431388855, "learning_rate": 3.814969235751296e-05, "loss": 1.5812, "step": 30560 }, { "epoch": 0.6189281088082902, "grad_norm": 0.24087649583816528, "learning_rate": 3.8109213082901554e-05, "loss": 1.57, "step": 30580 }, { "epoch": 0.6193329015544041, "grad_norm": 0.24705708026885986, "learning_rate": 3.8068733808290155e-05, "loss": 1.5748, "step": 30600 }, { "epoch": 0.6197376943005182, "grad_norm": 0.24201475083827972, "learning_rate": 3.802825453367876e-05, "loss": 1.5667, "step": 30620 }, { "epoch": 0.6201424870466321, "grad_norm": 0.25227147340774536, "learning_rate": 3.798777525906736e-05, "loss": 1.5795, "step": 30640 }, { "epoch": 0.6205472797927462, "grad_norm": 0.2508854269981384, "learning_rate": 3.794729598445596e-05, "loss": 1.5736, "step": 30660 }, { "epoch": 0.6209520725388601, "grad_norm": 0.258821040391922, "learning_rate": 3.790681670984456e-05, "loss": 1.5722, "step": 30680 }, { "epoch": 0.6213568652849741, "grad_norm": 0.2558509111404419, "learning_rate": 3.7866337435233163e-05, "loss": 1.5774, "step": 30700 }, { "epoch": 0.6217616580310881, "grad_norm": 0.24271105229854584, "learning_rate": 3.7825858160621765e-05, "loss": 1.5674, "step": 30720 }, { "epoch": 0.6221664507772021, "grad_norm": 0.2544645369052887, "learning_rate": 3.778537888601037e-05, "loss": 1.5786, "step": 30740 }, { "epoch": 0.622571243523316, "grad_norm": 0.23207813501358032, "learning_rate": 3.774489961139897e-05, "loss": 1.5821, "step": 30760 }, { "epoch": 0.6229760362694301, "grad_norm": 0.2484460175037384, "learning_rate": 3.770442033678756e-05, "loss": 1.5739, "step": 30780 }, { "epoch": 0.623380829015544, "grad_norm": 0.2465798258781433, "learning_rate": 3.7663941062176165e-05, "loss": 1.5808, "step": 30800 }, { "epoch": 0.6237856217616581, "grad_norm": 0.25107142329216003, "learning_rate": 3.7623461787564766e-05, "loss": 1.5739, "step": 30820 }, { "epoch": 0.624190414507772, "grad_norm": 0.236026793718338, "learning_rate": 3.758298251295337e-05, "loss": 1.5729, "step": 30840 }, { "epoch": 0.6245952072538861, "grad_norm": 0.2500072121620178, "learning_rate": 3.754250323834197e-05, "loss": 1.5771, "step": 30860 }, { "epoch": 0.625, "grad_norm": 0.25098055601119995, "learning_rate": 3.750202396373057e-05, "loss": 1.5715, "step": 30880 }, { "epoch": 0.6254047927461139, "grad_norm": 0.24521265923976898, "learning_rate": 3.746154468911917e-05, "loss": 1.5743, "step": 30900 }, { "epoch": 0.625809585492228, "grad_norm": 0.24678662419319153, "learning_rate": 3.7421065414507774e-05, "loss": 1.5746, "step": 30920 }, { "epoch": 0.6262143782383419, "grad_norm": 0.24493363499641418, "learning_rate": 3.7380586139896376e-05, "loss": 1.574, "step": 30940 }, { "epoch": 0.626619170984456, "grad_norm": 0.24736078083515167, "learning_rate": 3.734010686528497e-05, "loss": 1.5743, "step": 30960 }, { "epoch": 0.6270239637305699, "grad_norm": 0.25626078248023987, "learning_rate": 3.729962759067357e-05, "loss": 1.5825, "step": 30980 }, { "epoch": 0.627428756476684, "grad_norm": 0.2525791525840759, "learning_rate": 3.7259148316062174e-05, "loss": 1.5741, "step": 31000 }, { "epoch": 0.6278335492227979, "grad_norm": 0.2469402253627777, "learning_rate": 3.7218669041450775e-05, "loss": 1.5711, "step": 31020 }, { "epoch": 0.6282383419689119, "grad_norm": 0.24808770418167114, "learning_rate": 3.717818976683938e-05, "loss": 1.5744, "step": 31040 }, { "epoch": 0.6286431347150259, "grad_norm": 0.24421654641628265, "learning_rate": 3.713771049222798e-05, "loss": 1.5786, "step": 31060 }, { "epoch": 0.6290479274611399, "grad_norm": 0.25157591700553894, "learning_rate": 3.709723121761659e-05, "loss": 1.5725, "step": 31080 }, { "epoch": 0.6294527202072538, "grad_norm": 0.2588376998901367, "learning_rate": 3.705675194300519e-05, "loss": 1.5754, "step": 31100 }, { "epoch": 0.6298575129533679, "grad_norm": 0.23830947279930115, "learning_rate": 3.7016272668393783e-05, "loss": 1.5721, "step": 31120 }, { "epoch": 0.6302623056994818, "grad_norm": 0.24680526554584503, "learning_rate": 3.6975793393782385e-05, "loss": 1.5729, "step": 31140 }, { "epoch": 0.6306670984455959, "grad_norm": 0.243768110871315, "learning_rate": 3.693531411917099e-05, "loss": 1.5687, "step": 31160 }, { "epoch": 0.6310718911917098, "grad_norm": 0.24442842602729797, "learning_rate": 3.689483484455959e-05, "loss": 1.5721, "step": 31180 }, { "epoch": 0.6314766839378239, "grad_norm": 0.2463763803243637, "learning_rate": 3.685435556994819e-05, "loss": 1.5693, "step": 31200 }, { "epoch": 0.6318814766839378, "grad_norm": 0.24402445554733276, "learning_rate": 3.681387629533679e-05, "loss": 1.5739, "step": 31220 }, { "epoch": 0.6322862694300518, "grad_norm": 0.2562233805656433, "learning_rate": 3.677339702072539e-05, "loss": 1.5709, "step": 31240 }, { "epoch": 0.6326910621761658, "grad_norm": 0.24645063281059265, "learning_rate": 3.6732917746113995e-05, "loss": 1.5673, "step": 31260 }, { "epoch": 0.6330958549222798, "grad_norm": 0.2476893663406372, "learning_rate": 3.6692438471502596e-05, "loss": 1.5817, "step": 31280 }, { "epoch": 0.6335006476683938, "grad_norm": 0.2492830455303192, "learning_rate": 3.66519591968912e-05, "loss": 1.5636, "step": 31300 }, { "epoch": 0.6339054404145078, "grad_norm": 0.25315627455711365, "learning_rate": 3.661147992227979e-05, "loss": 1.5693, "step": 31320 }, { "epoch": 0.6343102331606217, "grad_norm": 0.24810099601745605, "learning_rate": 3.6571000647668394e-05, "loss": 1.5766, "step": 31340 }, { "epoch": 0.6347150259067358, "grad_norm": 0.25197628140449524, "learning_rate": 3.6530521373056996e-05, "loss": 1.5786, "step": 31360 }, { "epoch": 0.6351198186528497, "grad_norm": 0.25704458355903625, "learning_rate": 3.64900420984456e-05, "loss": 1.5704, "step": 31380 }, { "epoch": 0.6355246113989638, "grad_norm": 0.2423878163099289, "learning_rate": 3.64495628238342e-05, "loss": 1.5744, "step": 31400 }, { "epoch": 0.6359294041450777, "grad_norm": 0.2431255728006363, "learning_rate": 3.64090835492228e-05, "loss": 1.5773, "step": 31420 }, { "epoch": 0.6363341968911918, "grad_norm": 0.25815653800964355, "learning_rate": 3.63686042746114e-05, "loss": 1.5727, "step": 31440 }, { "epoch": 0.6367389896373057, "grad_norm": 0.2300557643175125, "learning_rate": 3.6328125000000004e-05, "loss": 1.5779, "step": 31460 }, { "epoch": 0.6371437823834197, "grad_norm": 0.25981083512306213, "learning_rate": 3.6287645725388605e-05, "loss": 1.5749, "step": 31480 }, { "epoch": 0.6375485751295337, "grad_norm": 0.2508260905742645, "learning_rate": 3.62471664507772e-05, "loss": 1.5694, "step": 31500 }, { "epoch": 0.6379533678756477, "grad_norm": 0.25218597054481506, "learning_rate": 3.62066871761658e-05, "loss": 1.5749, "step": 31520 }, { "epoch": 0.6383581606217616, "grad_norm": 0.26301437616348267, "learning_rate": 3.6166207901554403e-05, "loss": 1.5777, "step": 31540 }, { "epoch": 0.6387629533678757, "grad_norm": 0.24628494679927826, "learning_rate": 3.6125728626943005e-05, "loss": 1.5761, "step": 31560 }, { "epoch": 0.6391677461139896, "grad_norm": 0.25548791885375977, "learning_rate": 3.608524935233161e-05, "loss": 1.5808, "step": 31580 }, { "epoch": 0.6395725388601037, "grad_norm": 0.24939735233783722, "learning_rate": 3.604477007772021e-05, "loss": 1.5673, "step": 31600 }, { "epoch": 0.6399773316062176, "grad_norm": 0.24030573666095734, "learning_rate": 3.600429080310881e-05, "loss": 1.574, "step": 31620 }, { "epoch": 0.6403821243523317, "grad_norm": 0.24964666366577148, "learning_rate": 3.596381152849741e-05, "loss": 1.5733, "step": 31640 }, { "epoch": 0.6407869170984456, "grad_norm": 0.24960339069366455, "learning_rate": 3.592333225388601e-05, "loss": 1.5767, "step": 31660 }, { "epoch": 0.6411917098445595, "grad_norm": 0.2430756539106369, "learning_rate": 3.5882852979274615e-05, "loss": 1.5687, "step": 31680 }, { "epoch": 0.6415965025906736, "grad_norm": 0.2440321445465088, "learning_rate": 3.584237370466321e-05, "loss": 1.5754, "step": 31700 }, { "epoch": 0.6420012953367875, "grad_norm": 0.2512691617012024, "learning_rate": 3.580189443005181e-05, "loss": 1.5712, "step": 31720 }, { "epoch": 0.6424060880829016, "grad_norm": 0.24352805316448212, "learning_rate": 3.576141515544041e-05, "loss": 1.5709, "step": 31740 }, { "epoch": 0.6428108808290155, "grad_norm": 0.261437326669693, "learning_rate": 3.5720935880829014e-05, "loss": 1.5714, "step": 31760 }, { "epoch": 0.6432156735751295, "grad_norm": 0.25658345222473145, "learning_rate": 3.5680456606217616e-05, "loss": 1.5671, "step": 31780 }, { "epoch": 0.6436204663212435, "grad_norm": 0.24744468927383423, "learning_rate": 3.563997733160622e-05, "loss": 1.5656, "step": 31800 }, { "epoch": 0.6440252590673575, "grad_norm": 0.24158026278018951, "learning_rate": 3.559949805699482e-05, "loss": 1.5666, "step": 31820 }, { "epoch": 0.6444300518134715, "grad_norm": 0.24855244159698486, "learning_rate": 3.555901878238342e-05, "loss": 1.579, "step": 31840 }, { "epoch": 0.6448348445595855, "grad_norm": 0.2470637410879135, "learning_rate": 3.551853950777202e-05, "loss": 1.5718, "step": 31860 }, { "epoch": 0.6452396373056994, "grad_norm": 0.25781503319740295, "learning_rate": 3.5478060233160624e-05, "loss": 1.5741, "step": 31880 }, { "epoch": 0.6456444300518135, "grad_norm": 0.2523963749408722, "learning_rate": 3.5437580958549225e-05, "loss": 1.5705, "step": 31900 }, { "epoch": 0.6460492227979274, "grad_norm": 0.25985103845596313, "learning_rate": 3.539710168393783e-05, "loss": 1.5707, "step": 31920 }, { "epoch": 0.6464540155440415, "grad_norm": 0.2508140206336975, "learning_rate": 3.535662240932643e-05, "loss": 1.5805, "step": 31940 }, { "epoch": 0.6468588082901554, "grad_norm": 0.2522670328617096, "learning_rate": 3.531614313471503e-05, "loss": 1.576, "step": 31960 }, { "epoch": 0.6472636010362695, "grad_norm": 0.23971566557884216, "learning_rate": 3.527566386010363e-05, "loss": 1.5684, "step": 31980 }, { "epoch": 0.6476683937823834, "grad_norm": 0.25265347957611084, "learning_rate": 3.523518458549223e-05, "loss": 1.574, "step": 32000 }, { "epoch": 0.6480731865284974, "grad_norm": 0.23885497450828552, "learning_rate": 3.5194705310880835e-05, "loss": 1.576, "step": 32020 }, { "epoch": 0.6484779792746114, "grad_norm": 0.24552632868289948, "learning_rate": 3.515422603626943e-05, "loss": 1.5782, "step": 32040 }, { "epoch": 0.6488827720207254, "grad_norm": 0.24113216996192932, "learning_rate": 3.511374676165803e-05, "loss": 1.5749, "step": 32060 }, { "epoch": 0.6492875647668394, "grad_norm": 0.2522551715373993, "learning_rate": 3.507326748704663e-05, "loss": 1.5649, "step": 32080 }, { "epoch": 0.6496923575129534, "grad_norm": 0.24163100123405457, "learning_rate": 3.5032788212435235e-05, "loss": 1.5676, "step": 32100 }, { "epoch": 0.6500971502590673, "grad_norm": 0.25726160407066345, "learning_rate": 3.4992308937823836e-05, "loss": 1.5726, "step": 32120 }, { "epoch": 0.6505019430051814, "grad_norm": 0.25020506978034973, "learning_rate": 3.495182966321244e-05, "loss": 1.5732, "step": 32140 }, { "epoch": 0.6509067357512953, "grad_norm": 0.258883535861969, "learning_rate": 3.491135038860104e-05, "loss": 1.574, "step": 32160 }, { "epoch": 0.6513115284974094, "grad_norm": 0.2678943872451782, "learning_rate": 3.487087111398964e-05, "loss": 1.5655, "step": 32180 }, { "epoch": 0.6517163212435233, "grad_norm": 0.24309828877449036, "learning_rate": 3.483039183937824e-05, "loss": 1.5801, "step": 32200 }, { "epoch": 0.6521211139896373, "grad_norm": 0.2526661455631256, "learning_rate": 3.4789912564766844e-05, "loss": 1.5732, "step": 32220 }, { "epoch": 0.6525259067357513, "grad_norm": 0.24899008870124817, "learning_rate": 3.474943329015544e-05, "loss": 1.5757, "step": 32240 }, { "epoch": 0.6529306994818653, "grad_norm": 0.25422099232673645, "learning_rate": 3.470895401554404e-05, "loss": 1.5752, "step": 32260 }, { "epoch": 0.6533354922279793, "grad_norm": 0.2501138150691986, "learning_rate": 3.466847474093264e-05, "loss": 1.5756, "step": 32280 }, { "epoch": 0.6537402849740933, "grad_norm": 0.25308769941329956, "learning_rate": 3.4627995466321244e-05, "loss": 1.5785, "step": 32300 }, { "epoch": 0.6541450777202072, "grad_norm": 0.24894794821739197, "learning_rate": 3.4587516191709845e-05, "loss": 1.5802, "step": 32320 }, { "epoch": 0.6545498704663213, "grad_norm": 0.2487451583147049, "learning_rate": 3.454703691709845e-05, "loss": 1.5788, "step": 32340 }, { "epoch": 0.6549546632124352, "grad_norm": 0.25054946541786194, "learning_rate": 3.450655764248705e-05, "loss": 1.5678, "step": 32360 }, { "epoch": 0.6553594559585493, "grad_norm": 0.2507936358451843, "learning_rate": 3.446607836787565e-05, "loss": 1.5732, "step": 32380 }, { "epoch": 0.6557642487046632, "grad_norm": 0.2648046314716339, "learning_rate": 3.442559909326425e-05, "loss": 1.5769, "step": 32400 }, { "epoch": 0.6561690414507773, "grad_norm": 0.23981231451034546, "learning_rate": 3.438511981865285e-05, "loss": 1.5691, "step": 32420 }, { "epoch": 0.6565738341968912, "grad_norm": 0.24385464191436768, "learning_rate": 3.434464054404145e-05, "loss": 1.5821, "step": 32440 }, { "epoch": 0.6569786269430051, "grad_norm": 0.25669196248054504, "learning_rate": 3.430416126943005e-05, "loss": 1.5706, "step": 32460 }, { "epoch": 0.6573834196891192, "grad_norm": 0.25242260098457336, "learning_rate": 3.426368199481865e-05, "loss": 1.5722, "step": 32480 }, { "epoch": 0.6577882124352331, "grad_norm": 0.24916328489780426, "learning_rate": 3.422320272020725e-05, "loss": 1.5702, "step": 32500 }, { "epoch": 0.6581930051813472, "grad_norm": 0.2761092185974121, "learning_rate": 3.4182723445595855e-05, "loss": 1.5818, "step": 32520 }, { "epoch": 0.6585977979274611, "grad_norm": 0.24540704488754272, "learning_rate": 3.4142244170984456e-05, "loss": 1.573, "step": 32540 }, { "epoch": 0.6590025906735751, "grad_norm": 0.25081005692481995, "learning_rate": 3.410176489637306e-05, "loss": 1.5673, "step": 32560 }, { "epoch": 0.6594073834196891, "grad_norm": 0.2505885362625122, "learning_rate": 3.406128562176166e-05, "loss": 1.57, "step": 32580 }, { "epoch": 0.6598121761658031, "grad_norm": 0.2681802809238434, "learning_rate": 3.402080634715026e-05, "loss": 1.5766, "step": 32600 }, { "epoch": 0.6602169689119171, "grad_norm": 0.2554236054420471, "learning_rate": 3.398032707253886e-05, "loss": 1.576, "step": 32620 }, { "epoch": 0.6606217616580311, "grad_norm": 0.2545925974845886, "learning_rate": 3.3939847797927464e-05, "loss": 1.5737, "step": 32640 }, { "epoch": 0.661026554404145, "grad_norm": 0.2555898129940033, "learning_rate": 3.3899368523316066e-05, "loss": 1.5796, "step": 32660 }, { "epoch": 0.6614313471502591, "grad_norm": 0.2495018094778061, "learning_rate": 3.385888924870467e-05, "loss": 1.5676, "step": 32680 }, { "epoch": 0.661836139896373, "grad_norm": 0.2576781213283539, "learning_rate": 3.381840997409327e-05, "loss": 1.5724, "step": 32700 }, { "epoch": 0.6622409326424871, "grad_norm": 0.25561007857322693, "learning_rate": 3.377793069948187e-05, "loss": 1.5752, "step": 32720 }, { "epoch": 0.662645725388601, "grad_norm": 0.24897056818008423, "learning_rate": 3.373745142487047e-05, "loss": 1.5736, "step": 32740 }, { "epoch": 0.663050518134715, "grad_norm": 0.24622581899166107, "learning_rate": 3.3696972150259074e-05, "loss": 1.5701, "step": 32760 }, { "epoch": 0.663455310880829, "grad_norm": 0.26912856101989746, "learning_rate": 3.365649287564767e-05, "loss": 1.5749, "step": 32780 }, { "epoch": 0.663860103626943, "grad_norm": 0.24890604615211487, "learning_rate": 3.361601360103627e-05, "loss": 1.5731, "step": 32800 }, { "epoch": 0.664264896373057, "grad_norm": 0.2535284161567688, "learning_rate": 3.357553432642487e-05, "loss": 1.5809, "step": 32820 }, { "epoch": 0.664669689119171, "grad_norm": 0.24333080649375916, "learning_rate": 3.353505505181347e-05, "loss": 1.576, "step": 32840 }, { "epoch": 0.665074481865285, "grad_norm": 0.25092634558677673, "learning_rate": 3.3494575777202075e-05, "loss": 1.5819, "step": 32860 }, { "epoch": 0.665479274611399, "grad_norm": 0.252442866563797, "learning_rate": 3.3454096502590677e-05, "loss": 1.5741, "step": 32880 }, { "epoch": 0.6658840673575129, "grad_norm": 0.2466791868209839, "learning_rate": 3.341361722797928e-05, "loss": 1.5719, "step": 32900 }, { "epoch": 0.666288860103627, "grad_norm": 0.2488088309764862, "learning_rate": 3.337313795336788e-05, "loss": 1.5742, "step": 32920 }, { "epoch": 0.6666936528497409, "grad_norm": 0.2575971484184265, "learning_rate": 3.333265867875648e-05, "loss": 1.5679, "step": 32940 }, { "epoch": 0.667098445595855, "grad_norm": 0.24822643399238586, "learning_rate": 3.329217940414508e-05, "loss": 1.5737, "step": 32960 }, { "epoch": 0.6675032383419689, "grad_norm": 0.250617653131485, "learning_rate": 3.325170012953368e-05, "loss": 1.5736, "step": 32980 }, { "epoch": 0.6679080310880829, "grad_norm": 0.2568735182285309, "learning_rate": 3.321122085492228e-05, "loss": 1.5655, "step": 33000 }, { "epoch": 0.6683128238341969, "grad_norm": 0.258795827627182, "learning_rate": 3.317074158031088e-05, "loss": 1.5767, "step": 33020 }, { "epoch": 0.6687176165803109, "grad_norm": 0.2518269717693329, "learning_rate": 3.313026230569948e-05, "loss": 1.5797, "step": 33040 }, { "epoch": 0.6691224093264249, "grad_norm": 0.24946272373199463, "learning_rate": 3.3089783031088084e-05, "loss": 1.5712, "step": 33060 }, { "epoch": 0.6695272020725389, "grad_norm": 0.2463691234588623, "learning_rate": 3.3049303756476686e-05, "loss": 1.5705, "step": 33080 }, { "epoch": 0.6699319948186528, "grad_norm": 0.2478511780500412, "learning_rate": 3.300882448186529e-05, "loss": 1.577, "step": 33100 }, { "epoch": 0.6703367875647669, "grad_norm": 0.2671777009963989, "learning_rate": 3.296834520725389e-05, "loss": 1.5698, "step": 33120 }, { "epoch": 0.6707415803108808, "grad_norm": 0.24602633714675903, "learning_rate": 3.292786593264249e-05, "loss": 1.5711, "step": 33140 }, { "epoch": 0.6711463730569949, "grad_norm": 0.2536900043487549, "learning_rate": 3.2887386658031085e-05, "loss": 1.5681, "step": 33160 }, { "epoch": 0.6715511658031088, "grad_norm": 0.2536455988883972, "learning_rate": 3.284690738341969e-05, "loss": 1.5672, "step": 33180 }, { "epoch": 0.6719559585492227, "grad_norm": 0.24934329092502594, "learning_rate": 3.280642810880829e-05, "loss": 1.575, "step": 33200 }, { "epoch": 0.6723607512953368, "grad_norm": 0.25631240010261536, "learning_rate": 3.276594883419689e-05, "loss": 1.5673, "step": 33220 }, { "epoch": 0.6727655440414507, "grad_norm": 0.2463889867067337, "learning_rate": 3.272546955958549e-05, "loss": 1.5739, "step": 33240 }, { "epoch": 0.6731703367875648, "grad_norm": 0.2445671707391739, "learning_rate": 3.268499028497409e-05, "loss": 1.5749, "step": 33260 }, { "epoch": 0.6735751295336787, "grad_norm": 0.24745845794677734, "learning_rate": 3.2644511010362695e-05, "loss": 1.5736, "step": 33280 }, { "epoch": 0.6739799222797928, "grad_norm": 0.26067715883255005, "learning_rate": 3.2604031735751297e-05, "loss": 1.5747, "step": 33300 }, { "epoch": 0.6743847150259067, "grad_norm": 0.25191694498062134, "learning_rate": 3.25635524611399e-05, "loss": 1.5733, "step": 33320 }, { "epoch": 0.6747895077720207, "grad_norm": 0.25005969405174255, "learning_rate": 3.25230731865285e-05, "loss": 1.5716, "step": 33340 }, { "epoch": 0.6751943005181347, "grad_norm": 0.25360310077667236, "learning_rate": 3.2482593911917095e-05, "loss": 1.581, "step": 33360 }, { "epoch": 0.6755990932642487, "grad_norm": 0.24220459163188934, "learning_rate": 3.2442114637305696e-05, "loss": 1.5711, "step": 33380 }, { "epoch": 0.6760038860103627, "grad_norm": 0.24440714716911316, "learning_rate": 3.2401635362694305e-05, "loss": 1.5695, "step": 33400 }, { "epoch": 0.6764086787564767, "grad_norm": 0.26343193650245667, "learning_rate": 3.2361156088082906e-05, "loss": 1.5711, "step": 33420 }, { "epoch": 0.6768134715025906, "grad_norm": 0.25653204321861267, "learning_rate": 3.232067681347151e-05, "loss": 1.5723, "step": 33440 }, { "epoch": 0.6772182642487047, "grad_norm": 0.24873144924640656, "learning_rate": 3.228019753886011e-05, "loss": 1.5745, "step": 33460 }, { "epoch": 0.6776230569948186, "grad_norm": 0.25428998470306396, "learning_rate": 3.223971826424871e-05, "loss": 1.5734, "step": 33480 }, { "epoch": 0.6780278497409327, "grad_norm": 0.24440427124500275, "learning_rate": 3.219923898963731e-05, "loss": 1.5704, "step": 33500 }, { "epoch": 0.6784326424870466, "grad_norm": 0.26325732469558716, "learning_rate": 3.215875971502591e-05, "loss": 1.5765, "step": 33520 }, { "epoch": 0.6788374352331606, "grad_norm": 0.24527600407600403, "learning_rate": 3.211828044041451e-05, "loss": 1.5752, "step": 33540 }, { "epoch": 0.6792422279792746, "grad_norm": 0.246682271361351, "learning_rate": 3.207780116580311e-05, "loss": 1.5723, "step": 33560 }, { "epoch": 0.6796470207253886, "grad_norm": 0.2406044900417328, "learning_rate": 3.203732189119171e-05, "loss": 1.5673, "step": 33580 }, { "epoch": 0.6800518134715026, "grad_norm": 0.2519199550151825, "learning_rate": 3.1996842616580314e-05, "loss": 1.5719, "step": 33600 }, { "epoch": 0.6804566062176166, "grad_norm": 0.24392282962799072, "learning_rate": 3.1956363341968915e-05, "loss": 1.5632, "step": 33620 }, { "epoch": 0.6808613989637305, "grad_norm": 0.24221287667751312, "learning_rate": 3.191588406735752e-05, "loss": 1.586, "step": 33640 }, { "epoch": 0.6812661917098446, "grad_norm": 0.2563517987728119, "learning_rate": 3.187540479274612e-05, "loss": 1.5696, "step": 33660 }, { "epoch": 0.6816709844559585, "grad_norm": 0.24866734445095062, "learning_rate": 3.183492551813472e-05, "loss": 1.5759, "step": 33680 }, { "epoch": 0.6820757772020726, "grad_norm": 0.25606295466423035, "learning_rate": 3.1794446243523315e-05, "loss": 1.5647, "step": 33700 }, { "epoch": 0.6824805699481865, "grad_norm": 0.2512359917163849, "learning_rate": 3.1753966968911917e-05, "loss": 1.5799, "step": 33720 }, { "epoch": 0.6828853626943006, "grad_norm": 0.2507592737674713, "learning_rate": 3.171348769430052e-05, "loss": 1.5795, "step": 33740 }, { "epoch": 0.6832901554404145, "grad_norm": 0.2530190050601959, "learning_rate": 3.167300841968912e-05, "loss": 1.5763, "step": 33760 }, { "epoch": 0.6836949481865285, "grad_norm": 0.3369811177253723, "learning_rate": 3.163252914507772e-05, "loss": 1.5802, "step": 33780 }, { "epoch": 0.6840997409326425, "grad_norm": 0.2512577176094055, "learning_rate": 3.159204987046632e-05, "loss": 1.5742, "step": 33800 }, { "epoch": 0.6845045336787565, "grad_norm": 0.2536161541938782, "learning_rate": 3.1551570595854925e-05, "loss": 1.5789, "step": 33820 }, { "epoch": 0.6849093264248705, "grad_norm": 0.25155022740364075, "learning_rate": 3.1511091321243526e-05, "loss": 1.5738, "step": 33840 }, { "epoch": 0.6853141191709845, "grad_norm": 0.3485986888408661, "learning_rate": 3.147061204663213e-05, "loss": 1.574, "step": 33860 }, { "epoch": 0.6857189119170984, "grad_norm": 0.24235126376152039, "learning_rate": 3.143013277202073e-05, "loss": 1.5658, "step": 33880 }, { "epoch": 0.6861237046632125, "grad_norm": 0.24994811415672302, "learning_rate": 3.1389653497409324e-05, "loss": 1.5727, "step": 33900 }, { "epoch": 0.6865284974093264, "grad_norm": 0.2460213601589203, "learning_rate": 3.1349174222797926e-05, "loss": 1.5764, "step": 33920 }, { "epoch": 0.6869332901554405, "grad_norm": 0.2607326805591583, "learning_rate": 3.130869494818653e-05, "loss": 1.5737, "step": 33940 }, { "epoch": 0.6873380829015544, "grad_norm": 0.24419540166854858, "learning_rate": 3.126821567357513e-05, "loss": 1.5785, "step": 33960 }, { "epoch": 0.6877428756476683, "grad_norm": 0.25758716464042664, "learning_rate": 3.122773639896373e-05, "loss": 1.5758, "step": 33980 }, { "epoch": 0.6881476683937824, "grad_norm": 0.24961327016353607, "learning_rate": 3.118725712435233e-05, "loss": 1.5726, "step": 34000 }, { "epoch": 0.6885524611398963, "grad_norm": 0.24532894790172577, "learning_rate": 3.1146777849740934e-05, "loss": 1.5794, "step": 34020 }, { "epoch": 0.6889572538860104, "grad_norm": 0.2494623064994812, "learning_rate": 3.1106298575129535e-05, "loss": 1.5781, "step": 34040 }, { "epoch": 0.6893620466321243, "grad_norm": 0.2508695423603058, "learning_rate": 3.106581930051814e-05, "loss": 1.5698, "step": 34060 }, { "epoch": 0.6897668393782384, "grad_norm": 0.24827632308006287, "learning_rate": 3.102534002590673e-05, "loss": 1.5792, "step": 34080 }, { "epoch": 0.6901716321243523, "grad_norm": 0.2606000304222107, "learning_rate": 3.098486075129533e-05, "loss": 1.578, "step": 34100 }, { "epoch": 0.6905764248704663, "grad_norm": 0.2510295510292053, "learning_rate": 3.0944381476683935e-05, "loss": 1.5771, "step": 34120 }, { "epoch": 0.6909812176165803, "grad_norm": 0.25414571166038513, "learning_rate": 3.0903902202072537e-05, "loss": 1.5828, "step": 34140 }, { "epoch": 0.6913860103626943, "grad_norm": 0.2645397186279297, "learning_rate": 3.086342292746114e-05, "loss": 1.5732, "step": 34160 }, { "epoch": 0.6917908031088082, "grad_norm": 0.23759837448596954, "learning_rate": 3.0822943652849746e-05, "loss": 1.5784, "step": 34180 }, { "epoch": 0.6921955958549223, "grad_norm": 0.2480812519788742, "learning_rate": 3.078246437823835e-05, "loss": 1.5717, "step": 34200 }, { "epoch": 0.6926003886010362, "grad_norm": 0.26525771617889404, "learning_rate": 3.074198510362695e-05, "loss": 1.5721, "step": 34220 }, { "epoch": 0.6930051813471503, "grad_norm": 0.24939726293087006, "learning_rate": 3.0701505829015544e-05, "loss": 1.5683, "step": 34240 }, { "epoch": 0.6934099740932642, "grad_norm": 0.2567727565765381, "learning_rate": 3.0661026554404146e-05, "loss": 1.5731, "step": 34260 }, { "epoch": 0.6938147668393783, "grad_norm": 0.2502591013908386, "learning_rate": 3.062054727979275e-05, "loss": 1.5744, "step": 34280 }, { "epoch": 0.6942195595854922, "grad_norm": 0.254374623298645, "learning_rate": 3.058006800518135e-05, "loss": 1.5788, "step": 34300 }, { "epoch": 0.6946243523316062, "grad_norm": 0.2638742923736572, "learning_rate": 3.053958873056995e-05, "loss": 1.5776, "step": 34320 }, { "epoch": 0.6950291450777202, "grad_norm": 0.2566435933113098, "learning_rate": 3.0499109455958552e-05, "loss": 1.5731, "step": 34340 }, { "epoch": 0.6954339378238342, "grad_norm": 0.24715924263000488, "learning_rate": 3.0458630181347154e-05, "loss": 1.5735, "step": 34360 }, { "epoch": 0.6958387305699482, "grad_norm": 0.2608638107776642, "learning_rate": 3.0418150906735752e-05, "loss": 1.569, "step": 34380 }, { "epoch": 0.6962435233160622, "grad_norm": 0.2604832947254181, "learning_rate": 3.0377671632124354e-05, "loss": 1.5739, "step": 34400 }, { "epoch": 0.6966483160621761, "grad_norm": 0.2562369108200073, "learning_rate": 3.0337192357512955e-05, "loss": 1.5675, "step": 34420 }, { "epoch": 0.6970531088082902, "grad_norm": 0.2535128593444824, "learning_rate": 3.0296713082901557e-05, "loss": 1.5735, "step": 34440 }, { "epoch": 0.6974579015544041, "grad_norm": 0.24846428632736206, "learning_rate": 3.025623380829016e-05, "loss": 1.565, "step": 34460 }, { "epoch": 0.6978626943005182, "grad_norm": 0.24398653209209442, "learning_rate": 3.0215754533678757e-05, "loss": 1.5807, "step": 34480 }, { "epoch": 0.6982674870466321, "grad_norm": 0.25204524397850037, "learning_rate": 3.017527525906736e-05, "loss": 1.5744, "step": 34500 }, { "epoch": 0.6986722797927462, "grad_norm": 0.25202682614326477, "learning_rate": 3.013479598445596e-05, "loss": 1.5713, "step": 34520 }, { "epoch": 0.6990770725388601, "grad_norm": 0.24983835220336914, "learning_rate": 3.009431670984456e-05, "loss": 1.5679, "step": 34540 }, { "epoch": 0.6994818652849741, "grad_norm": 0.2446143478155136, "learning_rate": 3.0053837435233163e-05, "loss": 1.575, "step": 34560 }, { "epoch": 0.6998866580310881, "grad_norm": 0.2593005895614624, "learning_rate": 3.001335816062176e-05, "loss": 1.5757, "step": 34580 }, { "epoch": 0.7002914507772021, "grad_norm": 0.2555936574935913, "learning_rate": 2.9972878886010363e-05, "loss": 1.5841, "step": 34600 }, { "epoch": 0.700696243523316, "grad_norm": 0.264298677444458, "learning_rate": 2.9932399611398965e-05, "loss": 1.5768, "step": 34620 }, { "epoch": 0.7011010362694301, "grad_norm": 0.2513805031776428, "learning_rate": 2.9891920336787566e-05, "loss": 1.5761, "step": 34640 }, { "epoch": 0.701505829015544, "grad_norm": 0.2531839907169342, "learning_rate": 2.9851441062176168e-05, "loss": 1.5695, "step": 34660 }, { "epoch": 0.7019106217616581, "grad_norm": 0.24602557718753815, "learning_rate": 2.9810961787564766e-05, "loss": 1.573, "step": 34680 }, { "epoch": 0.702315414507772, "grad_norm": 0.2508755326271057, "learning_rate": 2.9770482512953368e-05, "loss": 1.5748, "step": 34700 }, { "epoch": 0.7027202072538861, "grad_norm": 0.2522776424884796, "learning_rate": 2.973000323834197e-05, "loss": 1.5716, "step": 34720 }, { "epoch": 0.703125, "grad_norm": 0.2567850351333618, "learning_rate": 2.968952396373057e-05, "loss": 1.5753, "step": 34740 }, { "epoch": 0.7035297927461139, "grad_norm": 0.2637972831726074, "learning_rate": 2.9649044689119172e-05, "loss": 1.576, "step": 34760 }, { "epoch": 0.703934585492228, "grad_norm": 0.240611732006073, "learning_rate": 2.960856541450777e-05, "loss": 1.5726, "step": 34780 }, { "epoch": 0.7043393782383419, "grad_norm": 0.2476857602596283, "learning_rate": 2.9568086139896372e-05, "loss": 1.5597, "step": 34800 }, { "epoch": 0.704744170984456, "grad_norm": 0.2566538155078888, "learning_rate": 2.9527606865284974e-05, "loss": 1.5773, "step": 34820 }, { "epoch": 0.7051489637305699, "grad_norm": 0.2610398530960083, "learning_rate": 2.9487127590673575e-05, "loss": 1.5691, "step": 34840 }, { "epoch": 0.705553756476684, "grad_norm": 0.2523614764213562, "learning_rate": 2.9446648316062174e-05, "loss": 1.572, "step": 34860 }, { "epoch": 0.7059585492227979, "grad_norm": 0.2628850042819977, "learning_rate": 2.9406169041450775e-05, "loss": 1.5751, "step": 34880 }, { "epoch": 0.7063633419689119, "grad_norm": 0.24658383429050446, "learning_rate": 2.9365689766839377e-05, "loss": 1.5738, "step": 34900 }, { "epoch": 0.7067681347150259, "grad_norm": 0.25988277792930603, "learning_rate": 2.932521049222798e-05, "loss": 1.573, "step": 34920 }, { "epoch": 0.7071729274611399, "grad_norm": 0.24184207618236542, "learning_rate": 2.9284731217616583e-05, "loss": 1.5793, "step": 34940 }, { "epoch": 0.7075777202072538, "grad_norm": 0.26685410737991333, "learning_rate": 2.9244251943005185e-05, "loss": 1.5704, "step": 34960 }, { "epoch": 0.7079825129533679, "grad_norm": 0.2515631914138794, "learning_rate": 2.9203772668393787e-05, "loss": 1.5731, "step": 34980 }, { "epoch": 0.7083873056994818, "grad_norm": 0.24365225434303284, "learning_rate": 2.9163293393782388e-05, "loss": 1.5736, "step": 35000 }, { "epoch": 0.7087920984455959, "grad_norm": 0.24960650503635406, "learning_rate": 2.9122814119170986e-05, "loss": 1.5753, "step": 35020 }, { "epoch": 0.7091968911917098, "grad_norm": 0.24998480081558228, "learning_rate": 2.9082334844559588e-05, "loss": 1.5754, "step": 35040 }, { "epoch": 0.7096016839378239, "grad_norm": 0.2656254470348358, "learning_rate": 2.904185556994819e-05, "loss": 1.5664, "step": 35060 }, { "epoch": 0.7100064766839378, "grad_norm": 0.24945731461048126, "learning_rate": 2.900137629533679e-05, "loss": 1.5728, "step": 35080 }, { "epoch": 0.7104112694300518, "grad_norm": 0.2514556050300598, "learning_rate": 2.8960897020725393e-05, "loss": 1.569, "step": 35100 }, { "epoch": 0.7108160621761658, "grad_norm": 0.25028255581855774, "learning_rate": 2.892041774611399e-05, "loss": 1.5705, "step": 35120 }, { "epoch": 0.7112208549222798, "grad_norm": 0.2502228021621704, "learning_rate": 2.8879938471502593e-05, "loss": 1.5641, "step": 35140 }, { "epoch": 0.7116256476683938, "grad_norm": 0.24139191210269928, "learning_rate": 2.8839459196891194e-05, "loss": 1.5724, "step": 35160 }, { "epoch": 0.7120304404145078, "grad_norm": 0.24705040454864502, "learning_rate": 2.8798979922279796e-05, "loss": 1.5753, "step": 35180 }, { "epoch": 0.7124352331606217, "grad_norm": 0.25380939245224, "learning_rate": 2.8758500647668397e-05, "loss": 1.578, "step": 35200 }, { "epoch": 0.7128400259067358, "grad_norm": 0.24211634695529938, "learning_rate": 2.8718021373056996e-05, "loss": 1.5773, "step": 35220 }, { "epoch": 0.7132448186528497, "grad_norm": 0.26032498478889465, "learning_rate": 2.8677542098445597e-05, "loss": 1.5665, "step": 35240 }, { "epoch": 0.7136496113989638, "grad_norm": 0.2628876864910126, "learning_rate": 2.86370628238342e-05, "loss": 1.5733, "step": 35260 }, { "epoch": 0.7140544041450777, "grad_norm": 0.2551079988479614, "learning_rate": 2.85965835492228e-05, "loss": 1.5766, "step": 35280 }, { "epoch": 0.7144591968911918, "grad_norm": 0.25086188316345215, "learning_rate": 2.8556104274611402e-05, "loss": 1.5711, "step": 35300 }, { "epoch": 0.7148639896373057, "grad_norm": 0.24641644954681396, "learning_rate": 2.8515625e-05, "loss": 1.582, "step": 35320 }, { "epoch": 0.7152687823834197, "grad_norm": 0.24826680123806, "learning_rate": 2.8475145725388602e-05, "loss": 1.5747, "step": 35340 }, { "epoch": 0.7156735751295337, "grad_norm": 0.24664539098739624, "learning_rate": 2.8434666450777203e-05, "loss": 1.5675, "step": 35360 }, { "epoch": 0.7160783678756477, "grad_norm": 0.24513651430606842, "learning_rate": 2.8394187176165805e-05, "loss": 1.5702, "step": 35380 }, { "epoch": 0.7164831606217616, "grad_norm": 0.2574043869972229, "learning_rate": 2.8353707901554403e-05, "loss": 1.5793, "step": 35400 }, { "epoch": 0.7168879533678757, "grad_norm": 0.2519179880619049, "learning_rate": 2.8313228626943005e-05, "loss": 1.5757, "step": 35420 }, { "epoch": 0.7172927461139896, "grad_norm": 0.24382127821445465, "learning_rate": 2.8272749352331606e-05, "loss": 1.5732, "step": 35440 }, { "epoch": 0.7176975388601037, "grad_norm": 0.2618078589439392, "learning_rate": 2.8232270077720208e-05, "loss": 1.5754, "step": 35460 }, { "epoch": 0.7181023316062176, "grad_norm": 0.25035402178764343, "learning_rate": 2.819179080310881e-05, "loss": 1.5734, "step": 35480 }, { "epoch": 0.7185071243523317, "grad_norm": 0.25199976563453674, "learning_rate": 2.8151311528497408e-05, "loss": 1.573, "step": 35500 }, { "epoch": 0.7189119170984456, "grad_norm": 0.24226494133472443, "learning_rate": 2.811083225388601e-05, "loss": 1.5694, "step": 35520 }, { "epoch": 0.7193167098445595, "grad_norm": 0.24880766868591309, "learning_rate": 2.807035297927461e-05, "loss": 1.5722, "step": 35540 }, { "epoch": 0.7197215025906736, "grad_norm": 0.24832676351070404, "learning_rate": 2.8029873704663213e-05, "loss": 1.5796, "step": 35560 }, { "epoch": 0.7201262953367875, "grad_norm": 0.24725449085235596, "learning_rate": 2.7989394430051814e-05, "loss": 1.5647, "step": 35580 }, { "epoch": 0.7205310880829016, "grad_norm": 0.26770928502082825, "learning_rate": 2.7948915155440412e-05, "loss": 1.5815, "step": 35600 }, { "epoch": 0.7209358808290155, "grad_norm": 0.2554534673690796, "learning_rate": 2.7908435880829014e-05, "loss": 1.5785, "step": 35620 }, { "epoch": 0.7213406735751295, "grad_norm": 0.24423575401306152, "learning_rate": 2.7867956606217616e-05, "loss": 1.5653, "step": 35640 }, { "epoch": 0.7217454663212435, "grad_norm": 0.2605242133140564, "learning_rate": 2.7827477331606217e-05, "loss": 1.5721, "step": 35660 }, { "epoch": 0.7221502590673575, "grad_norm": 0.2542654871940613, "learning_rate": 2.778699805699482e-05, "loss": 1.5672, "step": 35680 }, { "epoch": 0.7225550518134715, "grad_norm": 0.24953693151474, "learning_rate": 2.7746518782383417e-05, "loss": 1.563, "step": 35700 }, { "epoch": 0.7229598445595855, "grad_norm": 0.25238755345344543, "learning_rate": 2.7706039507772025e-05, "loss": 1.5809, "step": 35720 }, { "epoch": 0.7233646373056994, "grad_norm": 0.24605531990528107, "learning_rate": 2.7665560233160627e-05, "loss": 1.5753, "step": 35740 }, { "epoch": 0.7237694300518135, "grad_norm": 0.23741181194782257, "learning_rate": 2.7625080958549225e-05, "loss": 1.5745, "step": 35760 }, { "epoch": 0.7241742227979274, "grad_norm": 0.25375494360923767, "learning_rate": 2.7584601683937827e-05, "loss": 1.5682, "step": 35780 }, { "epoch": 0.7245790155440415, "grad_norm": 0.26071789860725403, "learning_rate": 2.754412240932643e-05, "loss": 1.5777, "step": 35800 }, { "epoch": 0.7249838082901554, "grad_norm": 0.2565481662750244, "learning_rate": 2.750364313471503e-05, "loss": 1.5751, "step": 35820 }, { "epoch": 0.7253886010362695, "grad_norm": 0.2526152729988098, "learning_rate": 2.746316386010363e-05, "loss": 1.5779, "step": 35840 }, { "epoch": 0.7257933937823834, "grad_norm": 0.24290066957473755, "learning_rate": 2.742268458549223e-05, "loss": 1.5678, "step": 35860 }, { "epoch": 0.7261981865284974, "grad_norm": 0.24926695227622986, "learning_rate": 2.738220531088083e-05, "loss": 1.573, "step": 35880 }, { "epoch": 0.7266029792746114, "grad_norm": 0.24915564060211182, "learning_rate": 2.7341726036269433e-05, "loss": 1.5642, "step": 35900 }, { "epoch": 0.7270077720207254, "grad_norm": 0.2520081698894501, "learning_rate": 2.7301246761658035e-05, "loss": 1.5681, "step": 35920 }, { "epoch": 0.7274125647668394, "grad_norm": 0.25532978773117065, "learning_rate": 2.7260767487046636e-05, "loss": 1.5732, "step": 35940 }, { "epoch": 0.7278173575129534, "grad_norm": 0.255519837141037, "learning_rate": 2.7220288212435234e-05, "loss": 1.5784, "step": 35960 }, { "epoch": 0.7282221502590673, "grad_norm": 0.24932484328746796, "learning_rate": 2.7179808937823836e-05, "loss": 1.5755, "step": 35980 }, { "epoch": 0.7286269430051814, "grad_norm": 0.2582454979419708, "learning_rate": 2.7139329663212438e-05, "loss": 1.5768, "step": 36000 }, { "epoch": 0.7290317357512953, "grad_norm": 0.2705901563167572, "learning_rate": 2.709885038860104e-05, "loss": 1.5681, "step": 36020 }, { "epoch": 0.7294365284974094, "grad_norm": 0.24864184856414795, "learning_rate": 2.7058371113989637e-05, "loss": 1.5773, "step": 36040 }, { "epoch": 0.7298413212435233, "grad_norm": 0.2490536868572235, "learning_rate": 2.701789183937824e-05, "loss": 1.5652, "step": 36060 }, { "epoch": 0.7302461139896373, "grad_norm": 0.26461735367774963, "learning_rate": 2.697741256476684e-05, "loss": 1.5737, "step": 36080 }, { "epoch": 0.7306509067357513, "grad_norm": 0.24966390430927277, "learning_rate": 2.6936933290155442e-05, "loss": 1.5683, "step": 36100 }, { "epoch": 0.7310556994818653, "grad_norm": 0.2443007230758667, "learning_rate": 2.6896454015544044e-05, "loss": 1.5731, "step": 36120 }, { "epoch": 0.7314604922279793, "grad_norm": 0.25966736674308777, "learning_rate": 2.6855974740932642e-05, "loss": 1.5757, "step": 36140 }, { "epoch": 0.7318652849740933, "grad_norm": 0.2533910274505615, "learning_rate": 2.6815495466321244e-05, "loss": 1.5661, "step": 36160 }, { "epoch": 0.7322700777202072, "grad_norm": 0.24273908138275146, "learning_rate": 2.6775016191709845e-05, "loss": 1.5785, "step": 36180 }, { "epoch": 0.7326748704663213, "grad_norm": 0.24965745210647583, "learning_rate": 2.6734536917098447e-05, "loss": 1.5724, "step": 36200 }, { "epoch": 0.7330796632124352, "grad_norm": 0.24166996777057648, "learning_rate": 2.669405764248705e-05, "loss": 1.5769, "step": 36220 }, { "epoch": 0.7334844559585493, "grad_norm": 0.2592270076274872, "learning_rate": 2.6653578367875647e-05, "loss": 1.5636, "step": 36240 }, { "epoch": 0.7338892487046632, "grad_norm": 0.2515961527824402, "learning_rate": 2.6613099093264248e-05, "loss": 1.5742, "step": 36260 }, { "epoch": 0.7342940414507773, "grad_norm": 0.2541889548301697, "learning_rate": 2.657261981865285e-05, "loss": 1.573, "step": 36280 }, { "epoch": 0.7346988341968912, "grad_norm": 0.252239853143692, "learning_rate": 2.653214054404145e-05, "loss": 1.5628, "step": 36300 }, { "epoch": 0.7351036269430051, "grad_norm": 0.2719256579875946, "learning_rate": 2.6491661269430053e-05, "loss": 1.5609, "step": 36320 }, { "epoch": 0.7355084196891192, "grad_norm": 0.2622844874858856, "learning_rate": 2.645118199481865e-05, "loss": 1.5737, "step": 36340 }, { "epoch": 0.7359132124352331, "grad_norm": 0.2501380443572998, "learning_rate": 2.6410702720207253e-05, "loss": 1.5731, "step": 36360 }, { "epoch": 0.7363180051813472, "grad_norm": 0.2531323730945587, "learning_rate": 2.6370223445595854e-05, "loss": 1.5725, "step": 36380 }, { "epoch": 0.7367227979274611, "grad_norm": 0.2484414279460907, "learning_rate": 2.6329744170984456e-05, "loss": 1.5774, "step": 36400 }, { "epoch": 0.7371275906735751, "grad_norm": 0.24498580396175385, "learning_rate": 2.6289264896373054e-05, "loss": 1.578, "step": 36420 }, { "epoch": 0.7375323834196891, "grad_norm": 0.2445896863937378, "learning_rate": 2.6248785621761656e-05, "loss": 1.5759, "step": 36440 }, { "epoch": 0.7379371761658031, "grad_norm": 0.25662627816200256, "learning_rate": 2.6208306347150257e-05, "loss": 1.5651, "step": 36460 }, { "epoch": 0.7383419689119171, "grad_norm": 0.25122150778770447, "learning_rate": 2.6167827072538866e-05, "loss": 1.5757, "step": 36480 }, { "epoch": 0.7387467616580311, "grad_norm": 0.25469040870666504, "learning_rate": 2.6127347797927464e-05, "loss": 1.574, "step": 36500 }, { "epoch": 0.739151554404145, "grad_norm": 0.2656291723251343, "learning_rate": 2.6086868523316066e-05, "loss": 1.5725, "step": 36520 }, { "epoch": 0.7395563471502591, "grad_norm": 0.2541623115539551, "learning_rate": 2.6046389248704667e-05, "loss": 1.5792, "step": 36540 }, { "epoch": 0.739961139896373, "grad_norm": 0.25913527607917786, "learning_rate": 2.600590997409327e-05, "loss": 1.5748, "step": 36560 }, { "epoch": 0.7403659326424871, "grad_norm": 0.25835469365119934, "learning_rate": 2.5965430699481867e-05, "loss": 1.5701, "step": 36580 }, { "epoch": 0.740770725388601, "grad_norm": 0.2574467062950134, "learning_rate": 2.592495142487047e-05, "loss": 1.5727, "step": 36600 }, { "epoch": 0.741175518134715, "grad_norm": 0.2497314214706421, "learning_rate": 2.588447215025907e-05, "loss": 1.5745, "step": 36620 }, { "epoch": 0.741580310880829, "grad_norm": 0.2441394180059433, "learning_rate": 2.5843992875647672e-05, "loss": 1.5749, "step": 36640 }, { "epoch": 0.741985103626943, "grad_norm": 0.25760316848754883, "learning_rate": 2.5803513601036273e-05, "loss": 1.5761, "step": 36660 }, { "epoch": 0.742389896373057, "grad_norm": 0.24283872544765472, "learning_rate": 2.576303432642487e-05, "loss": 1.5755, "step": 36680 }, { "epoch": 0.742794689119171, "grad_norm": 0.2511046528816223, "learning_rate": 2.5722555051813473e-05, "loss": 1.5726, "step": 36700 }, { "epoch": 0.743199481865285, "grad_norm": 0.2661892771720886, "learning_rate": 2.5682075777202075e-05, "loss": 1.5734, "step": 36720 }, { "epoch": 0.743604274611399, "grad_norm": 0.24831591546535492, "learning_rate": 2.5641596502590676e-05, "loss": 1.5666, "step": 36740 }, { "epoch": 0.7440090673575129, "grad_norm": 0.2577371895313263, "learning_rate": 2.5601117227979278e-05, "loss": 1.5686, "step": 36760 }, { "epoch": 0.744413860103627, "grad_norm": 0.24967145919799805, "learning_rate": 2.5560637953367876e-05, "loss": 1.5778, "step": 36780 }, { "epoch": 0.7448186528497409, "grad_norm": 0.2475225180387497, "learning_rate": 2.5520158678756478e-05, "loss": 1.5711, "step": 36800 }, { "epoch": 0.745223445595855, "grad_norm": 0.25675755739212036, "learning_rate": 2.547967940414508e-05, "loss": 1.5616, "step": 36820 }, { "epoch": 0.7456282383419689, "grad_norm": 0.295620322227478, "learning_rate": 2.543920012953368e-05, "loss": 1.5704, "step": 36840 }, { "epoch": 0.7460330310880829, "grad_norm": 0.2483331859111786, "learning_rate": 2.5398720854922283e-05, "loss": 1.5628, "step": 36860 }, { "epoch": 0.7464378238341969, "grad_norm": 0.24867244064807892, "learning_rate": 2.535824158031088e-05, "loss": 1.5694, "step": 36880 }, { "epoch": 0.7468426165803109, "grad_norm": 0.25280624628067017, "learning_rate": 2.5317762305699482e-05, "loss": 1.5794, "step": 36900 }, { "epoch": 0.7472474093264249, "grad_norm": 0.25671178102493286, "learning_rate": 2.5277283031088084e-05, "loss": 1.5795, "step": 36920 }, { "epoch": 0.7476522020725389, "grad_norm": 0.25887587666511536, "learning_rate": 2.5236803756476686e-05, "loss": 1.5738, "step": 36940 }, { "epoch": 0.7480569948186528, "grad_norm": 0.2511579692363739, "learning_rate": 2.5196324481865287e-05, "loss": 1.567, "step": 36960 }, { "epoch": 0.7484617875647669, "grad_norm": 0.25551214814186096, "learning_rate": 2.5155845207253885e-05, "loss": 1.5747, "step": 36980 }, { "epoch": 0.7488665803108808, "grad_norm": 0.25155285000801086, "learning_rate": 2.5115365932642487e-05, "loss": 1.5713, "step": 37000 }, { "epoch": 0.7492713730569949, "grad_norm": 0.24772867560386658, "learning_rate": 2.507488665803109e-05, "loss": 1.5718, "step": 37020 }, { "epoch": 0.7496761658031088, "grad_norm": 0.26629137992858887, "learning_rate": 2.503440738341969e-05, "loss": 1.58, "step": 37040 }, { "epoch": 0.7500809585492227, "grad_norm": 0.24032272398471832, "learning_rate": 2.4993928108808292e-05, "loss": 1.5811, "step": 37060 }, { "epoch": 0.7504857512953368, "grad_norm": 0.25228941440582275, "learning_rate": 2.4953448834196893e-05, "loss": 1.5782, "step": 37080 }, { "epoch": 0.7508905440414507, "grad_norm": 0.25634661316871643, "learning_rate": 2.4912969559585495e-05, "loss": 1.5742, "step": 37100 }, { "epoch": 0.7512953367875648, "grad_norm": 0.24936357140541077, "learning_rate": 2.4872490284974097e-05, "loss": 1.5647, "step": 37120 }, { "epoch": 0.7517001295336787, "grad_norm": 0.24932922422885895, "learning_rate": 2.4832011010362695e-05, "loss": 1.5716, "step": 37140 }, { "epoch": 0.7521049222797928, "grad_norm": 0.2566436231136322, "learning_rate": 2.4791531735751296e-05, "loss": 1.5751, "step": 37160 }, { "epoch": 0.7525097150259067, "grad_norm": 0.261510968208313, "learning_rate": 2.4751052461139898e-05, "loss": 1.5715, "step": 37180 }, { "epoch": 0.7529145077720207, "grad_norm": 0.2525883913040161, "learning_rate": 2.47105731865285e-05, "loss": 1.5721, "step": 37200 }, { "epoch": 0.7533193005181347, "grad_norm": 0.2603633403778076, "learning_rate": 2.46700939119171e-05, "loss": 1.5713, "step": 37220 }, { "epoch": 0.7537240932642487, "grad_norm": 0.2499496042728424, "learning_rate": 2.46296146373057e-05, "loss": 1.57, "step": 37240 }, { "epoch": 0.7541288860103627, "grad_norm": 0.24653297662734985, "learning_rate": 2.45891353626943e-05, "loss": 1.576, "step": 37260 }, { "epoch": 0.7545336787564767, "grad_norm": 0.3348506689071655, "learning_rate": 2.4548656088082903e-05, "loss": 1.5661, "step": 37280 }, { "epoch": 0.7549384715025906, "grad_norm": 0.2528190314769745, "learning_rate": 2.4508176813471504e-05, "loss": 1.5707, "step": 37300 }, { "epoch": 0.7553432642487047, "grad_norm": 0.25262799859046936, "learning_rate": 2.4467697538860106e-05, "loss": 1.5724, "step": 37320 }, { "epoch": 0.7557480569948186, "grad_norm": 0.2535884380340576, "learning_rate": 2.4427218264248704e-05, "loss": 1.5712, "step": 37340 }, { "epoch": 0.7561528497409327, "grad_norm": 0.23843662440776825, "learning_rate": 2.4386738989637306e-05, "loss": 1.5662, "step": 37360 }, { "epoch": 0.7565576424870466, "grad_norm": 0.2526833415031433, "learning_rate": 2.4346259715025907e-05, "loss": 1.5794, "step": 37380 }, { "epoch": 0.7569624352331606, "grad_norm": 0.2430005967617035, "learning_rate": 2.430578044041451e-05, "loss": 1.5718, "step": 37400 }, { "epoch": 0.7573672279792746, "grad_norm": 0.24720069766044617, "learning_rate": 2.426530116580311e-05, "loss": 1.5733, "step": 37420 }, { "epoch": 0.7577720207253886, "grad_norm": 0.2544929087162018, "learning_rate": 2.422482189119171e-05, "loss": 1.5713, "step": 37440 }, { "epoch": 0.7581768134715026, "grad_norm": 0.25433075428009033, "learning_rate": 2.4184342616580314e-05, "loss": 1.5659, "step": 37460 }, { "epoch": 0.7585816062176166, "grad_norm": 0.2423565834760666, "learning_rate": 2.4143863341968915e-05, "loss": 1.5776, "step": 37480 }, { "epoch": 0.7589863989637305, "grad_norm": 0.25606125593185425, "learning_rate": 2.4103384067357517e-05, "loss": 1.582, "step": 37500 }, { "epoch": 0.7593911917098446, "grad_norm": 0.25156882405281067, "learning_rate": 2.4062904792746115e-05, "loss": 1.5736, "step": 37520 }, { "epoch": 0.7597959844559585, "grad_norm": 0.2566584646701813, "learning_rate": 2.4022425518134717e-05, "loss": 1.5692, "step": 37540 }, { "epoch": 0.7602007772020726, "grad_norm": 0.24807313084602356, "learning_rate": 2.3981946243523318e-05, "loss": 1.5674, "step": 37560 }, { "epoch": 0.7606055699481865, "grad_norm": 0.251307874917984, "learning_rate": 2.394146696891192e-05, "loss": 1.5752, "step": 37580 }, { "epoch": 0.7610103626943006, "grad_norm": 0.2485140562057495, "learning_rate": 2.3900987694300518e-05, "loss": 1.5646, "step": 37600 }, { "epoch": 0.7614151554404145, "grad_norm": 0.2503208816051483, "learning_rate": 2.386050841968912e-05, "loss": 1.5744, "step": 37620 }, { "epoch": 0.7618199481865285, "grad_norm": 0.2504694163799286, "learning_rate": 2.382002914507772e-05, "loss": 1.5678, "step": 37640 }, { "epoch": 0.7622247409326425, "grad_norm": 0.2550312578678131, "learning_rate": 2.3779549870466323e-05, "loss": 1.5681, "step": 37660 }, { "epoch": 0.7626295336787565, "grad_norm": 0.25204208493232727, "learning_rate": 2.3739070595854924e-05, "loss": 1.57, "step": 37680 }, { "epoch": 0.7630343264248705, "grad_norm": 0.26752638816833496, "learning_rate": 2.3698591321243523e-05, "loss": 1.5739, "step": 37700 }, { "epoch": 0.7634391191709845, "grad_norm": 0.25242891907691956, "learning_rate": 2.3658112046632124e-05, "loss": 1.5763, "step": 37720 }, { "epoch": 0.7638439119170984, "grad_norm": 0.2430320680141449, "learning_rate": 2.3617632772020726e-05, "loss": 1.5688, "step": 37740 }, { "epoch": 0.7642487046632125, "grad_norm": 0.24227763712406158, "learning_rate": 2.3577153497409327e-05, "loss": 1.5679, "step": 37760 }, { "epoch": 0.7646534974093264, "grad_norm": 0.24392585456371307, "learning_rate": 2.353667422279793e-05, "loss": 1.5749, "step": 37780 }, { "epoch": 0.7650582901554405, "grad_norm": 0.26033908128738403, "learning_rate": 2.3496194948186527e-05, "loss": 1.5701, "step": 37800 }, { "epoch": 0.7654630829015544, "grad_norm": 0.2505531907081604, "learning_rate": 2.345571567357513e-05, "loss": 1.5714, "step": 37820 }, { "epoch": 0.7658678756476683, "grad_norm": 0.2540918290615082, "learning_rate": 2.3415236398963734e-05, "loss": 1.5761, "step": 37840 }, { "epoch": 0.7662726683937824, "grad_norm": 0.26687198877334595, "learning_rate": 2.3374757124352335e-05, "loss": 1.5726, "step": 37860 }, { "epoch": 0.7666774611398963, "grad_norm": 0.2514880895614624, "learning_rate": 2.3334277849740934e-05, "loss": 1.5704, "step": 37880 }, { "epoch": 0.7670822538860104, "grad_norm": 0.25129416584968567, "learning_rate": 2.3293798575129535e-05, "loss": 1.5723, "step": 37900 }, { "epoch": 0.7674870466321243, "grad_norm": 0.2529752850532532, "learning_rate": 2.3253319300518137e-05, "loss": 1.5748, "step": 37920 }, { "epoch": 0.7678918393782384, "grad_norm": 0.2474336177110672, "learning_rate": 2.321284002590674e-05, "loss": 1.5758, "step": 37940 }, { "epoch": 0.7682966321243523, "grad_norm": 0.25870227813720703, "learning_rate": 2.317236075129534e-05, "loss": 1.5749, "step": 37960 }, { "epoch": 0.7687014248704663, "grad_norm": 0.2582238018512726, "learning_rate": 2.3131881476683938e-05, "loss": 1.5681, "step": 37980 }, { "epoch": 0.7691062176165803, "grad_norm": 0.2658427059650421, "learning_rate": 2.309140220207254e-05, "loss": 1.5735, "step": 38000 }, { "epoch": 0.7695110103626943, "grad_norm": 0.24747730791568756, "learning_rate": 2.305092292746114e-05, "loss": 1.5661, "step": 38020 }, { "epoch": 0.7699158031088082, "grad_norm": 0.25449338555336, "learning_rate": 2.3010443652849743e-05, "loss": 1.5612, "step": 38040 }, { "epoch": 0.7703205958549223, "grad_norm": 0.2582302391529083, "learning_rate": 2.296996437823834e-05, "loss": 1.5636, "step": 38060 }, { "epoch": 0.7707253886010362, "grad_norm": 0.24392135441303253, "learning_rate": 2.2929485103626943e-05, "loss": 1.5709, "step": 38080 }, { "epoch": 0.7711301813471503, "grad_norm": 0.2514166533946991, "learning_rate": 2.2889005829015544e-05, "loss": 1.5698, "step": 38100 }, { "epoch": 0.7715349740932642, "grad_norm": 0.2716028094291687, "learning_rate": 2.2848526554404146e-05, "loss": 1.5673, "step": 38120 }, { "epoch": 0.7719397668393783, "grad_norm": 0.2509279251098633, "learning_rate": 2.2808047279792748e-05, "loss": 1.5701, "step": 38140 }, { "epoch": 0.7723445595854922, "grad_norm": 0.2526683211326599, "learning_rate": 2.2767568005181346e-05, "loss": 1.5737, "step": 38160 }, { "epoch": 0.7727493523316062, "grad_norm": 0.2482449859380722, "learning_rate": 2.2727088730569947e-05, "loss": 1.5686, "step": 38180 }, { "epoch": 0.7731541450777202, "grad_norm": 0.25977635383605957, "learning_rate": 2.268660945595855e-05, "loss": 1.5679, "step": 38200 }, { "epoch": 0.7735589378238342, "grad_norm": 0.24743391573429108, "learning_rate": 2.2646130181347154e-05, "loss": 1.5623, "step": 38220 }, { "epoch": 0.7739637305699482, "grad_norm": 0.2575525641441345, "learning_rate": 2.2605650906735752e-05, "loss": 1.5708, "step": 38240 }, { "epoch": 0.7743685233160622, "grad_norm": 0.25144830346107483, "learning_rate": 2.2565171632124354e-05, "loss": 1.5681, "step": 38260 }, { "epoch": 0.7747733160621761, "grad_norm": 0.25126686692237854, "learning_rate": 2.2524692357512955e-05, "loss": 1.5629, "step": 38280 }, { "epoch": 0.7751781088082902, "grad_norm": 0.2519494891166687, "learning_rate": 2.2484213082901557e-05, "loss": 1.5711, "step": 38300 }, { "epoch": 0.7755829015544041, "grad_norm": 0.25716114044189453, "learning_rate": 2.244373380829016e-05, "loss": 1.5708, "step": 38320 }, { "epoch": 0.7759876943005182, "grad_norm": 0.24117785692214966, "learning_rate": 2.2403254533678757e-05, "loss": 1.5718, "step": 38340 }, { "epoch": 0.7763924870466321, "grad_norm": 0.2545400559902191, "learning_rate": 2.236277525906736e-05, "loss": 1.5691, "step": 38360 }, { "epoch": 0.7767972797927462, "grad_norm": 0.24605786800384521, "learning_rate": 2.232229598445596e-05, "loss": 1.564, "step": 38380 }, { "epoch": 0.7772020725388601, "grad_norm": 0.26804250478744507, "learning_rate": 2.228181670984456e-05, "loss": 1.5703, "step": 38400 }, { "epoch": 0.7776068652849741, "grad_norm": 0.24791593849658966, "learning_rate": 2.2241337435233163e-05, "loss": 1.5741, "step": 38420 }, { "epoch": 0.7780116580310881, "grad_norm": 0.25617215037345886, "learning_rate": 2.220085816062176e-05, "loss": 1.5641, "step": 38440 }, { "epoch": 0.7784164507772021, "grad_norm": 0.24256493151187897, "learning_rate": 2.2160378886010363e-05, "loss": 1.5736, "step": 38460 }, { "epoch": 0.778821243523316, "grad_norm": 0.2615529000759125, "learning_rate": 2.2119899611398965e-05, "loss": 1.5676, "step": 38480 }, { "epoch": 0.7792260362694301, "grad_norm": 0.2541978061199188, "learning_rate": 2.2079420336787566e-05, "loss": 1.5701, "step": 38500 }, { "epoch": 0.779630829015544, "grad_norm": 0.2451649010181427, "learning_rate": 2.2038941062176168e-05, "loss": 1.5643, "step": 38520 }, { "epoch": 0.7800356217616581, "grad_norm": 0.250972181558609, "learning_rate": 2.1998461787564766e-05, "loss": 1.5734, "step": 38540 }, { "epoch": 0.780440414507772, "grad_norm": 0.26161718368530273, "learning_rate": 2.1957982512953368e-05, "loss": 1.5757, "step": 38560 }, { "epoch": 0.7808452072538861, "grad_norm": 0.2520747482776642, "learning_rate": 2.191750323834197e-05, "loss": 1.5776, "step": 38580 }, { "epoch": 0.78125, "grad_norm": 0.2577764391899109, "learning_rate": 2.187702396373057e-05, "loss": 1.5676, "step": 38600 }, { "epoch": 0.7816547927461139, "grad_norm": 0.2893233895301819, "learning_rate": 2.1836544689119172e-05, "loss": 1.5771, "step": 38620 }, { "epoch": 0.782059585492228, "grad_norm": 0.25735557079315186, "learning_rate": 2.1796065414507774e-05, "loss": 1.5752, "step": 38640 }, { "epoch": 0.7824643782383419, "grad_norm": 0.2511720359325409, "learning_rate": 2.1755586139896376e-05, "loss": 1.5623, "step": 38660 }, { "epoch": 0.782869170984456, "grad_norm": 0.24791091680526733, "learning_rate": 2.1715106865284977e-05, "loss": 1.5759, "step": 38680 }, { "epoch": 0.7832739637305699, "grad_norm": 0.2544258236885071, "learning_rate": 2.1674627590673575e-05, "loss": 1.5744, "step": 38700 }, { "epoch": 0.783678756476684, "grad_norm": 0.2569575607776642, "learning_rate": 2.1634148316062177e-05, "loss": 1.567, "step": 38720 }, { "epoch": 0.7840835492227979, "grad_norm": 0.25290733575820923, "learning_rate": 2.159366904145078e-05, "loss": 1.5777, "step": 38740 }, { "epoch": 0.7844883419689119, "grad_norm": 0.26998576521873474, "learning_rate": 2.155318976683938e-05, "loss": 1.5715, "step": 38760 }, { "epoch": 0.7848931347150259, "grad_norm": 0.24731047451496124, "learning_rate": 2.1512710492227982e-05, "loss": 1.5647, "step": 38780 }, { "epoch": 0.7852979274611399, "grad_norm": 0.25445204973220825, "learning_rate": 2.147223121761658e-05, "loss": 1.5665, "step": 38800 }, { "epoch": 0.7857027202072538, "grad_norm": 0.2462853044271469, "learning_rate": 2.143175194300518e-05, "loss": 1.5647, "step": 38820 }, { "epoch": 0.7861075129533679, "grad_norm": 0.2653237581253052, "learning_rate": 2.1391272668393783e-05, "loss": 1.5739, "step": 38840 }, { "epoch": 0.7865123056994818, "grad_norm": 0.24717441201210022, "learning_rate": 2.1350793393782385e-05, "loss": 1.5773, "step": 38860 }, { "epoch": 0.7869170984455959, "grad_norm": 0.25257742404937744, "learning_rate": 2.1310314119170986e-05, "loss": 1.564, "step": 38880 }, { "epoch": 0.7873218911917098, "grad_norm": 0.25856876373291016, "learning_rate": 2.1269834844559585e-05, "loss": 1.5716, "step": 38900 }, { "epoch": 0.7877266839378239, "grad_norm": 0.29623687267303467, "learning_rate": 2.1229355569948186e-05, "loss": 1.5728, "step": 38920 }, { "epoch": 0.7881314766839378, "grad_norm": 0.2994031608104706, "learning_rate": 2.1188876295336788e-05, "loss": 1.5725, "step": 38940 }, { "epoch": 0.7885362694300518, "grad_norm": 0.24449287354946136, "learning_rate": 2.114839702072539e-05, "loss": 1.568, "step": 38960 }, { "epoch": 0.7889410621761658, "grad_norm": 0.24593818187713623, "learning_rate": 2.110791774611399e-05, "loss": 1.5711, "step": 38980 }, { "epoch": 0.7893458549222798, "grad_norm": 0.2520069181919098, "learning_rate": 2.1067438471502592e-05, "loss": 1.573, "step": 39000 }, { "epoch": 0.7897506476683938, "grad_norm": 0.25799089670181274, "learning_rate": 2.1026959196891194e-05, "loss": 1.5655, "step": 39020 }, { "epoch": 0.7901554404145078, "grad_norm": 0.25565093755722046, "learning_rate": 2.0986479922279796e-05, "loss": 1.5867, "step": 39040 }, { "epoch": 0.7905602331606217, "grad_norm": 0.25041863322257996, "learning_rate": 2.0946000647668397e-05, "loss": 1.5722, "step": 39060 }, { "epoch": 0.7909650259067358, "grad_norm": 0.25355058908462524, "learning_rate": 2.0905521373056995e-05, "loss": 1.5665, "step": 39080 }, { "epoch": 0.7913698186528497, "grad_norm": 0.25064224004745483, "learning_rate": 2.0865042098445597e-05, "loss": 1.5829, "step": 39100 }, { "epoch": 0.7917746113989638, "grad_norm": 0.2651539742946625, "learning_rate": 2.08245628238342e-05, "loss": 1.5699, "step": 39120 }, { "epoch": 0.7921794041450777, "grad_norm": 0.26357370615005493, "learning_rate": 2.07840835492228e-05, "loss": 1.5681, "step": 39140 }, { "epoch": 0.7925841968911918, "grad_norm": 0.2611573040485382, "learning_rate": 2.07436042746114e-05, "loss": 1.573, "step": 39160 }, { "epoch": 0.7929889896373057, "grad_norm": 0.2571450471878052, "learning_rate": 2.0703125e-05, "loss": 1.5736, "step": 39180 }, { "epoch": 0.7933937823834197, "grad_norm": 0.2568607032299042, "learning_rate": 2.06626457253886e-05, "loss": 1.5671, "step": 39200 }, { "epoch": 0.7937985751295337, "grad_norm": 0.250097393989563, "learning_rate": 2.0622166450777203e-05, "loss": 1.568, "step": 39220 }, { "epoch": 0.7942033678756477, "grad_norm": 0.2538306713104248, "learning_rate": 2.0581687176165805e-05, "loss": 1.5756, "step": 39240 }, { "epoch": 0.7946081606217616, "grad_norm": 0.25407010316848755, "learning_rate": 2.0541207901554403e-05, "loss": 1.566, "step": 39260 }, { "epoch": 0.7950129533678757, "grad_norm": 0.2509724795818329, "learning_rate": 2.0500728626943005e-05, "loss": 1.5753, "step": 39280 }, { "epoch": 0.7954177461139896, "grad_norm": 0.24499580264091492, "learning_rate": 2.0460249352331606e-05, "loss": 1.5642, "step": 39300 }, { "epoch": 0.7958225388601037, "grad_norm": 0.25154736638069153, "learning_rate": 2.0419770077720208e-05, "loss": 1.5759, "step": 39320 }, { "epoch": 0.7962273316062176, "grad_norm": 0.2529792785644531, "learning_rate": 2.037929080310881e-05, "loss": 1.5681, "step": 39340 }, { "epoch": 0.7966321243523317, "grad_norm": 0.2541936933994293, "learning_rate": 2.0338811528497408e-05, "loss": 1.5742, "step": 39360 }, { "epoch": 0.7970369170984456, "grad_norm": 0.25492891669273376, "learning_rate": 2.0298332253886013e-05, "loss": 1.5745, "step": 39380 }, { "epoch": 0.7974417098445595, "grad_norm": 0.25221309065818787, "learning_rate": 2.0257852979274614e-05, "loss": 1.5677, "step": 39400 }, { "epoch": 0.7978465025906736, "grad_norm": 0.24458923935890198, "learning_rate": 2.0217373704663216e-05, "loss": 1.5754, "step": 39420 }, { "epoch": 0.7982512953367875, "grad_norm": 0.24897027015686035, "learning_rate": 2.0176894430051814e-05, "loss": 1.5686, "step": 39440 }, { "epoch": 0.7986560880829016, "grad_norm": 0.25478240847587585, "learning_rate": 2.0136415155440416e-05, "loss": 1.571, "step": 39460 }, { "epoch": 0.7990608808290155, "grad_norm": 0.26319724321365356, "learning_rate": 2.0095935880829017e-05, "loss": 1.5731, "step": 39480 }, { "epoch": 0.7994656735751295, "grad_norm": 0.2636016607284546, "learning_rate": 2.005545660621762e-05, "loss": 1.5697, "step": 39500 }, { "epoch": 0.7998704663212435, "grad_norm": 0.24527607858181, "learning_rate": 2.001497733160622e-05, "loss": 1.5835, "step": 39520 }, { "epoch": 0.8002752590673575, "grad_norm": 0.2632750868797302, "learning_rate": 1.997449805699482e-05, "loss": 1.5679, "step": 39540 }, { "epoch": 0.8006800518134715, "grad_norm": 0.2556610107421875, "learning_rate": 1.993401878238342e-05, "loss": 1.5684, "step": 39560 }, { "epoch": 0.8010848445595855, "grad_norm": 0.272382527589798, "learning_rate": 1.9893539507772022e-05, "loss": 1.5747, "step": 39580 }, { "epoch": 0.8014896373056994, "grad_norm": 0.25378477573394775, "learning_rate": 1.9853060233160623e-05, "loss": 1.5655, "step": 39600 }, { "epoch": 0.8018944300518135, "grad_norm": 0.2494637817144394, "learning_rate": 1.981258095854922e-05, "loss": 1.5726, "step": 39620 }, { "epoch": 0.8022992227979274, "grad_norm": 0.25276607275009155, "learning_rate": 1.9772101683937823e-05, "loss": 1.5657, "step": 39640 }, { "epoch": 0.8027040155440415, "grad_norm": 0.2689211368560791, "learning_rate": 1.9731622409326425e-05, "loss": 1.5752, "step": 39660 }, { "epoch": 0.8031088082901554, "grad_norm": 0.24493053555488586, "learning_rate": 1.9691143134715026e-05, "loss": 1.5657, "step": 39680 }, { "epoch": 0.8035136010362695, "grad_norm": 0.24791499972343445, "learning_rate": 1.9650663860103628e-05, "loss": 1.5663, "step": 39700 }, { "epoch": 0.8039183937823834, "grad_norm": 0.251071035861969, "learning_rate": 1.9610184585492226e-05, "loss": 1.5701, "step": 39720 }, { "epoch": 0.8043231865284974, "grad_norm": 0.25385645031929016, "learning_rate": 1.9569705310880828e-05, "loss": 1.5721, "step": 39740 }, { "epoch": 0.8047279792746114, "grad_norm": 0.2543620467185974, "learning_rate": 1.9529226036269433e-05, "loss": 1.5729, "step": 39760 }, { "epoch": 0.8051327720207254, "grad_norm": 0.2568882703781128, "learning_rate": 1.9488746761658034e-05, "loss": 1.5693, "step": 39780 }, { "epoch": 0.8055375647668394, "grad_norm": 0.2557227909564972, "learning_rate": 1.9448267487046633e-05, "loss": 1.5721, "step": 39800 }, { "epoch": 0.8059423575129534, "grad_norm": 0.25350111722946167, "learning_rate": 1.9407788212435234e-05, "loss": 1.5809, "step": 39820 }, { "epoch": 0.8063471502590673, "grad_norm": 0.24536067247390747, "learning_rate": 1.9367308937823836e-05, "loss": 1.5681, "step": 39840 }, { "epoch": 0.8067519430051814, "grad_norm": 0.25118115544319153, "learning_rate": 1.9326829663212437e-05, "loss": 1.5683, "step": 39860 }, { "epoch": 0.8071567357512953, "grad_norm": 0.25101667642593384, "learning_rate": 1.928635038860104e-05, "loss": 1.576, "step": 39880 }, { "epoch": 0.8075615284974094, "grad_norm": 0.256912499666214, "learning_rate": 1.9245871113989637e-05, "loss": 1.5729, "step": 39900 }, { "epoch": 0.8079663212435233, "grad_norm": 0.2652980089187622, "learning_rate": 1.920539183937824e-05, "loss": 1.5779, "step": 39920 }, { "epoch": 0.8083711139896373, "grad_norm": 0.25292864441871643, "learning_rate": 1.916491256476684e-05, "loss": 1.5658, "step": 39940 }, { "epoch": 0.8087759067357513, "grad_norm": 0.24683897197246552, "learning_rate": 1.9124433290155442e-05, "loss": 1.5716, "step": 39960 }, { "epoch": 0.8091806994818653, "grad_norm": 0.2569979727268219, "learning_rate": 1.9083954015544044e-05, "loss": 1.5752, "step": 39980 }, { "epoch": 0.8095854922279793, "grad_norm": 0.2437114417552948, "learning_rate": 1.9043474740932642e-05, "loss": 1.5693, "step": 40000 }, { "epoch": 0.8099902849740933, "grad_norm": 0.2557174861431122, "learning_rate": 1.9002995466321243e-05, "loss": 1.5683, "step": 40020 }, { "epoch": 0.8103950777202072, "grad_norm": 0.25752195715904236, "learning_rate": 1.8962516191709845e-05, "loss": 1.5728, "step": 40040 }, { "epoch": 0.8107998704663213, "grad_norm": 0.25611767172813416, "learning_rate": 1.8922036917098447e-05, "loss": 1.5709, "step": 40060 }, { "epoch": 0.8112046632124352, "grad_norm": 0.2471778690814972, "learning_rate": 1.8881557642487048e-05, "loss": 1.5726, "step": 40080 }, { "epoch": 0.8116094559585493, "grad_norm": 0.2577037215232849, "learning_rate": 1.8841078367875646e-05, "loss": 1.573, "step": 40100 }, { "epoch": 0.8120142487046632, "grad_norm": 0.24916967749595642, "learning_rate": 1.8800599093264248e-05, "loss": 1.5767, "step": 40120 }, { "epoch": 0.8124190414507773, "grad_norm": 0.282293438911438, "learning_rate": 1.876011981865285e-05, "loss": 1.5732, "step": 40140 }, { "epoch": 0.8128238341968912, "grad_norm": 0.25367578864097595, "learning_rate": 1.8719640544041455e-05, "loss": 1.5616, "step": 40160 }, { "epoch": 0.8132286269430051, "grad_norm": 0.24809318780899048, "learning_rate": 1.8679161269430053e-05, "loss": 1.5735, "step": 40180 }, { "epoch": 0.8136334196891192, "grad_norm": 0.24932460486888885, "learning_rate": 1.8638681994818654e-05, "loss": 1.566, "step": 40200 }, { "epoch": 0.8140382124352331, "grad_norm": 0.24481765925884247, "learning_rate": 1.8598202720207256e-05, "loss": 1.572, "step": 40220 }, { "epoch": 0.8144430051813472, "grad_norm": 0.25175589323043823, "learning_rate": 1.8557723445595858e-05, "loss": 1.5685, "step": 40240 }, { "epoch": 0.8148477979274611, "grad_norm": 0.25128373503685, "learning_rate": 1.8517244170984456e-05, "loss": 1.5787, "step": 40260 }, { "epoch": 0.8152525906735751, "grad_norm": 0.24804188311100006, "learning_rate": 1.8476764896373057e-05, "loss": 1.5639, "step": 40280 }, { "epoch": 0.8156573834196891, "grad_norm": 0.25700822472572327, "learning_rate": 1.843628562176166e-05, "loss": 1.5657, "step": 40300 }, { "epoch": 0.8160621761658031, "grad_norm": 0.258410781621933, "learning_rate": 1.839580634715026e-05, "loss": 1.5752, "step": 40320 }, { "epoch": 0.8164669689119171, "grad_norm": 0.24451586604118347, "learning_rate": 1.8355327072538862e-05, "loss": 1.5697, "step": 40340 }, { "epoch": 0.8168717616580311, "grad_norm": 0.24533432722091675, "learning_rate": 1.831484779792746e-05, "loss": 1.5709, "step": 40360 }, { "epoch": 0.817276554404145, "grad_norm": 0.2555823028087616, "learning_rate": 1.8274368523316062e-05, "loss": 1.5689, "step": 40380 }, { "epoch": 0.8176813471502591, "grad_norm": 0.2554455101490021, "learning_rate": 1.8233889248704664e-05, "loss": 1.5781, "step": 40400 }, { "epoch": 0.818086139896373, "grad_norm": 0.24355241656303406, "learning_rate": 1.8193409974093265e-05, "loss": 1.5708, "step": 40420 }, { "epoch": 0.8184909326424871, "grad_norm": 0.25088781118392944, "learning_rate": 1.8152930699481867e-05, "loss": 1.5716, "step": 40440 }, { "epoch": 0.818895725388601, "grad_norm": 0.250862717628479, "learning_rate": 1.8112451424870465e-05, "loss": 1.5735, "step": 40460 }, { "epoch": 0.819300518134715, "grad_norm": 0.25573572516441345, "learning_rate": 1.8071972150259067e-05, "loss": 1.5617, "step": 40480 }, { "epoch": 0.819705310880829, "grad_norm": 0.24667002260684967, "learning_rate": 1.8031492875647668e-05, "loss": 1.5703, "step": 40500 }, { "epoch": 0.820110103626943, "grad_norm": 0.2578114867210388, "learning_rate": 1.799101360103627e-05, "loss": 1.5736, "step": 40520 }, { "epoch": 0.820514896373057, "grad_norm": 0.25511059165000916, "learning_rate": 1.795053432642487e-05, "loss": 1.5723, "step": 40540 }, { "epoch": 0.820919689119171, "grad_norm": 0.258868932723999, "learning_rate": 1.7910055051813473e-05, "loss": 1.5734, "step": 40560 }, { "epoch": 0.821324481865285, "grad_norm": 0.2515399158000946, "learning_rate": 1.7869575777202075e-05, "loss": 1.5694, "step": 40580 }, { "epoch": 0.821729274611399, "grad_norm": 0.24874266982078552, "learning_rate": 1.7829096502590676e-05, "loss": 1.5683, "step": 40600 }, { "epoch": 0.8221340673575129, "grad_norm": 0.2533651292324066, "learning_rate": 1.7788617227979278e-05, "loss": 1.5814, "step": 40620 }, { "epoch": 0.822538860103627, "grad_norm": 0.2458176612854004, "learning_rate": 1.7748137953367876e-05, "loss": 1.5675, "step": 40640 }, { "epoch": 0.8229436528497409, "grad_norm": 0.2790375053882599, "learning_rate": 1.7707658678756478e-05, "loss": 1.5652, "step": 40660 }, { "epoch": 0.823348445595855, "grad_norm": 0.25354933738708496, "learning_rate": 1.766717940414508e-05, "loss": 1.5705, "step": 40680 }, { "epoch": 0.8237532383419689, "grad_norm": 0.2667285203933716, "learning_rate": 1.762670012953368e-05, "loss": 1.5737, "step": 40700 }, { "epoch": 0.8241580310880829, "grad_norm": 0.2590566873550415, "learning_rate": 1.758622085492228e-05, "loss": 1.5685, "step": 40720 }, { "epoch": 0.8245628238341969, "grad_norm": 0.2534830868244171, "learning_rate": 1.754574158031088e-05, "loss": 1.5761, "step": 40740 }, { "epoch": 0.8249676165803109, "grad_norm": 0.2590513527393341, "learning_rate": 1.7505262305699482e-05, "loss": 1.5689, "step": 40760 }, { "epoch": 0.8253724093264249, "grad_norm": 0.263685941696167, "learning_rate": 1.7464783031088084e-05, "loss": 1.5657, "step": 40780 }, { "epoch": 0.8257772020725389, "grad_norm": 0.25989198684692383, "learning_rate": 1.7424303756476685e-05, "loss": 1.5699, "step": 40800 }, { "epoch": 0.8261819948186528, "grad_norm": 0.25977224111557007, "learning_rate": 1.7383824481865284e-05, "loss": 1.5683, "step": 40820 }, { "epoch": 0.8265867875647669, "grad_norm": 0.2510925829410553, "learning_rate": 1.7343345207253885e-05, "loss": 1.5729, "step": 40840 }, { "epoch": 0.8269915803108808, "grad_norm": 0.24908201396465302, "learning_rate": 1.7302865932642487e-05, "loss": 1.57, "step": 40860 }, { "epoch": 0.8273963730569949, "grad_norm": 0.24826762080192566, "learning_rate": 1.726238665803109e-05, "loss": 1.5757, "step": 40880 }, { "epoch": 0.8278011658031088, "grad_norm": 0.26110416650772095, "learning_rate": 1.722190738341969e-05, "loss": 1.5674, "step": 40900 }, { "epoch": 0.8282059585492227, "grad_norm": 0.26132503151893616, "learning_rate": 1.718142810880829e-05, "loss": 1.5713, "step": 40920 }, { "epoch": 0.8286107512953368, "grad_norm": 0.26024553179740906, "learning_rate": 1.7140948834196893e-05, "loss": 1.5746, "step": 40940 }, { "epoch": 0.8290155440414507, "grad_norm": 0.2631426751613617, "learning_rate": 1.7100469559585495e-05, "loss": 1.5725, "step": 40960 }, { "epoch": 0.8294203367875648, "grad_norm": 0.25791001319885254, "learning_rate": 1.7059990284974096e-05, "loss": 1.5702, "step": 40980 }, { "epoch": 0.8298251295336787, "grad_norm": 0.2592918276786804, "learning_rate": 1.7019511010362695e-05, "loss": 1.5598, "step": 41000 }, { "epoch": 0.8302299222797928, "grad_norm": 0.2635282576084137, "learning_rate": 1.6979031735751296e-05, "loss": 1.5672, "step": 41020 }, { "epoch": 0.8306347150259067, "grad_norm": 0.25127291679382324, "learning_rate": 1.6938552461139898e-05, "loss": 1.5729, "step": 41040 }, { "epoch": 0.8310395077720207, "grad_norm": 0.26902133226394653, "learning_rate": 1.68980731865285e-05, "loss": 1.5674, "step": 41060 }, { "epoch": 0.8314443005181347, "grad_norm": 0.24819737672805786, "learning_rate": 1.68575939119171e-05, "loss": 1.5782, "step": 41080 }, { "epoch": 0.8318490932642487, "grad_norm": 0.25183171033859253, "learning_rate": 1.68171146373057e-05, "loss": 1.5744, "step": 41100 }, { "epoch": 0.8322538860103627, "grad_norm": 0.2579007148742676, "learning_rate": 1.67766353626943e-05, "loss": 1.5743, "step": 41120 }, { "epoch": 0.8326586787564767, "grad_norm": 0.25526559352874756, "learning_rate": 1.6736156088082902e-05, "loss": 1.5699, "step": 41140 }, { "epoch": 0.8330634715025906, "grad_norm": 0.25813058018684387, "learning_rate": 1.6695676813471504e-05, "loss": 1.5737, "step": 41160 }, { "epoch": 0.8334682642487047, "grad_norm": 0.25876912474632263, "learning_rate": 1.6655197538860106e-05, "loss": 1.5692, "step": 41180 }, { "epoch": 0.8338730569948186, "grad_norm": 0.26097047328948975, "learning_rate": 1.6614718264248704e-05, "loss": 1.5687, "step": 41200 }, { "epoch": 0.8342778497409327, "grad_norm": 0.252909779548645, "learning_rate": 1.6574238989637305e-05, "loss": 1.5749, "step": 41220 }, { "epoch": 0.8346826424870466, "grad_norm": 0.27480536699295044, "learning_rate": 1.6533759715025907e-05, "loss": 1.5753, "step": 41240 }, { "epoch": 0.8350874352331606, "grad_norm": 0.26065734028816223, "learning_rate": 1.649328044041451e-05, "loss": 1.5713, "step": 41260 }, { "epoch": 0.8354922279792746, "grad_norm": 0.2536485493183136, "learning_rate": 1.6452801165803107e-05, "loss": 1.5643, "step": 41280 }, { "epoch": 0.8358970207253886, "grad_norm": 0.2638033926486969, "learning_rate": 1.641232189119171e-05, "loss": 1.5729, "step": 41300 }, { "epoch": 0.8363018134715026, "grad_norm": 0.2528742849826813, "learning_rate": 1.6371842616580313e-05, "loss": 1.5749, "step": 41320 }, { "epoch": 0.8367066062176166, "grad_norm": 0.25007766485214233, "learning_rate": 1.6331363341968915e-05, "loss": 1.5756, "step": 41340 }, { "epoch": 0.8371113989637305, "grad_norm": 0.2540990114212036, "learning_rate": 1.6290884067357513e-05, "loss": 1.5695, "step": 41360 }, { "epoch": 0.8375161917098446, "grad_norm": 0.25288519263267517, "learning_rate": 1.6250404792746115e-05, "loss": 1.567, "step": 41380 }, { "epoch": 0.8379209844559585, "grad_norm": 0.26662927865982056, "learning_rate": 1.6209925518134716e-05, "loss": 1.5738, "step": 41400 }, { "epoch": 0.8383257772020726, "grad_norm": 0.2518477737903595, "learning_rate": 1.6169446243523318e-05, "loss": 1.5683, "step": 41420 }, { "epoch": 0.8387305699481865, "grad_norm": 0.2660258114337921, "learning_rate": 1.612896696891192e-05, "loss": 1.5656, "step": 41440 }, { "epoch": 0.8391353626943006, "grad_norm": 0.26525411009788513, "learning_rate": 1.6088487694300518e-05, "loss": 1.5678, "step": 41460 }, { "epoch": 0.8395401554404145, "grad_norm": 0.2588740587234497, "learning_rate": 1.604800841968912e-05, "loss": 1.575, "step": 41480 }, { "epoch": 0.8399449481865285, "grad_norm": 0.25979238748550415, "learning_rate": 1.600752914507772e-05, "loss": 1.5773, "step": 41500 }, { "epoch": 0.8403497409326425, "grad_norm": 0.2559567093849182, "learning_rate": 1.5967049870466323e-05, "loss": 1.5663, "step": 41520 }, { "epoch": 0.8407545336787565, "grad_norm": 0.25018632411956787, "learning_rate": 1.5926570595854924e-05, "loss": 1.5688, "step": 41540 }, { "epoch": 0.8411593264248705, "grad_norm": 0.2540867328643799, "learning_rate": 1.5886091321243522e-05, "loss": 1.5769, "step": 41560 }, { "epoch": 0.8415641191709845, "grad_norm": 0.24757783114910126, "learning_rate": 1.5845612046632124e-05, "loss": 1.5686, "step": 41580 }, { "epoch": 0.8419689119170984, "grad_norm": 0.24858924746513367, "learning_rate": 1.5805132772020726e-05, "loss": 1.577, "step": 41600 }, { "epoch": 0.8423737046632125, "grad_norm": 0.2557097375392914, "learning_rate": 1.5764653497409327e-05, "loss": 1.5673, "step": 41620 }, { "epoch": 0.8427784974093264, "grad_norm": 0.2545352578163147, "learning_rate": 1.572417422279793e-05, "loss": 1.5655, "step": 41640 }, { "epoch": 0.8431832901554405, "grad_norm": 0.263104647397995, "learning_rate": 1.5683694948186527e-05, "loss": 1.5653, "step": 41660 }, { "epoch": 0.8435880829015544, "grad_norm": 0.25819841027259827, "learning_rate": 1.564321567357513e-05, "loss": 1.5691, "step": 41680 }, { "epoch": 0.8439928756476683, "grad_norm": 0.2569044828414917, "learning_rate": 1.5602736398963734e-05, "loss": 1.574, "step": 41700 }, { "epoch": 0.8443976683937824, "grad_norm": 0.2567899525165558, "learning_rate": 1.5562257124352335e-05, "loss": 1.5686, "step": 41720 }, { "epoch": 0.8448024611398963, "grad_norm": 0.2585518956184387, "learning_rate": 1.5521777849740933e-05, "loss": 1.571, "step": 41740 }, { "epoch": 0.8452072538860104, "grad_norm": 0.2424650341272354, "learning_rate": 1.5481298575129535e-05, "loss": 1.5677, "step": 41760 }, { "epoch": 0.8456120466321243, "grad_norm": 0.25245752930641174, "learning_rate": 1.5440819300518137e-05, "loss": 1.5669, "step": 41780 }, { "epoch": 0.8460168393782384, "grad_norm": 0.25395411252975464, "learning_rate": 1.5400340025906738e-05, "loss": 1.5693, "step": 41800 }, { "epoch": 0.8464216321243523, "grad_norm": 0.2509142756462097, "learning_rate": 1.5359860751295336e-05, "loss": 1.5715, "step": 41820 }, { "epoch": 0.8468264248704663, "grad_norm": 0.2657475173473358, "learning_rate": 1.5319381476683938e-05, "loss": 1.5733, "step": 41840 }, { "epoch": 0.8472312176165803, "grad_norm": 0.25911301374435425, "learning_rate": 1.527890220207254e-05, "loss": 1.5772, "step": 41860 }, { "epoch": 0.8476360103626943, "grad_norm": 0.2484068125486374, "learning_rate": 1.5238422927461141e-05, "loss": 1.572, "step": 41880 }, { "epoch": 0.8480408031088082, "grad_norm": 0.25415316224098206, "learning_rate": 1.5197943652849741e-05, "loss": 1.568, "step": 41900 }, { "epoch": 0.8484455958549223, "grad_norm": 0.25946328043937683, "learning_rate": 1.5157464378238343e-05, "loss": 1.5721, "step": 41920 }, { "epoch": 0.8488503886010362, "grad_norm": 0.2639846205711365, "learning_rate": 1.5116985103626943e-05, "loss": 1.5725, "step": 41940 }, { "epoch": 0.8492551813471503, "grad_norm": 0.25713902711868286, "learning_rate": 1.5076505829015544e-05, "loss": 1.5692, "step": 41960 }, { "epoch": 0.8496599740932642, "grad_norm": 0.2552792429924011, "learning_rate": 1.5036026554404146e-05, "loss": 1.5708, "step": 41980 }, { "epoch": 0.8500647668393783, "grad_norm": 0.2607964277267456, "learning_rate": 1.4995547279792746e-05, "loss": 1.578, "step": 42000 }, { "epoch": 0.8504695595854922, "grad_norm": 0.24158447980880737, "learning_rate": 1.4955068005181347e-05, "loss": 1.5637, "step": 42020 }, { "epoch": 0.8508743523316062, "grad_norm": 0.2542441785335541, "learning_rate": 1.4914588730569947e-05, "loss": 1.5711, "step": 42040 }, { "epoch": 0.8512791450777202, "grad_norm": 0.2519267797470093, "learning_rate": 1.4874109455958549e-05, "loss": 1.5721, "step": 42060 }, { "epoch": 0.8516839378238342, "grad_norm": 0.25119855999946594, "learning_rate": 1.4833630181347152e-05, "loss": 1.5684, "step": 42080 }, { "epoch": 0.8520887305699482, "grad_norm": 0.2565952241420746, "learning_rate": 1.4793150906735754e-05, "loss": 1.5767, "step": 42100 }, { "epoch": 0.8524935233160622, "grad_norm": 0.2599443197250366, "learning_rate": 1.4752671632124354e-05, "loss": 1.5751, "step": 42120 }, { "epoch": 0.8528983160621761, "grad_norm": 0.26812878251075745, "learning_rate": 1.4712192357512955e-05, "loss": 1.5683, "step": 42140 }, { "epoch": 0.8533031088082902, "grad_norm": 0.24561911821365356, "learning_rate": 1.4671713082901555e-05, "loss": 1.5629, "step": 42160 }, { "epoch": 0.8537079015544041, "grad_norm": 0.2520277500152588, "learning_rate": 1.4631233808290157e-05, "loss": 1.5703, "step": 42180 }, { "epoch": 0.8541126943005182, "grad_norm": 0.2598543167114258, "learning_rate": 1.4590754533678758e-05, "loss": 1.579, "step": 42200 }, { "epoch": 0.8545174870466321, "grad_norm": 0.2622869908809662, "learning_rate": 1.4550275259067358e-05, "loss": 1.5701, "step": 42220 }, { "epoch": 0.8549222797927462, "grad_norm": 0.2516776919364929, "learning_rate": 1.450979598445596e-05, "loss": 1.5755, "step": 42240 }, { "epoch": 0.8553270725388601, "grad_norm": 0.25977471470832825, "learning_rate": 1.446931670984456e-05, "loss": 1.5757, "step": 42260 }, { "epoch": 0.8557318652849741, "grad_norm": 0.2510720193386078, "learning_rate": 1.4428837435233161e-05, "loss": 1.5697, "step": 42280 }, { "epoch": 0.8561366580310881, "grad_norm": 0.24957919120788574, "learning_rate": 1.4388358160621763e-05, "loss": 1.5728, "step": 42300 }, { "epoch": 0.8565414507772021, "grad_norm": 0.2512624263763428, "learning_rate": 1.4347878886010363e-05, "loss": 1.5708, "step": 42320 }, { "epoch": 0.856946243523316, "grad_norm": 0.26114121079444885, "learning_rate": 1.4307399611398964e-05, "loss": 1.5689, "step": 42340 }, { "epoch": 0.8573510362694301, "grad_norm": 0.2557789087295532, "learning_rate": 1.4266920336787564e-05, "loss": 1.5787, "step": 42360 }, { "epoch": 0.857755829015544, "grad_norm": 0.2633610665798187, "learning_rate": 1.4226441062176166e-05, "loss": 1.5722, "step": 42380 }, { "epoch": 0.8581606217616581, "grad_norm": 0.26583048701286316, "learning_rate": 1.4185961787564766e-05, "loss": 1.5726, "step": 42400 }, { "epoch": 0.858565414507772, "grad_norm": 0.2659201920032501, "learning_rate": 1.4145482512953367e-05, "loss": 1.5694, "step": 42420 }, { "epoch": 0.8589702072538861, "grad_norm": 0.2592517137527466, "learning_rate": 1.4105003238341969e-05, "loss": 1.5729, "step": 42440 }, { "epoch": 0.859375, "grad_norm": 0.24245533347129822, "learning_rate": 1.4064523963730569e-05, "loss": 1.5624, "step": 42460 }, { "epoch": 0.8597797927461139, "grad_norm": 0.249851256608963, "learning_rate": 1.4024044689119172e-05, "loss": 1.5729, "step": 42480 }, { "epoch": 0.860184585492228, "grad_norm": 0.24155911803245544, "learning_rate": 1.3983565414507774e-05, "loss": 1.5657, "step": 42500 }, { "epoch": 0.8605893782383419, "grad_norm": 0.25153300166130066, "learning_rate": 1.3943086139896375e-05, "loss": 1.5743, "step": 42520 }, { "epoch": 0.860994170984456, "grad_norm": 0.25352907180786133, "learning_rate": 1.3902606865284975e-05, "loss": 1.5677, "step": 42540 }, { "epoch": 0.8613989637305699, "grad_norm": 0.2548029124736786, "learning_rate": 1.3862127590673577e-05, "loss": 1.5726, "step": 42560 }, { "epoch": 0.861803756476684, "grad_norm": 0.24728013575077057, "learning_rate": 1.3821648316062177e-05, "loss": 1.5707, "step": 42580 }, { "epoch": 0.8622085492227979, "grad_norm": 0.25442564487457275, "learning_rate": 1.3781169041450778e-05, "loss": 1.5711, "step": 42600 }, { "epoch": 0.8626133419689119, "grad_norm": 0.24865010380744934, "learning_rate": 1.374068976683938e-05, "loss": 1.5758, "step": 42620 }, { "epoch": 0.8630181347150259, "grad_norm": 0.2581830322742462, "learning_rate": 1.370021049222798e-05, "loss": 1.5702, "step": 42640 }, { "epoch": 0.8634229274611399, "grad_norm": 0.2572338581085205, "learning_rate": 1.3659731217616581e-05, "loss": 1.5793, "step": 42660 }, { "epoch": 0.8638277202072538, "grad_norm": 0.2523263096809387, "learning_rate": 1.3619251943005181e-05, "loss": 1.5787, "step": 42680 }, { "epoch": 0.8642325129533679, "grad_norm": 0.2665070593357086, "learning_rate": 1.3578772668393783e-05, "loss": 1.5767, "step": 42700 }, { "epoch": 0.8646373056994818, "grad_norm": 0.249988853931427, "learning_rate": 1.3538293393782383e-05, "loss": 1.5789, "step": 42720 }, { "epoch": 0.8650420984455959, "grad_norm": 0.25328001379966736, "learning_rate": 1.3497814119170984e-05, "loss": 1.5697, "step": 42740 }, { "epoch": 0.8654468911917098, "grad_norm": 0.24793829023838043, "learning_rate": 1.3457334844559586e-05, "loss": 1.5713, "step": 42760 }, { "epoch": 0.8658516839378239, "grad_norm": 0.26951175928115845, "learning_rate": 1.3416855569948186e-05, "loss": 1.5706, "step": 42780 }, { "epoch": 0.8662564766839378, "grad_norm": 0.24753917753696442, "learning_rate": 1.3376376295336788e-05, "loss": 1.5719, "step": 42800 }, { "epoch": 0.8666612694300518, "grad_norm": 0.265259325504303, "learning_rate": 1.3335897020725387e-05, "loss": 1.5726, "step": 42820 }, { "epoch": 0.8670660621761658, "grad_norm": 0.2545504868030548, "learning_rate": 1.3295417746113989e-05, "loss": 1.5778, "step": 42840 }, { "epoch": 0.8674708549222798, "grad_norm": 0.2629545331001282, "learning_rate": 1.3254938471502592e-05, "loss": 1.5738, "step": 42860 }, { "epoch": 0.8678756476683938, "grad_norm": 0.26589804887771606, "learning_rate": 1.3214459196891194e-05, "loss": 1.5748, "step": 42880 }, { "epoch": 0.8682804404145078, "grad_norm": 0.26281386613845825, "learning_rate": 1.3173979922279794e-05, "loss": 1.5712, "step": 42900 }, { "epoch": 0.8686852331606217, "grad_norm": 0.26535460352897644, "learning_rate": 1.3133500647668395e-05, "loss": 1.5767, "step": 42920 }, { "epoch": 0.8690900259067358, "grad_norm": 0.2690044343471527, "learning_rate": 1.3093021373056995e-05, "loss": 1.5747, "step": 42940 }, { "epoch": 0.8694948186528497, "grad_norm": 0.2625468373298645, "learning_rate": 1.3052542098445597e-05, "loss": 1.559, "step": 42960 }, { "epoch": 0.8698996113989638, "grad_norm": 0.26248112320899963, "learning_rate": 1.3012062823834199e-05, "loss": 1.5786, "step": 42980 }, { "epoch": 0.8703044041450777, "grad_norm": 0.25004419684410095, "learning_rate": 1.2971583549222798e-05, "loss": 1.5761, "step": 43000 }, { "epoch": 0.8707091968911918, "grad_norm": 0.2508049011230469, "learning_rate": 1.29311042746114e-05, "loss": 1.5683, "step": 43020 }, { "epoch": 0.8711139896373057, "grad_norm": 0.25043556094169617, "learning_rate": 1.2890625e-05, "loss": 1.5793, "step": 43040 }, { "epoch": 0.8715187823834197, "grad_norm": 0.25591766834259033, "learning_rate": 1.2850145725388602e-05, "loss": 1.5747, "step": 43060 }, { "epoch": 0.8719235751295337, "grad_norm": 0.259140282869339, "learning_rate": 1.2809666450777203e-05, "loss": 1.5704, "step": 43080 }, { "epoch": 0.8723283678756477, "grad_norm": 0.24748167395591736, "learning_rate": 1.2769187176165803e-05, "loss": 1.5741, "step": 43100 }, { "epoch": 0.8727331606217616, "grad_norm": 0.2624930441379547, "learning_rate": 1.2728707901554405e-05, "loss": 1.5777, "step": 43120 }, { "epoch": 0.8731379533678757, "grad_norm": 0.2581242620944977, "learning_rate": 1.2688228626943005e-05, "loss": 1.567, "step": 43140 }, { "epoch": 0.8735427461139896, "grad_norm": 0.2492588460445404, "learning_rate": 1.2647749352331606e-05, "loss": 1.5723, "step": 43160 }, { "epoch": 0.8739475388601037, "grad_norm": 0.24791285395622253, "learning_rate": 1.2607270077720206e-05, "loss": 1.5703, "step": 43180 }, { "epoch": 0.8743523316062176, "grad_norm": 0.2502272427082062, "learning_rate": 1.2566790803108808e-05, "loss": 1.5702, "step": 43200 }, { "epoch": 0.8747571243523317, "grad_norm": 0.25847890973091125, "learning_rate": 1.252631152849741e-05, "loss": 1.5722, "step": 43220 }, { "epoch": 0.8751619170984456, "grad_norm": 0.2571754455566406, "learning_rate": 1.248583225388601e-05, "loss": 1.573, "step": 43240 }, { "epoch": 0.8755667098445595, "grad_norm": 0.25300705432891846, "learning_rate": 1.2445352979274612e-05, "loss": 1.5671, "step": 43260 }, { "epoch": 0.8759715025906736, "grad_norm": 0.2617153525352478, "learning_rate": 1.2404873704663212e-05, "loss": 1.5715, "step": 43280 }, { "epoch": 0.8763762953367875, "grad_norm": 0.26475492119789124, "learning_rate": 1.2364394430051814e-05, "loss": 1.5731, "step": 43300 }, { "epoch": 0.8767810880829016, "grad_norm": 0.2681649625301361, "learning_rate": 1.2323915155440414e-05, "loss": 1.5755, "step": 43320 }, { "epoch": 0.8771858808290155, "grad_norm": 0.25988322496414185, "learning_rate": 1.2283435880829017e-05, "loss": 1.5725, "step": 43340 }, { "epoch": 0.8775906735751295, "grad_norm": 0.2503143548965454, "learning_rate": 1.2242956606217617e-05, "loss": 1.5628, "step": 43360 }, { "epoch": 0.8779954663212435, "grad_norm": 0.2501095235347748, "learning_rate": 1.2202477331606219e-05, "loss": 1.5668, "step": 43380 }, { "epoch": 0.8784002590673575, "grad_norm": 0.26063430309295654, "learning_rate": 1.216199805699482e-05, "loss": 1.5677, "step": 43400 }, { "epoch": 0.8788050518134715, "grad_norm": 0.24801862239837646, "learning_rate": 1.212151878238342e-05, "loss": 1.5741, "step": 43420 }, { "epoch": 0.8792098445595855, "grad_norm": 0.26549267768859863, "learning_rate": 1.2081039507772022e-05, "loss": 1.5739, "step": 43440 }, { "epoch": 0.8796146373056994, "grad_norm": 0.2610555589199066, "learning_rate": 1.2040560233160622e-05, "loss": 1.573, "step": 43460 }, { "epoch": 0.8800194300518135, "grad_norm": 0.24972783029079437, "learning_rate": 1.2000080958549223e-05, "loss": 1.5738, "step": 43480 }, { "epoch": 0.8804242227979274, "grad_norm": 0.26034054160118103, "learning_rate": 1.1959601683937823e-05, "loss": 1.5715, "step": 43500 }, { "epoch": 0.8808290155440415, "grad_norm": 0.25943461060523987, "learning_rate": 1.1919122409326425e-05, "loss": 1.5641, "step": 43520 }, { "epoch": 0.8812338082901554, "grad_norm": 0.257847398519516, "learning_rate": 1.1878643134715026e-05, "loss": 1.5703, "step": 43540 }, { "epoch": 0.8816386010362695, "grad_norm": 0.2553224265575409, "learning_rate": 1.1838163860103628e-05, "loss": 1.5694, "step": 43560 }, { "epoch": 0.8820433937823834, "grad_norm": 0.24387438595294952, "learning_rate": 1.179768458549223e-05, "loss": 1.5703, "step": 43580 }, { "epoch": 0.8824481865284974, "grad_norm": 0.25327038764953613, "learning_rate": 1.175720531088083e-05, "loss": 1.565, "step": 43600 }, { "epoch": 0.8828529792746114, "grad_norm": 0.2532143294811249, "learning_rate": 1.1716726036269431e-05, "loss": 1.5671, "step": 43620 }, { "epoch": 0.8832577720207254, "grad_norm": 0.25132057070732117, "learning_rate": 1.1676246761658031e-05, "loss": 1.5752, "step": 43640 }, { "epoch": 0.8836625647668394, "grad_norm": 0.26484185457229614, "learning_rate": 1.1635767487046633e-05, "loss": 1.5768, "step": 43660 }, { "epoch": 0.8840673575129534, "grad_norm": 0.251830130815506, "learning_rate": 1.1595288212435234e-05, "loss": 1.579, "step": 43680 }, { "epoch": 0.8844721502590673, "grad_norm": 0.2495918869972229, "learning_rate": 1.1554808937823834e-05, "loss": 1.5712, "step": 43700 }, { "epoch": 0.8848769430051814, "grad_norm": 0.25488904118537903, "learning_rate": 1.1514329663212437e-05, "loss": 1.569, "step": 43720 }, { "epoch": 0.8852817357512953, "grad_norm": 0.26040253043174744, "learning_rate": 1.1473850388601037e-05, "loss": 1.564, "step": 43740 }, { "epoch": 0.8856865284974094, "grad_norm": 0.26147425174713135, "learning_rate": 1.1433371113989639e-05, "loss": 1.5668, "step": 43760 }, { "epoch": 0.8860913212435233, "grad_norm": 0.25478029251098633, "learning_rate": 1.1392891839378239e-05, "loss": 1.5747, "step": 43780 }, { "epoch": 0.8864961139896373, "grad_norm": 0.2547585666179657, "learning_rate": 1.135241256476684e-05, "loss": 1.563, "step": 43800 }, { "epoch": 0.8869009067357513, "grad_norm": 0.27389389276504517, "learning_rate": 1.131193329015544e-05, "loss": 1.5726, "step": 43820 }, { "epoch": 0.8873056994818653, "grad_norm": 0.25377482175827026, "learning_rate": 1.1271454015544042e-05, "loss": 1.5628, "step": 43840 }, { "epoch": 0.8877104922279793, "grad_norm": 0.2554977834224701, "learning_rate": 1.1230974740932643e-05, "loss": 1.5697, "step": 43860 }, { "epoch": 0.8881152849740933, "grad_norm": 0.2486531287431717, "learning_rate": 1.1190495466321243e-05, "loss": 1.5743, "step": 43880 }, { "epoch": 0.8885200777202072, "grad_norm": 0.2578529715538025, "learning_rate": 1.1150016191709845e-05, "loss": 1.5781, "step": 43900 }, { "epoch": 0.8889248704663213, "grad_norm": 0.2525099813938141, "learning_rate": 1.1109536917098446e-05, "loss": 1.5696, "step": 43920 }, { "epoch": 0.8893296632124352, "grad_norm": 0.24904781579971313, "learning_rate": 1.1069057642487048e-05, "loss": 1.5697, "step": 43940 }, { "epoch": 0.8897344559585493, "grad_norm": 0.25235339999198914, "learning_rate": 1.1028578367875648e-05, "loss": 1.5693, "step": 43960 }, { "epoch": 0.8901392487046632, "grad_norm": 0.25065693259239197, "learning_rate": 1.098809909326425e-05, "loss": 1.5788, "step": 43980 }, { "epoch": 0.8905440414507773, "grad_norm": 0.254757285118103, "learning_rate": 1.094761981865285e-05, "loss": 1.5748, "step": 44000 }, { "epoch": 0.8909488341968912, "grad_norm": 0.25872156023979187, "learning_rate": 1.0907140544041451e-05, "loss": 1.5658, "step": 44020 }, { "epoch": 0.8913536269430051, "grad_norm": 0.2679365575313568, "learning_rate": 1.0866661269430053e-05, "loss": 1.579, "step": 44040 }, { "epoch": 0.8917584196891192, "grad_norm": 0.24792520701885223, "learning_rate": 1.0826181994818653e-05, "loss": 1.568, "step": 44060 }, { "epoch": 0.8921632124352331, "grad_norm": 0.24643781781196594, "learning_rate": 1.0785702720207254e-05, "loss": 1.5701, "step": 44080 }, { "epoch": 0.8925680051813472, "grad_norm": 0.2561403214931488, "learning_rate": 1.0745223445595854e-05, "loss": 1.5669, "step": 44100 }, { "epoch": 0.8929727979274611, "grad_norm": 0.24881517887115479, "learning_rate": 1.0704744170984457e-05, "loss": 1.5746, "step": 44120 }, { "epoch": 0.8933775906735751, "grad_norm": 0.24615344405174255, "learning_rate": 1.0664264896373057e-05, "loss": 1.5761, "step": 44140 }, { "epoch": 0.8937823834196891, "grad_norm": 0.26576095819473267, "learning_rate": 1.0623785621761659e-05, "loss": 1.5642, "step": 44160 }, { "epoch": 0.8941871761658031, "grad_norm": 0.24664047360420227, "learning_rate": 1.058330634715026e-05, "loss": 1.5689, "step": 44180 }, { "epoch": 0.8945919689119171, "grad_norm": 0.2521025538444519, "learning_rate": 1.054282707253886e-05, "loss": 1.5719, "step": 44200 }, { "epoch": 0.8949967616580311, "grad_norm": 0.2572341561317444, "learning_rate": 1.0502347797927462e-05, "loss": 1.577, "step": 44220 }, { "epoch": 0.895401554404145, "grad_norm": 0.2616499662399292, "learning_rate": 1.0461868523316062e-05, "loss": 1.5678, "step": 44240 }, { "epoch": 0.8958063471502591, "grad_norm": 0.2634851038455963, "learning_rate": 1.0421389248704663e-05, "loss": 1.5692, "step": 44260 }, { "epoch": 0.896211139896373, "grad_norm": 0.2540186643600464, "learning_rate": 1.0380909974093263e-05, "loss": 1.5712, "step": 44280 }, { "epoch": 0.8966159326424871, "grad_norm": 0.25947874784469604, "learning_rate": 1.0340430699481867e-05, "loss": 1.5591, "step": 44300 }, { "epoch": 0.897020725388601, "grad_norm": 0.2517952620983124, "learning_rate": 1.0299951424870467e-05, "loss": 1.5648, "step": 44320 }, { "epoch": 0.897425518134715, "grad_norm": 0.26228126883506775, "learning_rate": 1.0259472150259068e-05, "loss": 1.5675, "step": 44340 }, { "epoch": 0.897830310880829, "grad_norm": 0.24827389419078827, "learning_rate": 1.021899287564767e-05, "loss": 1.5734, "step": 44360 }, { "epoch": 0.898235103626943, "grad_norm": 0.2597552537918091, "learning_rate": 1.017851360103627e-05, "loss": 1.5699, "step": 44380 }, { "epoch": 0.898639896373057, "grad_norm": 0.2599639892578125, "learning_rate": 1.0138034326424871e-05, "loss": 1.5734, "step": 44400 }, { "epoch": 0.899044689119171, "grad_norm": 0.25166475772857666, "learning_rate": 1.0097555051813471e-05, "loss": 1.5758, "step": 44420 }, { "epoch": 0.899449481865285, "grad_norm": 0.26627975702285767, "learning_rate": 1.0057075777202073e-05, "loss": 1.5712, "step": 44440 }, { "epoch": 0.899854274611399, "grad_norm": 0.26282453536987305, "learning_rate": 1.0016596502590674e-05, "loss": 1.5724, "step": 44460 }, { "epoch": 0.9002590673575129, "grad_norm": 0.2582186758518219, "learning_rate": 9.976117227979274e-06, "loss": 1.5669, "step": 44480 }, { "epoch": 0.900663860103627, "grad_norm": 0.2574084997177124, "learning_rate": 9.935637953367878e-06, "loss": 1.581, "step": 44500 }, { "epoch": 0.9010686528497409, "grad_norm": 0.2510927617549896, "learning_rate": 9.895158678756477e-06, "loss": 1.5727, "step": 44520 }, { "epoch": 0.901473445595855, "grad_norm": 0.26533764600753784, "learning_rate": 9.854679404145079e-06, "loss": 1.5779, "step": 44540 }, { "epoch": 0.9018782383419689, "grad_norm": 0.2600981891155243, "learning_rate": 9.814200129533679e-06, "loss": 1.5756, "step": 44560 }, { "epoch": 0.9022830310880829, "grad_norm": 0.2670859694480896, "learning_rate": 9.77372085492228e-06, "loss": 1.574, "step": 44580 }, { "epoch": 0.9026878238341969, "grad_norm": 0.25412946939468384, "learning_rate": 9.73324158031088e-06, "loss": 1.5729, "step": 44600 }, { "epoch": 0.9030926165803109, "grad_norm": 0.2632431089878082, "learning_rate": 9.692762305699482e-06, "loss": 1.5715, "step": 44620 }, { "epoch": 0.9034974093264249, "grad_norm": 0.2566851079463959, "learning_rate": 9.652283031088084e-06, "loss": 1.5699, "step": 44640 }, { "epoch": 0.9039022020725389, "grad_norm": 0.2553281784057617, "learning_rate": 9.611803756476684e-06, "loss": 1.5698, "step": 44660 }, { "epoch": 0.9043069948186528, "grad_norm": 0.2514869272708893, "learning_rate": 9.571324481865285e-06, "loss": 1.573, "step": 44680 }, { "epoch": 0.9047117875647669, "grad_norm": 0.24543744325637817, "learning_rate": 9.530845207253887e-06, "loss": 1.5726, "step": 44700 }, { "epoch": 0.9051165803108808, "grad_norm": 0.25542789697647095, "learning_rate": 9.490365932642488e-06, "loss": 1.5769, "step": 44720 }, { "epoch": 0.9055213730569949, "grad_norm": 0.2611016035079956, "learning_rate": 9.449886658031088e-06, "loss": 1.5768, "step": 44740 }, { "epoch": 0.9059261658031088, "grad_norm": 0.2534278333187103, "learning_rate": 9.40940738341969e-06, "loss": 1.5659, "step": 44760 }, { "epoch": 0.9063309585492227, "grad_norm": 0.2554002106189728, "learning_rate": 9.36892810880829e-06, "loss": 1.5632, "step": 44780 }, { "epoch": 0.9067357512953368, "grad_norm": 0.2658659815788269, "learning_rate": 9.328448834196891e-06, "loss": 1.5549, "step": 44800 }, { "epoch": 0.9071405440414507, "grad_norm": 0.25480395555496216, "learning_rate": 9.287969559585493e-06, "loss": 1.5747, "step": 44820 }, { "epoch": 0.9075453367875648, "grad_norm": 0.25499290227890015, "learning_rate": 9.247490284974093e-06, "loss": 1.5767, "step": 44840 }, { "epoch": 0.9079501295336787, "grad_norm": 0.2625197470188141, "learning_rate": 9.207011010362694e-06, "loss": 1.568, "step": 44860 }, { "epoch": 0.9083549222797928, "grad_norm": 0.2555646002292633, "learning_rate": 9.166531735751296e-06, "loss": 1.563, "step": 44880 }, { "epoch": 0.9087597150259067, "grad_norm": 0.2409239411354065, "learning_rate": 9.126052461139898e-06, "loss": 1.5673, "step": 44900 }, { "epoch": 0.9091645077720207, "grad_norm": 0.26162832975387573, "learning_rate": 9.085573186528498e-06, "loss": 1.5672, "step": 44920 }, { "epoch": 0.9095693005181347, "grad_norm": 0.2609499990940094, "learning_rate": 9.045093911917099e-06, "loss": 1.5726, "step": 44940 }, { "epoch": 0.9099740932642487, "grad_norm": 0.2592770755290985, "learning_rate": 9.0046146373057e-06, "loss": 1.5789, "step": 44960 }, { "epoch": 0.9103788860103627, "grad_norm": 0.24817420542240143, "learning_rate": 8.9641353626943e-06, "loss": 1.5702, "step": 44980 }, { "epoch": 0.9107836787564767, "grad_norm": 0.25170964002609253, "learning_rate": 8.923656088082902e-06, "loss": 1.566, "step": 45000 }, { "epoch": 0.9111884715025906, "grad_norm": 0.25164875388145447, "learning_rate": 8.883176813471502e-06, "loss": 1.5579, "step": 45020 }, { "epoch": 0.9115932642487047, "grad_norm": 0.2549872100353241, "learning_rate": 8.842697538860104e-06, "loss": 1.5653, "step": 45040 }, { "epoch": 0.9119980569948186, "grad_norm": 0.25738486647605896, "learning_rate": 8.802218264248704e-06, "loss": 1.5659, "step": 45060 }, { "epoch": 0.9124028497409327, "grad_norm": 0.25675687193870544, "learning_rate": 8.761738989637307e-06, "loss": 1.5673, "step": 45080 }, { "epoch": 0.9128076424870466, "grad_norm": 0.2582915723323822, "learning_rate": 8.721259715025907e-06, "loss": 1.5729, "step": 45100 }, { "epoch": 0.9132124352331606, "grad_norm": 0.24318848550319672, "learning_rate": 8.680780440414508e-06, "loss": 1.5672, "step": 45120 }, { "epoch": 0.9136172279792746, "grad_norm": 0.25695088505744934, "learning_rate": 8.64030116580311e-06, "loss": 1.5721, "step": 45140 }, { "epoch": 0.9140220207253886, "grad_norm": 0.2553735673427582, "learning_rate": 8.59982189119171e-06, "loss": 1.5751, "step": 45160 }, { "epoch": 0.9144268134715026, "grad_norm": 0.2507034242153168, "learning_rate": 8.559342616580312e-06, "loss": 1.5755, "step": 45180 }, { "epoch": 0.9148316062176166, "grad_norm": 0.25583115220069885, "learning_rate": 8.518863341968911e-06, "loss": 1.5721, "step": 45200 }, { "epoch": 0.9152363989637305, "grad_norm": 0.25535279512405396, "learning_rate": 8.478384067357513e-06, "loss": 1.5688, "step": 45220 }, { "epoch": 0.9156411917098446, "grad_norm": 0.24842999875545502, "learning_rate": 8.437904792746115e-06, "loss": 1.5604, "step": 45240 }, { "epoch": 0.9160459844559585, "grad_norm": 0.24987834692001343, "learning_rate": 8.397425518134716e-06, "loss": 1.5724, "step": 45260 }, { "epoch": 0.9164507772020726, "grad_norm": 0.2582613229751587, "learning_rate": 8.356946243523318e-06, "loss": 1.5655, "step": 45280 }, { "epoch": 0.9168555699481865, "grad_norm": 0.2622925043106079, "learning_rate": 8.316466968911918e-06, "loss": 1.5689, "step": 45300 }, { "epoch": 0.9172603626943006, "grad_norm": 0.2615524232387543, "learning_rate": 8.27598769430052e-06, "loss": 1.564, "step": 45320 }, { "epoch": 0.9176651554404145, "grad_norm": 0.2646617293357849, "learning_rate": 8.23550841968912e-06, "loss": 1.5678, "step": 45340 }, { "epoch": 0.9180699481865285, "grad_norm": 0.26560425758361816, "learning_rate": 8.19502914507772e-06, "loss": 1.5703, "step": 45360 }, { "epoch": 0.9184747409326425, "grad_norm": 0.24932388961315155, "learning_rate": 8.15454987046632e-06, "loss": 1.5734, "step": 45380 }, { "epoch": 0.9188795336787565, "grad_norm": 0.2521463930606842, "learning_rate": 8.114070595854922e-06, "loss": 1.5735, "step": 45400 }, { "epoch": 0.9192843264248705, "grad_norm": 0.2641342878341675, "learning_rate": 8.073591321243524e-06, "loss": 1.574, "step": 45420 }, { "epoch": 0.9196891191709845, "grad_norm": 0.2832113206386566, "learning_rate": 8.033112046632124e-06, "loss": 1.5687, "step": 45440 }, { "epoch": 0.9200939119170984, "grad_norm": 0.261899471282959, "learning_rate": 7.992632772020727e-06, "loss": 1.5667, "step": 45460 }, { "epoch": 0.9204987046632125, "grad_norm": 0.26293420791625977, "learning_rate": 7.952153497409327e-06, "loss": 1.5705, "step": 45480 }, { "epoch": 0.9209034974093264, "grad_norm": 0.24554985761642456, "learning_rate": 7.911674222797929e-06, "loss": 1.568, "step": 45500 }, { "epoch": 0.9213082901554405, "grad_norm": 0.25262483954429626, "learning_rate": 7.871194948186529e-06, "loss": 1.5717, "step": 45520 }, { "epoch": 0.9217130829015544, "grad_norm": 0.2603040039539337, "learning_rate": 7.83071567357513e-06, "loss": 1.5684, "step": 45540 }, { "epoch": 0.9221178756476683, "grad_norm": 0.25271138548851013, "learning_rate": 7.79023639896373e-06, "loss": 1.5699, "step": 45560 }, { "epoch": 0.9225226683937824, "grad_norm": 0.25748467445373535, "learning_rate": 7.749757124352332e-06, "loss": 1.5701, "step": 45580 }, { "epoch": 0.9229274611398963, "grad_norm": 0.25635746121406555, "learning_rate": 7.709277849740933e-06, "loss": 1.5737, "step": 45600 }, { "epoch": 0.9233322538860104, "grad_norm": 0.26102736592292786, "learning_rate": 7.668798575129533e-06, "loss": 1.5661, "step": 45620 }, { "epoch": 0.9237370466321243, "grad_norm": 0.26892414689064026, "learning_rate": 7.628319300518135e-06, "loss": 1.5691, "step": 45640 }, { "epoch": 0.9241418393782384, "grad_norm": 0.25369420647621155, "learning_rate": 7.587840025906736e-06, "loss": 1.5751, "step": 45660 }, { "epoch": 0.9245466321243523, "grad_norm": 0.2804396450519562, "learning_rate": 7.547360751295338e-06, "loss": 1.57, "step": 45680 }, { "epoch": 0.9249514248704663, "grad_norm": 0.29719048738479614, "learning_rate": 7.506881476683939e-06, "loss": 1.5684, "step": 45700 }, { "epoch": 0.9253562176165803, "grad_norm": 0.2518305480480194, "learning_rate": 7.466402202072539e-06, "loss": 1.573, "step": 45720 }, { "epoch": 0.9257610103626943, "grad_norm": 0.26105576753616333, "learning_rate": 7.42592292746114e-06, "loss": 1.5729, "step": 45740 }, { "epoch": 0.9261658031088082, "grad_norm": 0.2631104588508606, "learning_rate": 7.385443652849741e-06, "loss": 1.5694, "step": 45760 }, { "epoch": 0.9265705958549223, "grad_norm": 0.255159467458725, "learning_rate": 7.344964378238342e-06, "loss": 1.5621, "step": 45780 }, { "epoch": 0.9269753886010362, "grad_norm": 0.2544821798801422, "learning_rate": 7.304485103626943e-06, "loss": 1.5753, "step": 45800 }, { "epoch": 0.9273801813471503, "grad_norm": 0.26334723830223083, "learning_rate": 7.264005829015544e-06, "loss": 1.5658, "step": 45820 }, { "epoch": 0.9277849740932642, "grad_norm": 0.25061649084091187, "learning_rate": 7.2235265544041465e-06, "loss": 1.5675, "step": 45840 }, { "epoch": 0.9281897668393783, "grad_norm": 0.26425278186798096, "learning_rate": 7.183047279792747e-06, "loss": 1.5795, "step": 45860 }, { "epoch": 0.9285945595854922, "grad_norm": 0.26492777466773987, "learning_rate": 7.142568005181348e-06, "loss": 1.5684, "step": 45880 }, { "epoch": 0.9289993523316062, "grad_norm": 0.2556169629096985, "learning_rate": 7.102088730569949e-06, "loss": 1.5805, "step": 45900 }, { "epoch": 0.9294041450777202, "grad_norm": 0.2563204765319824, "learning_rate": 7.0616094559585495e-06, "loss": 1.5632, "step": 45920 }, { "epoch": 0.9298089378238342, "grad_norm": 0.26142728328704834, "learning_rate": 7.02113018134715e-06, "loss": 1.5701, "step": 45940 }, { "epoch": 0.9302137305699482, "grad_norm": 0.24898861348628998, "learning_rate": 6.980650906735752e-06, "loss": 1.5715, "step": 45960 }, { "epoch": 0.9306185233160622, "grad_norm": 0.2772403359413147, "learning_rate": 6.9401716321243526e-06, "loss": 1.5682, "step": 45980 }, { "epoch": 0.9310233160621761, "grad_norm": 0.26016765832901, "learning_rate": 6.899692357512953e-06, "loss": 1.5739, "step": 46000 }, { "epoch": 0.9314281088082902, "grad_norm": 0.2573682367801666, "learning_rate": 6.859213082901554e-06, "loss": 1.575, "step": 46020 }, { "epoch": 0.9318329015544041, "grad_norm": 0.24973586201667786, "learning_rate": 6.8187338082901565e-06, "loss": 1.5663, "step": 46040 }, { "epoch": 0.9322376943005182, "grad_norm": 0.2562605142593384, "learning_rate": 6.778254533678757e-06, "loss": 1.563, "step": 46060 }, { "epoch": 0.9326424870466321, "grad_norm": 0.25460368394851685, "learning_rate": 6.737775259067358e-06, "loss": 1.5738, "step": 46080 }, { "epoch": 0.9330472797927462, "grad_norm": 0.253560870885849, "learning_rate": 6.697295984455959e-06, "loss": 1.5682, "step": 46100 }, { "epoch": 0.9334520725388601, "grad_norm": 0.2666437327861786, "learning_rate": 6.65681670984456e-06, "loss": 1.576, "step": 46120 }, { "epoch": 0.9338568652849741, "grad_norm": 0.24883325397968292, "learning_rate": 6.616337435233161e-06, "loss": 1.5673, "step": 46140 }, { "epoch": 0.9342616580310881, "grad_norm": 0.2677778899669647, "learning_rate": 6.575858160621762e-06, "loss": 1.57, "step": 46160 }, { "epoch": 0.9346664507772021, "grad_norm": 0.26080843806266785, "learning_rate": 6.535378886010363e-06, "loss": 1.5723, "step": 46180 }, { "epoch": 0.935071243523316, "grad_norm": 0.2689919173717499, "learning_rate": 6.494899611398963e-06, "loss": 1.5693, "step": 46200 }, { "epoch": 0.9354760362694301, "grad_norm": 0.2586071789264679, "learning_rate": 6.454420336787564e-06, "loss": 1.5631, "step": 46220 }, { "epoch": 0.935880829015544, "grad_norm": 0.2504340708255768, "learning_rate": 6.4139410621761665e-06, "loss": 1.5647, "step": 46240 }, { "epoch": 0.9362856217616581, "grad_norm": 0.2533526122570038, "learning_rate": 6.373461787564767e-06, "loss": 1.5637, "step": 46260 }, { "epoch": 0.936690414507772, "grad_norm": 0.25601011514663696, "learning_rate": 6.332982512953369e-06, "loss": 1.5724, "step": 46280 }, { "epoch": 0.9370952072538861, "grad_norm": 0.25052890181541443, "learning_rate": 6.29250323834197e-06, "loss": 1.5706, "step": 46300 }, { "epoch": 0.9375, "grad_norm": 0.25660181045532227, "learning_rate": 6.25202396373057e-06, "loss": 1.5805, "step": 46320 }, { "epoch": 0.9379047927461139, "grad_norm": 0.25800108909606934, "learning_rate": 6.211544689119171e-06, "loss": 1.5663, "step": 46340 }, { "epoch": 0.938309585492228, "grad_norm": 0.24977047741413116, "learning_rate": 6.171065414507772e-06, "loss": 1.5742, "step": 46360 }, { "epoch": 0.9387143782383419, "grad_norm": 0.2543341815471649, "learning_rate": 6.1305861398963735e-06, "loss": 1.5684, "step": 46380 }, { "epoch": 0.939119170984456, "grad_norm": 0.25037723779678345, "learning_rate": 6.090106865284974e-06, "loss": 1.5641, "step": 46400 }, { "epoch": 0.9395239637305699, "grad_norm": 0.26310402154922485, "learning_rate": 6.049627590673575e-06, "loss": 1.5663, "step": 46420 }, { "epoch": 0.939928756476684, "grad_norm": 0.27186834812164307, "learning_rate": 6.009148316062177e-06, "loss": 1.5677, "step": 46440 }, { "epoch": 0.9403335492227979, "grad_norm": 0.2580932080745697, "learning_rate": 5.968669041450777e-06, "loss": 1.5671, "step": 46460 }, { "epoch": 0.9407383419689119, "grad_norm": 0.2688298523426056, "learning_rate": 5.928189766839379e-06, "loss": 1.561, "step": 46480 }, { "epoch": 0.9411431347150259, "grad_norm": 0.2578883171081543, "learning_rate": 5.88771049222798e-06, "loss": 1.5662, "step": 46500 }, { "epoch": 0.9415479274611399, "grad_norm": 0.25857794284820557, "learning_rate": 5.8472312176165804e-06, "loss": 1.5692, "step": 46520 }, { "epoch": 0.9419527202072538, "grad_norm": 0.24729087948799133, "learning_rate": 5.806751943005181e-06, "loss": 1.5739, "step": 46540 }, { "epoch": 0.9423575129533679, "grad_norm": 0.2541347146034241, "learning_rate": 5.766272668393782e-06, "loss": 1.5755, "step": 46560 }, { "epoch": 0.9427623056994818, "grad_norm": 0.2509617507457733, "learning_rate": 5.7257933937823835e-06, "loss": 1.5655, "step": 46580 }, { "epoch": 0.9431670984455959, "grad_norm": 0.2604050636291504, "learning_rate": 5.685314119170985e-06, "loss": 1.5636, "step": 46600 }, { "epoch": 0.9435718911917098, "grad_norm": 0.2565382122993469, "learning_rate": 5.644834844559586e-06, "loss": 1.5722, "step": 46620 }, { "epoch": 0.9439766839378239, "grad_norm": 0.24961328506469727, "learning_rate": 5.604355569948187e-06, "loss": 1.5708, "step": 46640 }, { "epoch": 0.9443814766839378, "grad_norm": 0.2562832236289978, "learning_rate": 5.563876295336788e-06, "loss": 1.5683, "step": 46660 }, { "epoch": 0.9447862694300518, "grad_norm": 0.261081725358963, "learning_rate": 5.523397020725389e-06, "loss": 1.57, "step": 46680 }, { "epoch": 0.9451910621761658, "grad_norm": 0.2644845247268677, "learning_rate": 5.48291774611399e-06, "loss": 1.5706, "step": 46700 }, { "epoch": 0.9455958549222798, "grad_norm": 0.2499408721923828, "learning_rate": 5.4424384715025905e-06, "loss": 1.5674, "step": 46720 }, { "epoch": 0.9460006476683938, "grad_norm": 0.2633872628211975, "learning_rate": 5.401959196891192e-06, "loss": 1.567, "step": 46740 }, { "epoch": 0.9464054404145078, "grad_norm": 0.2516401708126068, "learning_rate": 5.361479922279794e-06, "loss": 1.5721, "step": 46760 }, { "epoch": 0.9468102331606217, "grad_norm": 0.2555289566516876, "learning_rate": 5.321000647668394e-06, "loss": 1.5683, "step": 46780 }, { "epoch": 0.9472150259067358, "grad_norm": 0.26316767930984497, "learning_rate": 5.280521373056995e-06, "loss": 1.5678, "step": 46800 }, { "epoch": 0.9476198186528497, "grad_norm": 0.2566683292388916, "learning_rate": 5.240042098445596e-06, "loss": 1.5675, "step": 46820 }, { "epoch": 0.9480246113989638, "grad_norm": 0.25119417905807495, "learning_rate": 5.199562823834197e-06, "loss": 1.5677, "step": 46840 }, { "epoch": 0.9484294041450777, "grad_norm": 0.26426294445991516, "learning_rate": 5.159083549222798e-06, "loss": 1.5696, "step": 46860 }, { "epoch": 0.9488341968911918, "grad_norm": 0.25833362340927124, "learning_rate": 5.118604274611399e-06, "loss": 1.5796, "step": 46880 }, { "epoch": 0.9492389896373057, "grad_norm": 0.25570693612098694, "learning_rate": 5.078125000000001e-06, "loss": 1.5581, "step": 46900 }, { "epoch": 0.9496437823834197, "grad_norm": 0.2591635286808014, "learning_rate": 5.037645725388601e-06, "loss": 1.5673, "step": 46920 }, { "epoch": 0.9500485751295337, "grad_norm": 0.2678343951702118, "learning_rate": 4.997166450777202e-06, "loss": 1.5655, "step": 46940 }, { "epoch": 0.9504533678756477, "grad_norm": 0.2628271281719208, "learning_rate": 4.956687176165804e-06, "loss": 1.5765, "step": 46960 }, { "epoch": 0.9508581606217616, "grad_norm": 0.26003652811050415, "learning_rate": 4.9162079015544045e-06, "loss": 1.5742, "step": 46980 }, { "epoch": 0.9512629533678757, "grad_norm": 0.2604547142982483, "learning_rate": 4.875728626943005e-06, "loss": 1.5694, "step": 47000 }, { "epoch": 0.9516677461139896, "grad_norm": 0.24819648265838623, "learning_rate": 4.835249352331606e-06, "loss": 1.5729, "step": 47020 }, { "epoch": 0.9520725388601037, "grad_norm": 0.25938495993614197, "learning_rate": 4.7947700777202076e-06, "loss": 1.5682, "step": 47040 }, { "epoch": 0.9524773316062176, "grad_norm": 0.261831134557724, "learning_rate": 4.754290803108809e-06, "loss": 1.5709, "step": 47060 }, { "epoch": 0.9528821243523317, "grad_norm": 0.26270338892936707, "learning_rate": 4.71381152849741e-06, "loss": 1.5764, "step": 47080 }, { "epoch": 0.9532869170984456, "grad_norm": 0.2503334581851959, "learning_rate": 4.673332253886011e-06, "loss": 1.5667, "step": 47100 }, { "epoch": 0.9536917098445595, "grad_norm": 0.24429410696029663, "learning_rate": 4.632852979274611e-06, "loss": 1.5609, "step": 47120 }, { "epoch": 0.9540965025906736, "grad_norm": 0.2654910683631897, "learning_rate": 4.592373704663212e-06, "loss": 1.5767, "step": 47140 }, { "epoch": 0.9545012953367875, "grad_norm": 0.2683030366897583, "learning_rate": 4.551894430051814e-06, "loss": 1.5724, "step": 47160 }, { "epoch": 0.9549060880829016, "grad_norm": 0.25525471568107605, "learning_rate": 4.5114151554404145e-06, "loss": 1.5698, "step": 47180 }, { "epoch": 0.9553108808290155, "grad_norm": 0.2625405788421631, "learning_rate": 4.470935880829016e-06, "loss": 1.5769, "step": 47200 }, { "epoch": 0.9557156735751295, "grad_norm": 0.2504134178161621, "learning_rate": 4.430456606217617e-06, "loss": 1.5683, "step": 47220 }, { "epoch": 0.9561204663212435, "grad_norm": 0.25180014967918396, "learning_rate": 4.3899773316062184e-06, "loss": 1.5648, "step": 47240 }, { "epoch": 0.9565252590673575, "grad_norm": 0.26126623153686523, "learning_rate": 4.349498056994819e-06, "loss": 1.5661, "step": 47260 }, { "epoch": 0.9569300518134715, "grad_norm": 0.24719549715518951, "learning_rate": 4.30901878238342e-06, "loss": 1.5704, "step": 47280 }, { "epoch": 0.9573348445595855, "grad_norm": 0.2578175663948059, "learning_rate": 4.268539507772021e-06, "loss": 1.5534, "step": 47300 }, { "epoch": 0.9577396373056994, "grad_norm": 0.24988719820976257, "learning_rate": 4.2280602331606214e-06, "loss": 1.5693, "step": 47320 }, { "epoch": 0.9581444300518135, "grad_norm": 0.26286858320236206, "learning_rate": 4.187580958549223e-06, "loss": 1.5728, "step": 47340 }, { "epoch": 0.9585492227979274, "grad_norm": 0.24987630546092987, "learning_rate": 4.147101683937824e-06, "loss": 1.5714, "step": 47360 }, { "epoch": 0.9589540155440415, "grad_norm": 0.2595711648464203, "learning_rate": 4.106622409326425e-06, "loss": 1.5775, "step": 47380 }, { "epoch": 0.9593588082901554, "grad_norm": 0.2491418719291687, "learning_rate": 4.066143134715026e-06, "loss": 1.5723, "step": 47400 }, { "epoch": 0.9597636010362695, "grad_norm": 0.26589274406433105, "learning_rate": 4.025663860103627e-06, "loss": 1.5737, "step": 47420 }, { "epoch": 0.9601683937823834, "grad_norm": 0.2483764886856079, "learning_rate": 3.9851845854922285e-06, "loss": 1.5706, "step": 47440 }, { "epoch": 0.9605731865284974, "grad_norm": 0.24740555882453918, "learning_rate": 3.944705310880829e-06, "loss": 1.5656, "step": 47460 }, { "epoch": 0.9609779792746114, "grad_norm": 0.2504119575023651, "learning_rate": 3.90422603626943e-06, "loss": 1.5664, "step": 47480 }, { "epoch": 0.9613827720207254, "grad_norm": 0.2587319314479828, "learning_rate": 3.863746761658031e-06, "loss": 1.5674, "step": 47500 }, { "epoch": 0.9617875647668394, "grad_norm": 0.2511790096759796, "learning_rate": 3.823267487046632e-06, "loss": 1.5704, "step": 47520 }, { "epoch": 0.9621923575129534, "grad_norm": 0.25409796833992004, "learning_rate": 3.7827882124352335e-06, "loss": 1.5714, "step": 47540 }, { "epoch": 0.9625971502590673, "grad_norm": 0.2624165713787079, "learning_rate": 3.7423089378238347e-06, "loss": 1.5591, "step": 47560 }, { "epoch": 0.9630019430051814, "grad_norm": 0.26142439246177673, "learning_rate": 3.7018296632124354e-06, "loss": 1.5693, "step": 47580 }, { "epoch": 0.9634067357512953, "grad_norm": 0.2503470778465271, "learning_rate": 3.661350388601036e-06, "loss": 1.561, "step": 47600 }, { "epoch": 0.9638115284974094, "grad_norm": 0.25816863775253296, "learning_rate": 3.6208711139896374e-06, "loss": 1.5757, "step": 47620 }, { "epoch": 0.9642163212435233, "grad_norm": 0.25450074672698975, "learning_rate": 3.5803918393782385e-06, "loss": 1.5631, "step": 47640 }, { "epoch": 0.9646211139896373, "grad_norm": 0.25450724363327026, "learning_rate": 3.5399125647668397e-06, "loss": 1.5566, "step": 47660 }, { "epoch": 0.9650259067357513, "grad_norm": 0.25086233019828796, "learning_rate": 3.4994332901554405e-06, "loss": 1.5761, "step": 47680 }, { "epoch": 0.9654306994818653, "grad_norm": 0.2633439004421234, "learning_rate": 3.458954015544041e-06, "loss": 1.5708, "step": 47700 }, { "epoch": 0.9658354922279793, "grad_norm": 0.24674391746520996, "learning_rate": 3.4184747409326424e-06, "loss": 1.5742, "step": 47720 }, { "epoch": 0.9662402849740933, "grad_norm": 0.25820380449295044, "learning_rate": 3.377995466321244e-06, "loss": 1.5769, "step": 47740 }, { "epoch": 0.9666450777202072, "grad_norm": 0.2625579237937927, "learning_rate": 3.3375161917098447e-06, "loss": 1.5712, "step": 47760 }, { "epoch": 0.9670498704663213, "grad_norm": 0.26158276200294495, "learning_rate": 3.2970369170984455e-06, "loss": 1.567, "step": 47780 }, { "epoch": 0.9674546632124352, "grad_norm": 0.25013846158981323, "learning_rate": 3.2565576424870467e-06, "loss": 1.566, "step": 47800 }, { "epoch": 0.9678594559585493, "grad_norm": 0.24471288919448853, "learning_rate": 3.2160783678756483e-06, "loss": 1.5601, "step": 47820 }, { "epoch": 0.9682642487046632, "grad_norm": 0.2658553719520569, "learning_rate": 3.175599093264249e-06, "loss": 1.572, "step": 47840 }, { "epoch": 0.9686690414507773, "grad_norm": 0.255751371383667, "learning_rate": 3.1351198186528498e-06, "loss": 1.5665, "step": 47860 }, { "epoch": 0.9690738341968912, "grad_norm": 0.26612791419029236, "learning_rate": 3.094640544041451e-06, "loss": 1.5657, "step": 47880 }, { "epoch": 0.9694786269430051, "grad_norm": 0.2641318440437317, "learning_rate": 3.054161269430052e-06, "loss": 1.5617, "step": 47900 }, { "epoch": 0.9698834196891192, "grad_norm": 0.2637476921081543, "learning_rate": 3.013681994818653e-06, "loss": 1.5639, "step": 47920 }, { "epoch": 0.9702882124352331, "grad_norm": 0.25428497791290283, "learning_rate": 2.973202720207254e-06, "loss": 1.5743, "step": 47940 }, { "epoch": 0.9706930051813472, "grad_norm": 0.24614189565181732, "learning_rate": 2.932723445595855e-06, "loss": 1.5696, "step": 47960 }, { "epoch": 0.9710977979274611, "grad_norm": 0.25745895504951477, "learning_rate": 2.892244170984456e-06, "loss": 1.5637, "step": 47980 }, { "epoch": 0.9715025906735751, "grad_norm": 0.25736236572265625, "learning_rate": 2.851764896373057e-06, "loss": 1.5775, "step": 48000 }, { "epoch": 0.9719073834196891, "grad_norm": 0.25263646245002747, "learning_rate": 2.8112856217616583e-06, "loss": 1.5719, "step": 48020 }, { "epoch": 0.9723121761658031, "grad_norm": 0.25840088725090027, "learning_rate": 2.7708063471502595e-06, "loss": 1.565, "step": 48040 }, { "epoch": 0.9727169689119171, "grad_norm": 0.2507283091545105, "learning_rate": 2.7303270725388602e-06, "loss": 1.5721, "step": 48060 }, { "epoch": 0.9731217616580311, "grad_norm": 0.2571468949317932, "learning_rate": 2.689847797927461e-06, "loss": 1.5693, "step": 48080 }, { "epoch": 0.973526554404145, "grad_norm": 0.2520105838775635, "learning_rate": 2.649368523316062e-06, "loss": 1.5681, "step": 48100 }, { "epoch": 0.9739313471502591, "grad_norm": 0.2495371550321579, "learning_rate": 2.6088892487046633e-06, "loss": 1.5674, "step": 48120 }, { "epoch": 0.974336139896373, "grad_norm": 0.25861841440200806, "learning_rate": 2.5684099740932645e-06, "loss": 1.5656, "step": 48140 }, { "epoch": 0.9747409326424871, "grad_norm": 0.26183152198791504, "learning_rate": 2.5279306994818652e-06, "loss": 1.5694, "step": 48160 }, { "epoch": 0.975145725388601, "grad_norm": 0.2455419898033142, "learning_rate": 2.4874514248704664e-06, "loss": 1.5784, "step": 48180 }, { "epoch": 0.975550518134715, "grad_norm": 0.24733096361160278, "learning_rate": 2.4469721502590676e-06, "loss": 1.5732, "step": 48200 }, { "epoch": 0.975955310880829, "grad_norm": 0.2550254166126251, "learning_rate": 2.4064928756476683e-06, "loss": 1.5691, "step": 48220 }, { "epoch": 0.976360103626943, "grad_norm": 0.2582331597805023, "learning_rate": 2.3660136010362695e-06, "loss": 1.5749, "step": 48240 }, { "epoch": 0.976764896373057, "grad_norm": 0.2566150426864624, "learning_rate": 2.3255343264248707e-06, "loss": 1.5738, "step": 48260 }, { "epoch": 0.977169689119171, "grad_norm": 0.25311756134033203, "learning_rate": 2.285055051813472e-06, "loss": 1.5728, "step": 48280 }, { "epoch": 0.977574481865285, "grad_norm": 0.25643831491470337, "learning_rate": 2.2445757772020726e-06, "loss": 1.5674, "step": 48300 }, { "epoch": 0.977979274611399, "grad_norm": 0.2478354275226593, "learning_rate": 2.2040965025906734e-06, "loss": 1.5707, "step": 48320 }, { "epoch": 0.9783840673575129, "grad_norm": 0.25191354751586914, "learning_rate": 2.163617227979275e-06, "loss": 1.5775, "step": 48340 }, { "epoch": 0.978788860103627, "grad_norm": 0.2705230712890625, "learning_rate": 2.1231379533678757e-06, "loss": 1.5696, "step": 48360 }, { "epoch": 0.9791936528497409, "grad_norm": 0.25326859951019287, "learning_rate": 2.082658678756477e-06, "loss": 1.5707, "step": 48380 }, { "epoch": 0.979598445595855, "grad_norm": 0.2560963034629822, "learning_rate": 2.0421794041450776e-06, "loss": 1.5583, "step": 48400 }, { "epoch": 0.9800032383419689, "grad_norm": 0.24979490041732788, "learning_rate": 2.0017001295336792e-06, "loss": 1.5713, "step": 48420 }, { "epoch": 0.9804080310880829, "grad_norm": 0.2558477520942688, "learning_rate": 1.96122085492228e-06, "loss": 1.5659, "step": 48440 }, { "epoch": 0.9808128238341969, "grad_norm": 0.2632245123386383, "learning_rate": 1.9207415803108807e-06, "loss": 1.5672, "step": 48460 }, { "epoch": 0.9812176165803109, "grad_norm": 0.2562946081161499, "learning_rate": 1.8802623056994821e-06, "loss": 1.5677, "step": 48480 }, { "epoch": 0.9816224093264249, "grad_norm": 0.2557205557823181, "learning_rate": 1.8397830310880829e-06, "loss": 1.5671, "step": 48500 }, { "epoch": 0.9820272020725389, "grad_norm": 0.2549383342266083, "learning_rate": 1.7993037564766842e-06, "loss": 1.5696, "step": 48520 }, { "epoch": 0.9824319948186528, "grad_norm": 0.254589706659317, "learning_rate": 1.758824481865285e-06, "loss": 1.5675, "step": 48540 }, { "epoch": 0.9828367875647669, "grad_norm": 0.2687326967716217, "learning_rate": 1.718345207253886e-06, "loss": 1.5668, "step": 48560 }, { "epoch": 0.9832415803108808, "grad_norm": 0.25105705857276917, "learning_rate": 1.6778659326424871e-06, "loss": 1.5727, "step": 48580 }, { "epoch": 0.9836463730569949, "grad_norm": 0.24815461039543152, "learning_rate": 1.637386658031088e-06, "loss": 1.5733, "step": 48600 }, { "epoch": 0.9840511658031088, "grad_norm": 0.25263941287994385, "learning_rate": 1.5969073834196893e-06, "loss": 1.5748, "step": 48620 }, { "epoch": 0.9844559585492227, "grad_norm": 0.2521785497665405, "learning_rate": 1.5564281088082902e-06, "loss": 1.5677, "step": 48640 }, { "epoch": 0.9848607512953368, "grad_norm": 0.2585909366607666, "learning_rate": 1.5159488341968912e-06, "loss": 1.5706, "step": 48660 }, { "epoch": 0.9852655440414507, "grad_norm": 0.25049838423728943, "learning_rate": 1.4754695595854924e-06, "loss": 1.5636, "step": 48680 }, { "epoch": 0.9856703367875648, "grad_norm": 0.25149422883987427, "learning_rate": 1.4349902849740933e-06, "loss": 1.5739, "step": 48700 }, { "epoch": 0.9860751295336787, "grad_norm": 0.26695874333381653, "learning_rate": 1.3945110103626943e-06, "loss": 1.5698, "step": 48720 }, { "epoch": 0.9864799222797928, "grad_norm": 0.2473231554031372, "learning_rate": 1.3540317357512953e-06, "loss": 1.5711, "step": 48740 }, { "epoch": 0.9868847150259067, "grad_norm": 0.24454140663146973, "learning_rate": 1.3135524611398964e-06, "loss": 1.5645, "step": 48760 }, { "epoch": 0.9872895077720207, "grad_norm": 0.2444067746400833, "learning_rate": 1.2730731865284974e-06, "loss": 1.5643, "step": 48780 }, { "epoch": 0.9876943005181347, "grad_norm": 0.26351621747016907, "learning_rate": 1.2325939119170986e-06, "loss": 1.5743, "step": 48800 }, { "epoch": 0.9880990932642487, "grad_norm": 0.25004157423973083, "learning_rate": 1.1921146373056995e-06, "loss": 1.5677, "step": 48820 }, { "epoch": 0.9885038860103627, "grad_norm": 0.24657028913497925, "learning_rate": 1.1516353626943005e-06, "loss": 1.5638, "step": 48840 }, { "epoch": 0.9889086787564767, "grad_norm": 0.2563260793685913, "learning_rate": 1.1111560880829017e-06, "loss": 1.5733, "step": 48860 }, { "epoch": 0.9893134715025906, "grad_norm": 0.2587776780128479, "learning_rate": 1.0706768134715026e-06, "loss": 1.5727, "step": 48880 }, { "epoch": 0.9897182642487047, "grad_norm": 0.2526193857192993, "learning_rate": 1.0301975388601038e-06, "loss": 1.5634, "step": 48900 }, { "epoch": 0.9901230569948186, "grad_norm": 0.2605911195278168, "learning_rate": 9.897182642487048e-07, "loss": 1.5737, "step": 48920 }, { "epoch": 0.9905278497409327, "grad_norm": 0.27009183168411255, "learning_rate": 9.492389896373058e-07, "loss": 1.5729, "step": 48940 }, { "epoch": 0.9909326424870466, "grad_norm": 0.24691690504550934, "learning_rate": 9.087597150259067e-07, "loss": 1.564, "step": 48960 }, { "epoch": 0.9913374352331606, "grad_norm": 0.2555922865867615, "learning_rate": 8.682804404145077e-07, "loss": 1.5685, "step": 48980 }, { "epoch": 0.9917422279792746, "grad_norm": 0.2585732340812683, "learning_rate": 8.278011658031088e-07, "loss": 1.5704, "step": 49000 }, { "epoch": 0.9921470207253886, "grad_norm": 0.2669827342033386, "learning_rate": 7.873218911917099e-07, "loss": 1.5667, "step": 49020 }, { "epoch": 0.9925518134715026, "grad_norm": 0.2532013952732086, "learning_rate": 7.468426165803108e-07, "loss": 1.5695, "step": 49040 }, { "epoch": 0.9929566062176166, "grad_norm": 0.25732988119125366, "learning_rate": 7.063633419689119e-07, "loss": 1.5679, "step": 49060 }, { "epoch": 0.9933613989637305, "grad_norm": 0.2705577313899994, "learning_rate": 6.65884067357513e-07, "loss": 1.5726, "step": 49080 }, { "epoch": 0.9937661917098446, "grad_norm": 0.26147714257240295, "learning_rate": 6.254047927461139e-07, "loss": 1.5641, "step": 49100 }, { "epoch": 0.9941709844559585, "grad_norm": 0.261763334274292, "learning_rate": 5.84925518134715e-07, "loss": 1.5675, "step": 49120 }, { "epoch": 0.9945757772020726, "grad_norm": 0.2579801380634308, "learning_rate": 5.444462435233161e-07, "loss": 1.5812, "step": 49140 }, { "epoch": 0.9949805699481865, "grad_norm": 0.24958565831184387, "learning_rate": 5.03966968911917e-07, "loss": 1.5754, "step": 49160 }, { "epoch": 0.9953853626943006, "grad_norm": 0.25848251581192017, "learning_rate": 4.6348769430051816e-07, "loss": 1.5607, "step": 49180 }, { "epoch": 0.9957901554404145, "grad_norm": 0.2536344826221466, "learning_rate": 4.2300841968911923e-07, "loss": 1.5582, "step": 49200 }, { "epoch": 0.9961949481865285, "grad_norm": 0.26069629192352295, "learning_rate": 3.8252914507772024e-07, "loss": 1.5744, "step": 49220 }, { "epoch": 0.9965997409326425, "grad_norm": 0.26701316237449646, "learning_rate": 3.4204987046632126e-07, "loss": 1.5692, "step": 49240 }, { "epoch": 0.9970045336787565, "grad_norm": 0.2489342838525772, "learning_rate": 3.0157059585492227e-07, "loss": 1.5652, "step": 49260 }, { "epoch": 0.9974093264248705, "grad_norm": 0.24794739484786987, "learning_rate": 2.6109132124352334e-07, "loss": 1.568, "step": 49280 }, { "epoch": 0.9978141191709845, "grad_norm": 0.2597905993461609, "learning_rate": 2.2061204663212436e-07, "loss": 1.5683, "step": 49300 }, { "epoch": 0.9982189119170984, "grad_norm": 0.2682546377182007, "learning_rate": 1.801327720207254e-07, "loss": 1.5731, "step": 49320 }, { "epoch": 0.9986237046632125, "grad_norm": 0.26443028450012207, "learning_rate": 1.3965349740932644e-07, "loss": 1.5737, "step": 49340 }, { "epoch": 0.9990284974093264, "grad_norm": 0.267911434173584, "learning_rate": 9.917422279792747e-08, "loss": 1.5706, "step": 49360 }, { "epoch": 0.9994332901554405, "grad_norm": 0.26001212000846863, "learning_rate": 5.86949481865285e-08, "loss": 1.5688, "step": 49380 }, { "epoch": 0.9998380829015544, "grad_norm": 0.25613394379615784, "learning_rate": 1.8215673575129535e-08, "loss": 1.5637, "step": 49400 }, { "epoch": 1.0, "step": 49408, "total_flos": 1.584664034112281e+19, "train_loss": 0.2990152974241447, "train_runtime": 9477.5808, "train_samples_per_second": 750.683, "train_steps_per_second": 5.213 } ], "logging_steps": 20, "max_steps": 49408, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.584664034112281e+19, "train_batch_size": 24, "trial_name": null, "trial_params": null }