{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9999383363137448,
"eval_steps": 500,
"global_step": 8108,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.006166368625516433,
"grad_norm": 16.689239750860494,
"learning_rate": 4.0983606557377046e-08,
"loss": 1.618,
"step": 50
},
{
"epoch": 0.012332737251032866,
"grad_norm": 17.403265975850882,
"learning_rate": 8.196721311475409e-08,
"loss": 1.6045,
"step": 100
},
{
"epoch": 0.0184991058765493,
"grad_norm": 14.749335037473138,
"learning_rate": 1.2295081967213116e-07,
"loss": 1.5032,
"step": 150
},
{
"epoch": 0.024665474502065732,
"grad_norm": 8.133780120015619,
"learning_rate": 1.6393442622950818e-07,
"loss": 1.3224,
"step": 200
},
{
"epoch": 0.030831843127582168,
"grad_norm": 5.828403504971832,
"learning_rate": 1.9999971273346704e-07,
"loss": 1.1032,
"step": 250
},
{
"epoch": 0.0369982117530986,
"grad_norm": 3.9432862715914587,
"learning_rate": 1.9997497692480678e-07,
"loss": 0.9673,
"step": 300
},
{
"epoch": 0.043164580378615036,
"grad_norm": 3.4359051761562025,
"learning_rate": 1.9991035427741063e-07,
"loss": 0.8524,
"step": 350
},
{
"epoch": 0.049330949004131465,
"grad_norm": 3.058893644982214,
"learning_rate": 1.9980587057366126e-07,
"loss": 0.7977,
"step": 400
},
{
"epoch": 0.0554973176296479,
"grad_norm": 2.780325971119289,
"learning_rate": 1.9966156749923613e-07,
"loss": 0.7693,
"step": 450
},
{
"epoch": 0.061663686255164336,
"grad_norm": 2.9531788670760784,
"learning_rate": 1.994775026264762e-07,
"loss": 0.7432,
"step": 500
},
{
"epoch": 0.06783005488068077,
"grad_norm": 3.2391609392351692,
"learning_rate": 1.9925374939141637e-07,
"loss": 0.7299,
"step": 550
},
{
"epoch": 0.0739964235061972,
"grad_norm": 2.963688989960738,
"learning_rate": 1.9899039706448692e-07,
"loss": 0.7002,
"step": 600
},
{
"epoch": 0.08016279213171364,
"grad_norm": 2.880854595097375,
"learning_rate": 1.9868755071489728e-07,
"loss": 0.6761,
"step": 650
},
{
"epoch": 0.08632916075723007,
"grad_norm": 2.6331858272282904,
"learning_rate": 1.98345331168717e-07,
"loss": 0.6753,
"step": 700
},
{
"epoch": 0.0924955293827465,
"grad_norm": 3.166081780603538,
"learning_rate": 1.9796387496066975e-07,
"loss": 0.6627,
"step": 750
},
{
"epoch": 0.09866189800826293,
"grad_norm": 3.2004691707941215,
"learning_rate": 1.975433342796604e-07,
"loss": 0.6398,
"step": 800
},
{
"epoch": 0.10482826663377937,
"grad_norm": 3.4439116163641534,
"learning_rate": 1.9708387690805658e-07,
"loss": 0.643,
"step": 850
},
{
"epoch": 0.1109946352592958,
"grad_norm": 2.7959822715068237,
"learning_rate": 1.965856861547486e-07,
"loss": 0.6299,
"step": 900
},
{
"epoch": 0.11716100388481224,
"grad_norm": 2.8720786800067133,
"learning_rate": 1.960489607820153e-07,
"loss": 0.6156,
"step": 950
},
{
"epoch": 0.12332737251032867,
"grad_norm": 2.8998981058217512,
"learning_rate": 1.9547391492622407e-07,
"loss": 0.6045,
"step": 1000
},
{
"epoch": 0.1294937411358451,
"grad_norm": 2.937285416581705,
"learning_rate": 1.9486077801239723e-07,
"loss": 0.604,
"step": 1050
},
{
"epoch": 0.13566010976136153,
"grad_norm": 2.9265674908029258,
"learning_rate": 1.9420979466267888e-07,
"loss": 0.5918,
"step": 1100
},
{
"epoch": 0.14182647838687798,
"grad_norm": 3.1223514523834224,
"learning_rate": 1.9352122459873818e-07,
"loss": 0.5857,
"step": 1150
},
{
"epoch": 0.1479928470123944,
"grad_norm": 2.991244269539233,
"learning_rate": 1.9279534253814899e-07,
"loss": 0.5797,
"step": 1200
},
{
"epoch": 0.15415921563791082,
"grad_norm": 2.9531460521405313,
"learning_rate": 1.9203243808478597e-07,
"loss": 0.583,
"step": 1250
},
{
"epoch": 0.16032558426342727,
"grad_norm": 2.9620910098760174,
"learning_rate": 1.9123281561328205e-07,
"loss": 0.5647,
"step": 1300
},
{
"epoch": 0.1664919528889437,
"grad_norm": 2.8183125229693333,
"learning_rate": 1.9039679414759247e-07,
"loss": 0.5675,
"step": 1350
},
{
"epoch": 0.17265832151446014,
"grad_norm": 3.029807143662261,
"learning_rate": 1.8952470723371465e-07,
"loss": 0.5669,
"step": 1400
},
{
"epoch": 0.17882469013997657,
"grad_norm": 3.323729247650118,
"learning_rate": 1.886169028066135e-07,
"loss": 0.5579,
"step": 1450
},
{
"epoch": 0.184991058765493,
"grad_norm": 2.9853732307969123,
"learning_rate": 1.8767374305140678e-07,
"loss": 0.5578,
"step": 1500
},
{
"epoch": 0.19115742739100944,
"grad_norm": 2.8289118535370226,
"learning_rate": 1.8669560425886458e-07,
"loss": 0.5565,
"step": 1550
},
{
"epoch": 0.19732379601652586,
"grad_norm": 3.107927650244337,
"learning_rate": 1.8568287667528136e-07,
"loss": 0.5482,
"step": 1600
},
{
"epoch": 0.2034901646420423,
"grad_norm": 2.9356477568984474,
"learning_rate": 1.846359643467799e-07,
"loss": 0.5493,
"step": 1650
},
{
"epoch": 0.20965653326755873,
"grad_norm": 2.8886483110859706,
"learning_rate": 1.8355528495811004e-07,
"loss": 0.5441,
"step": 1700
},
{
"epoch": 0.21582290189307518,
"grad_norm": 3.043206189340112,
"learning_rate": 1.8244126966600537e-07,
"loss": 0.5309,
"step": 1750
},
{
"epoch": 0.2219892705185916,
"grad_norm": 2.84058010312111,
"learning_rate": 1.8129436292716576e-07,
"loss": 0.5281,
"step": 1800
},
{
"epoch": 0.22815563914410802,
"grad_norm": 3.2658177471645793,
"learning_rate": 1.8011502232093294e-07,
"loss": 0.5219,
"step": 1850
},
{
"epoch": 0.23432200776962447,
"grad_norm": 2.838918099928717,
"learning_rate": 1.7890371836673115e-07,
"loss": 0.5164,
"step": 1900
},
{
"epoch": 0.2404883763951409,
"grad_norm": 3.3022219294232222,
"learning_rate": 1.7766093433634462e-07,
"loss": 0.524,
"step": 1950
},
{
"epoch": 0.24665474502065735,
"grad_norm": 3.5602190680329637,
"learning_rate": 1.7638716606110768e-07,
"loss": 0.509,
"step": 2000
},
{
"epoch": 0.25282111364617377,
"grad_norm": 3.0096527122431973,
"learning_rate": 1.7508292173408366e-07,
"loss": 0.5193,
"step": 2050
},
{
"epoch": 0.2589874822716902,
"grad_norm": 3.3687126826867044,
"learning_rate": 1.7374872170731205e-07,
"loss": 0.5186,
"step": 2100
},
{
"epoch": 0.2651538508972066,
"grad_norm": 3.1979358817347734,
"learning_rate": 1.7238509828420468e-07,
"loss": 0.5081,
"step": 2150
},
{
"epoch": 0.27132021952272306,
"grad_norm": 2.7782410513777207,
"learning_rate": 1.709925955071734e-07,
"loss": 0.5046,
"step": 2200
},
{
"epoch": 0.2774865881482395,
"grad_norm": 3.0956007198543376,
"learning_rate": 1.6957176894057456e-07,
"loss": 0.5067,
"step": 2250
},
{
"epoch": 0.28365295677375596,
"grad_norm": 2.883657997016742,
"learning_rate": 1.681231854490565e-07,
"loss": 0.5034,
"step": 2300
},
{
"epoch": 0.28981932539927235,
"grad_norm": 3.037713494095377,
"learning_rate": 1.6664742297139842e-07,
"loss": 0.5017,
"step": 2350
},
{
"epoch": 0.2959856940247888,
"grad_norm": 2.7886707641373856,
"learning_rate": 1.6514507028993141e-07,
"loss": 0.5074,
"step": 2400
},
{
"epoch": 0.30215206265030525,
"grad_norm": 3.0522520309780665,
"learning_rate": 1.636167267956328e-07,
"loss": 0.504,
"step": 2450
},
{
"epoch": 0.30831843127582165,
"grad_norm": 2.9917653849017967,
"learning_rate": 1.620630022489884e-07,
"loss": 0.492,
"step": 2500
},
{
"epoch": 0.3144847999013381,
"grad_norm": 3.374780491495851,
"learning_rate": 1.604845165367171e-07,
"loss": 0.5012,
"step": 2550
},
{
"epoch": 0.32065116852685455,
"grad_norm": 3.054737104124034,
"learning_rate": 1.588818994244563e-07,
"loss": 0.4961,
"step": 2600
},
{
"epoch": 0.326817537152371,
"grad_norm": 3.1630826680292037,
"learning_rate": 1.5725579030550487e-07,
"loss": 0.4986,
"step": 2650
},
{
"epoch": 0.3329839057778874,
"grad_norm": 2.787165502227459,
"learning_rate": 1.5560683794572599e-07,
"loss": 0.5005,
"step": 2700
},
{
"epoch": 0.33915027440340384,
"grad_norm": 3.2159871448935853,
"learning_rate": 1.5393570022470996e-07,
"loss": 0.4912,
"step": 2750
},
{
"epoch": 0.3453166430289203,
"grad_norm": 2.921398178739714,
"learning_rate": 1.5224304387330113e-07,
"loss": 0.4873,
"step": 2800
},
{
"epoch": 0.3514830116544367,
"grad_norm": 3.033201824114291,
"learning_rate": 1.505295442075936e-07,
"loss": 0.4848,
"step": 2850
},
{
"epoch": 0.35764938027995313,
"grad_norm": 3.126845883000846,
"learning_rate": 1.4879588485950154e-07,
"loss": 0.4761,
"step": 2900
},
{
"epoch": 0.3638157489054696,
"grad_norm": 2.899612235662964,
"learning_rate": 1.4704275750401168e-07,
"loss": 0.4731,
"step": 2950
},
{
"epoch": 0.369982117530986,
"grad_norm": 2.78803166053557,
"learning_rate": 1.45270861583227e-07,
"loss": 0.4751,
"step": 3000
},
{
"epoch": 0.3761484861565024,
"grad_norm": 3.217869780099078,
"learning_rate": 1.4348090402731177e-07,
"loss": 0.4833,
"step": 3050
},
{
"epoch": 0.3823148547820189,
"grad_norm": 2.98388792612514,
"learning_rate": 1.416735989724485e-07,
"loss": 0.4768,
"step": 3100
},
{
"epoch": 0.3884812234075353,
"grad_norm": 3.095979105793261,
"learning_rate": 1.3984966747592066e-07,
"loss": 0.4781,
"step": 3150
},
{
"epoch": 0.3946475920330517,
"grad_norm": 2.8570658991316944,
"learning_rate": 1.380098372284335e-07,
"loss": 0.47,
"step": 3200
},
{
"epoch": 0.40081396065856817,
"grad_norm": 2.913522526116864,
"learning_rate": 1.3615484226378866e-07,
"loss": 0.4761,
"step": 3250
},
{
"epoch": 0.4069803292840846,
"grad_norm": 3.079167327659028,
"learning_rate": 1.3428542266602808e-07,
"loss": 0.4691,
"step": 3300
},
{
"epoch": 0.413146697909601,
"grad_norm": 3.167335424827754,
"learning_rate": 1.3240232427416377e-07,
"loss": 0.4762,
"step": 3350
},
{
"epoch": 0.41931306653511746,
"grad_norm": 2.951805565284142,
"learning_rate": 1.3050629838461213e-07,
"loss": 0.4743,
"step": 3400
},
{
"epoch": 0.4254794351606339,
"grad_norm": 3.344274691992938,
"learning_rate": 1.285981014514501e-07,
"loss": 0.4651,
"step": 3450
},
{
"epoch": 0.43164580378615036,
"grad_norm": 3.134003729646922,
"learning_rate": 1.2667849478461436e-07,
"loss": 0.474,
"step": 3500
},
{
"epoch": 0.43781217241166676,
"grad_norm": 3.000847232186744,
"learning_rate": 1.2474824424616271e-07,
"loss": 0.4729,
"step": 3550
},
{
"epoch": 0.4439785410371832,
"grad_norm": 2.836288640859743,
"learning_rate": 1.228081199447195e-07,
"loss": 0.4632,
"step": 3600
},
{
"epoch": 0.45014490966269965,
"grad_norm": 3.308502889653925,
"learning_rate": 1.2085889592822667e-07,
"loss": 0.4601,
"step": 3650
},
{
"epoch": 0.45631127828821605,
"grad_norm": 2.746002613176513,
"learning_rate": 1.1890134987512341e-07,
"loss": 0.467,
"step": 3700
},
{
"epoch": 0.4624776469137325,
"grad_norm": 3.24735950823672,
"learning_rate": 1.1693626278407694e-07,
"loss": 0.4617,
"step": 3750
},
{
"epoch": 0.46864401553924895,
"grad_norm": 3.0607507540260075,
"learning_rate": 1.1496441866238905e-07,
"loss": 0.4569,
"step": 3800
},
{
"epoch": 0.47481038416476534,
"grad_norm": 2.9943145563385998,
"learning_rate": 1.1298660421320194e-07,
"loss": 0.4619,
"step": 3850
},
{
"epoch": 0.4809767527902818,
"grad_norm": 3.1612704244607177,
"learning_rate": 1.1100360852162888e-07,
"loss": 0.4637,
"step": 3900
},
{
"epoch": 0.48714312141579824,
"grad_norm": 3.1449471984877055,
"learning_rate": 1.0901622273993417e-07,
"loss": 0.4701,
"step": 3950
},
{
"epoch": 0.4933094900413147,
"grad_norm": 3.0699714321899387,
"learning_rate": 1.070252397718884e-07,
"loss": 0.4558,
"step": 4000
},
{
"epoch": 0.4994758586668311,
"grad_norm": 3.4589428619371834,
"learning_rate": 1.0503145395642541e-07,
"loss": 0.4599,
"step": 4050
},
{
"epoch": 0.5056422272923475,
"grad_norm": 3.0848999815662674,
"learning_rate": 1.0303566075072598e-07,
"loss": 0.4558,
"step": 4100
},
{
"epoch": 0.511808595917864,
"grad_norm": 2.947163992749446,
"learning_rate": 1.0103865641285583e-07,
"loss": 0.457,
"step": 4150
},
{
"epoch": 0.5179749645433804,
"grad_norm": 3.5673363307250927,
"learning_rate": 9.904123768408389e-08,
"loss": 0.4575,
"step": 4200
},
{
"epoch": 0.5241413331688969,
"grad_norm": 3.07648492625604,
"learning_rate": 9.704420147100796e-08,
"loss": 0.4528,
"step": 4250
},
{
"epoch": 0.5303077017944132,
"grad_norm": 3.2080853332983907,
"learning_rate": 9.504834452761424e-08,
"loss": 0.455,
"step": 4300
},
{
"epoch": 0.5364740704199297,
"grad_norm": 2.952611892786328,
"learning_rate": 9.305446313739767e-08,
"loss": 0.4472,
"step": 4350
},
{
"epoch": 0.5426404390454461,
"grad_norm": 3.005908121136174,
"learning_rate": 9.106335279567037e-08,
"loss": 0.4516,
"step": 4400
},
{
"epoch": 0.5488068076709626,
"grad_norm": 2.822428791661921,
"learning_rate": 8.907580789218414e-08,
"loss": 0.4528,
"step": 4450
},
{
"epoch": 0.554973176296479,
"grad_norm": 2.8673595096457465,
"learning_rate": 8.709262139419424e-08,
"loss": 0.4536,
"step": 4500
},
{
"epoch": 0.5611395449219955,
"grad_norm": 3.264575792740317,
"learning_rate": 8.511458453009065e-08,
"loss": 0.4524,
"step": 4550
},
{
"epoch": 0.5673059135475119,
"grad_norm": 3.3129039957771806,
"learning_rate": 8.314248647372302e-08,
"loss": 0.4467,
"step": 4600
},
{
"epoch": 0.5734722821730283,
"grad_norm": 3.083187238173955,
"learning_rate": 8.117711402954554e-08,
"loss": 0.4488,
"step": 4650
},
{
"epoch": 0.5796386507985447,
"grad_norm": 3.1488830656848,
"learning_rate": 7.921925131870672e-08,
"loss": 0.4579,
"step": 4700
},
{
"epoch": 0.5858050194240612,
"grad_norm": 2.9768313706421874,
"learning_rate": 7.726967946621029e-08,
"loss": 0.4481,
"step": 4750
},
{
"epoch": 0.5919713880495776,
"grad_norm": 3.0236276200137486,
"learning_rate": 7.532917628927079e-08,
"loss": 0.4529,
"step": 4800
},
{
"epoch": 0.5981377566750941,
"grad_norm": 3.0681593760022285,
"learning_rate": 7.339851598698955e-08,
"loss": 0.4527,
"step": 4850
},
{
"epoch": 0.6043041253006105,
"grad_norm": 3.2203600426157495,
"learning_rate": 7.147846883147362e-08,
"loss": 0.4473,
"step": 4900
},
{
"epoch": 0.610470493926127,
"grad_norm": 3.127241727234972,
"learning_rate": 6.956980086052184e-08,
"loss": 0.4536,
"step": 4950
},
{
"epoch": 0.6166368625516433,
"grad_norm": 3.215958029153526,
"learning_rate": 6.76732735719999e-08,
"loss": 0.4505,
"step": 5000
},
{
"epoch": 0.6228032311771597,
"grad_norm": 2.8220120121880936,
"learning_rate": 6.578964362002715e-08,
"loss": 0.4514,
"step": 5050
},
{
"epoch": 0.6289695998026762,
"grad_norm": 3.0284791997521054,
"learning_rate": 6.391966251309539e-08,
"loss": 0.4458,
"step": 5100
},
{
"epoch": 0.6351359684281926,
"grad_norm": 3.371033810021987,
"learning_rate": 6.206407631424109e-08,
"loss": 0.4446,
"step": 5150
},
{
"epoch": 0.6413023370537091,
"grad_norm": 3.122281628753462,
"learning_rate": 6.02236253433898e-08,
"loss": 0.4473,
"step": 5200
},
{
"epoch": 0.6474687056792255,
"grad_norm": 2.7353573500503074,
"learning_rate": 5.8399043881992104e-08,
"loss": 0.4399,
"step": 5250
},
{
"epoch": 0.653635074304742,
"grad_norm": 3.194859384027796,
"learning_rate": 5.659105988006851e-08,
"loss": 0.4499,
"step": 5300
},
{
"epoch": 0.6598014429302583,
"grad_norm": 2.8707279633921194,
"learning_rate": 5.480039466578079e-08,
"loss": 0.453,
"step": 5350
},
{
"epoch": 0.6659678115557748,
"grad_norm": 3.313196070466103,
"learning_rate": 5.3027762657644745e-08,
"loss": 0.4433,
"step": 5400
},
{
"epoch": 0.6721341801812912,
"grad_norm": 2.9398335243680056,
"learning_rate": 5.1273871079499986e-08,
"loss": 0.447,
"step": 5450
},
{
"epoch": 0.6783005488068077,
"grad_norm": 2.9069645999783726,
"learning_rate": 4.9539419678350103e-08,
"loss": 0.4424,
"step": 5500
},
{
"epoch": 0.6844669174323241,
"grad_norm": 3.0807794080344744,
"learning_rate": 4.7825100445185904e-08,
"loss": 0.4502,
"step": 5550
},
{
"epoch": 0.6906332860578406,
"grad_norm": 3.1238439553913913,
"learning_rate": 4.613159733890279e-08,
"loss": 0.4371,
"step": 5600
},
{
"epoch": 0.6967996546833569,
"grad_norm": 2.894912373492253,
"learning_rate": 4.445958601342321e-08,
"loss": 0.4466,
"step": 5650
},
{
"epoch": 0.7029660233088734,
"grad_norm": 3.1861168079620352,
"learning_rate": 4.280973354813196e-08,
"loss": 0.4452,
"step": 5700
},
{
"epoch": 0.7091323919343898,
"grad_norm": 3.218055671881565,
"learning_rate": 4.118269818173283e-08,
"loss": 0.4335,
"step": 5750
},
{
"epoch": 0.7152987605599063,
"grad_norm": 3.7156552734894177,
"learning_rate": 3.957912904963225e-08,
"loss": 0.4482,
"step": 5800
},
{
"epoch": 0.7214651291854227,
"grad_norm": 3.248126042161764,
"learning_rate": 3.7999665924954815e-08,
"loss": 0.4407,
"step": 5850
},
{
"epoch": 0.7276314978109392,
"grad_norm": 3.37840785837335,
"learning_rate": 3.64449389632943e-08,
"loss": 0.4421,
"step": 5900
},
{
"epoch": 0.7337978664364556,
"grad_norm": 2.919668888292714,
"learning_rate": 3.491556845130147e-08,
"loss": 0.4358,
"step": 5950
},
{
"epoch": 0.739964235061972,
"grad_norm": 3.449594279809231,
"learning_rate": 3.3412164559209485e-08,
"loss": 0.4393,
"step": 6000
},
{
"epoch": 0.7461306036874884,
"grad_norm": 3.0449172482636713,
"learning_rate": 3.193532709739534e-08,
"loss": 0.443,
"step": 6050
},
{
"epoch": 0.7522969723130049,
"grad_norm": 2.9659035390086603,
"learning_rate": 3.048564527707457e-08,
"loss": 0.4541,
"step": 6100
},
{
"epoch": 0.7584633409385213,
"grad_norm": 3.0426691033458266,
"learning_rate": 2.9063697475224736e-08,
"loss": 0.4411,
"step": 6150
},
{
"epoch": 0.7646297095640378,
"grad_norm": 3.1254929066925543,
"learning_rate": 2.767005100383143e-08,
"loss": 0.4466,
"step": 6200
},
{
"epoch": 0.7707960781895542,
"grad_norm": 3.059948610503461,
"learning_rate": 2.6305261883548624e-08,
"loss": 0.4501,
"step": 6250
},
{
"epoch": 0.7769624468150707,
"grad_norm": 3.178741971582532,
"learning_rate": 2.4969874621864373e-08,
"loss": 0.4405,
"step": 6300
},
{
"epoch": 0.783128815440587,
"grad_norm": 3.178300180527373,
"learning_rate": 2.3664421995859463e-08,
"loss": 0.4499,
"step": 6350
},
{
"epoch": 0.7892951840661034,
"grad_norm": 3.003275204473159,
"learning_rate": 2.2389424839646286e-08,
"loss": 0.4399,
"step": 6400
},
{
"epoch": 0.7954615526916199,
"grad_norm": 3.420014772222019,
"learning_rate": 2.114539183657268e-08,
"loss": 0.4352,
"step": 6450
},
{
"epoch": 0.8016279213171363,
"grad_norm": 2.971627106875043,
"learning_rate": 1.9932819316273307e-08,
"loss": 0.4382,
"step": 6500
},
{
"epoch": 0.8077942899426528,
"grad_norm": 3.4422374871537533,
"learning_rate": 1.8752191056650023e-08,
"loss": 0.4377,
"step": 6550
},
{
"epoch": 0.8139606585681692,
"grad_norm": 3.053124764133182,
"learning_rate": 1.7603978090859794e-08,
"loss": 0.4442,
"step": 6600
},
{
"epoch": 0.8201270271936857,
"grad_norm": 3.1086937331605613,
"learning_rate": 1.6488638519387478e-08,
"loss": 0.4466,
"step": 6650
},
{
"epoch": 0.826293395819202,
"grad_norm": 3.4399676514136193,
"learning_rate": 1.5406617327278205e-08,
"loss": 0.4326,
"step": 6700
},
{
"epoch": 0.8324597644447185,
"grad_norm": 2.8487398222000744,
"learning_rate": 1.4358346206602612e-08,
"loss": 0.4422,
"step": 6750
},
{
"epoch": 0.8386261330702349,
"grad_norm": 2.9651774336393726,
"learning_rate": 1.334424338422534e-08,
"loss": 0.4305,
"step": 6800
},
{
"epoch": 0.8447925016957514,
"grad_norm": 3.4279808291982556,
"learning_rate": 1.236471345494583e-08,
"loss": 0.4386,
"step": 6850
},
{
"epoch": 0.8509588703212678,
"grad_norm": 3.298847289113035,
"learning_rate": 1.1420147220077847e-08,
"loss": 0.4425,
"step": 6900
},
{
"epoch": 0.8571252389467843,
"grad_norm": 3.199726112913922,
"learning_rate": 1.0510921531532192e-08,
"loss": 0.4339,
"step": 6950
},
{
"epoch": 0.8632916075723007,
"grad_norm": 3.3865902484127637,
"learning_rate": 9.63739914146473e-09,
"loss": 0.426,
"step": 7000
},
{
"epoch": 0.8694579761978171,
"grad_norm": 3.080132950484914,
"learning_rate": 8.799928557549863e-09,
"loss": 0.4437,
"step": 7050
},
{
"epoch": 0.8756243448233335,
"grad_norm": 3.2441647152844526,
"learning_rate": 7.998843903936992e-09,
"loss": 0.4338,
"step": 7100
},
{
"epoch": 0.88179071344885,
"grad_norm": 2.861131038634973,
"learning_rate": 7.2344647879456265e-09,
"loss": 0.4363,
"step": 7150
},
{
"epoch": 0.8879570820743664,
"grad_norm": 3.131842173102097,
"learning_rate": 6.507096172552195e-09,
"loss": 0.4333,
"step": 7200
},
{
"epoch": 0.8941234506998829,
"grad_norm": 3.1735067730802604,
"learning_rate": 5.817028254719536e-09,
"loss": 0.4395,
"step": 7250
},
{
"epoch": 0.9002898193253993,
"grad_norm": 2.941305225791783,
"learning_rate": 5.164536349617532e-09,
"loss": 0.4418,
"step": 7300
},
{
"epoch": 0.9064561879509158,
"grad_norm": 3.1369522496788806,
"learning_rate": 4.5498807807811015e-09,
"loss": 0.4413,
"step": 7350
},
{
"epoch": 0.9126225565764321,
"grad_norm": 3.10250834762718,
"learning_rate": 3.973306776249341e-09,
"loss": 0.4316,
"step": 7400
},
{
"epoch": 0.9187889252019485,
"grad_norm": 3.113181559222609,
"learning_rate": 3.4350443707274135e-09,
"loss": 0.4391,
"step": 7450
},
{
"epoch": 0.924955293827465,
"grad_norm": 3.2125045204581424,
"learning_rate": 2.9353083138099256e-09,
"loss": 0.4453,
"step": 7500
},
{
"epoch": 0.9311216624529814,
"grad_norm": 3.3021789642945008,
"learning_rate": 2.474297984302709e-09,
"loss": 0.4404,
"step": 7550
},
{
"epoch": 0.9372880310784979,
"grad_norm": 3.3940858957593223,
"learning_rate": 2.0521973106770285e-09,
"loss": 0.4387,
"step": 7600
},
{
"epoch": 0.9434543997040143,
"grad_norm": 2.8912916713122763,
"learning_rate": 1.6691746976879028e-09,
"loss": 0.4396,
"step": 7650
},
{
"epoch": 0.9496207683295307,
"grad_norm": 3.437089198963669,
"learning_rate": 1.3253829591860387e-09,
"loss": 0.4375,
"step": 7700
},
{
"epoch": 0.9557871369550471,
"grad_norm": 3.138591510137561,
"learning_rate": 1.0209592571498892e-09,
"loss": 0.432,
"step": 7750
},
{
"epoch": 0.9619535055805636,
"grad_norm": 3.0903316485258783,
"learning_rate": 7.560250469624385e-10,
"loss": 0.4381,
"step": 7800
},
{
"epoch": 0.96811987420608,
"grad_norm": 3.1363149734033233,
"learning_rate": 5.306860289543413e-10,
"loss": 0.4432,
"step": 7850
},
{
"epoch": 0.9742862428315965,
"grad_norm": 3.143737684684351,
"learning_rate": 3.450321062328232e-10,
"loss": 0.4334,
"step": 7900
},
{
"epoch": 0.9804526114571129,
"grad_norm": 2.8627388485987444,
"learning_rate": 1.9913734881326083e-10,
"loss": 0.4372,
"step": 7950
},
{
"epoch": 0.9866189800826294,
"grad_norm": 3.044456688116337,
"learning_rate": 9.305996406754335e-11,
"loss": 0.4376,
"step": 8000
},
{
"epoch": 0.9927853487081457,
"grad_norm": 2.9030301879677096,
"learning_rate": 2.6842273501193058e-11,
"loss": 0.4348,
"step": 8050
},
{
"epoch": 0.9989517173336622,
"grad_norm": 3.2924748739223664,
"learning_rate": 5.10695868449762e-13,
"loss": 0.4422,
"step": 8100
},
{
"epoch": 0.9999383363137448,
"step": 8108,
"total_flos": 533986133770240.0,
"train_loss": 0.5225248140364254,
"train_runtime": 45752.7497,
"train_samples_per_second": 5.671,
"train_steps_per_second": 0.177
}
],
"logging_steps": 50,
"max_steps": 8108,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 533986133770240.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}