codet5-k8s-full-final / checkpoint-9213 /trainer_state.json
gpol13's picture
Upload folder using huggingface_hub
507084e verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 9213,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0162813415825464,
"grad_norm": 3.7931034564971924,
"learning_rate": 4.975035276240096e-05,
"loss": 1.7502801513671875,
"step": 50
},
{
"epoch": 0.0325626831650928,
"grad_norm": 3.5913758277893066,
"learning_rate": 4.947899706935852e-05,
"loss": 0.8483324432373047,
"step": 100
},
{
"epoch": 0.04884402474763921,
"grad_norm": 3.3178822994232178,
"learning_rate": 4.9207641376316076e-05,
"loss": 0.5748957061767578,
"step": 150
},
{
"epoch": 0.0651253663301856,
"grad_norm": 2.392831802368164,
"learning_rate": 4.8936285683273635e-05,
"loss": 0.4335686492919922,
"step": 200
},
{
"epoch": 0.08140670791273201,
"grad_norm": 2.411132335662842,
"learning_rate": 4.8664929990231194e-05,
"loss": 0.45588829040527346,
"step": 250
},
{
"epoch": 0.09768804949527841,
"grad_norm": 3.383033275604248,
"learning_rate": 4.839357429718876e-05,
"loss": 0.38454761505126955,
"step": 300
},
{
"epoch": 0.11396939107782482,
"grad_norm": 1.5641525983810425,
"learning_rate": 4.812221860414632e-05,
"loss": 0.31118762969970704,
"step": 350
},
{
"epoch": 0.1302507326603712,
"grad_norm": 1.962287425994873,
"learning_rate": 4.785086291110388e-05,
"loss": 0.32715892791748047,
"step": 400
},
{
"epoch": 0.14653207424291761,
"grad_norm": 2.539684534072876,
"learning_rate": 4.7579507218061436e-05,
"loss": 0.3269093704223633,
"step": 450
},
{
"epoch": 0.16281341582546402,
"grad_norm": 3.244333267211914,
"learning_rate": 4.7308151525018995e-05,
"loss": 0.2726271057128906,
"step": 500
},
{
"epoch": 0.17909475740801042,
"grad_norm": 2.2011330127716064,
"learning_rate": 4.7036795831976553e-05,
"loss": 0.3161302185058594,
"step": 550
},
{
"epoch": 0.19537609899055683,
"grad_norm": 5.027646541595459,
"learning_rate": 4.676544013893412e-05,
"loss": 0.22250593185424805,
"step": 600
},
{
"epoch": 0.21165744057310323,
"grad_norm": 0.6281399726867676,
"learning_rate": 4.649408444589168e-05,
"loss": 0.26152374267578127,
"step": 650
},
{
"epoch": 0.22793878215564964,
"grad_norm": 3.362748622894287,
"learning_rate": 4.622272875284924e-05,
"loss": 0.2384391975402832,
"step": 700
},
{
"epoch": 0.24422012373819602,
"grad_norm": 0.9307177066802979,
"learning_rate": 4.5951373059806795e-05,
"loss": 0.19612070083618163,
"step": 750
},
{
"epoch": 0.2605014653207424,
"grad_norm": 3.107837438583374,
"learning_rate": 4.5680017366764354e-05,
"loss": 0.22400428771972655,
"step": 800
},
{
"epoch": 0.2767828069032888,
"grad_norm": 2.3350419998168945,
"learning_rate": 4.540866167372192e-05,
"loss": 0.246726131439209,
"step": 850
},
{
"epoch": 0.29306414848583523,
"grad_norm": 1.3348891735076904,
"learning_rate": 4.513730598067948e-05,
"loss": 0.17960617065429688,
"step": 900
},
{
"epoch": 0.30934549006838163,
"grad_norm": 1.4406858682632446,
"learning_rate": 4.486595028763704e-05,
"loss": 0.19865007400512696,
"step": 950
},
{
"epoch": 0.32562683165092804,
"grad_norm": 2.17195200920105,
"learning_rate": 4.4594594594594596e-05,
"loss": 0.19141647338867188,
"step": 1000
},
{
"epoch": 0.34190817323347444,
"grad_norm": 2.63667893409729,
"learning_rate": 4.4323238901552155e-05,
"loss": 0.1807699966430664,
"step": 1050
},
{
"epoch": 0.35818951481602085,
"grad_norm": 1.1883361339569092,
"learning_rate": 4.4051883208509714e-05,
"loss": 0.19377944946289063,
"step": 1100
},
{
"epoch": 0.37447085639856725,
"grad_norm": 0.6610957384109497,
"learning_rate": 4.378052751546728e-05,
"loss": 0.18130062103271485,
"step": 1150
},
{
"epoch": 0.39075219798111366,
"grad_norm": 1.802565336227417,
"learning_rate": 4.350917182242484e-05,
"loss": 0.17183830261230468,
"step": 1200
},
{
"epoch": 0.40703353956366006,
"grad_norm": 1.6211966276168823,
"learning_rate": 4.32378161293824e-05,
"loss": 0.1512114906311035,
"step": 1250
},
{
"epoch": 0.42331488114620647,
"grad_norm": 0.8947325944900513,
"learning_rate": 4.2966460436339956e-05,
"loss": 0.1733652877807617,
"step": 1300
},
{
"epoch": 0.4395962227287529,
"grad_norm": 0.31800374388694763,
"learning_rate": 4.2695104743297515e-05,
"loss": 0.1839361572265625,
"step": 1350
},
{
"epoch": 0.4558775643112993,
"grad_norm": 0.9736223220825195,
"learning_rate": 4.2423749050255074e-05,
"loss": 0.12980345726013184,
"step": 1400
},
{
"epoch": 0.4721589058938456,
"grad_norm": 1.7321406602859497,
"learning_rate": 4.215239335721264e-05,
"loss": 0.14502116203308105,
"step": 1450
},
{
"epoch": 0.48844024747639203,
"grad_norm": 0.47778311371803284,
"learning_rate": 4.18810376641702e-05,
"loss": 0.15735553741455077,
"step": 1500
},
{
"epoch": 0.5047215890589385,
"grad_norm": 0.43138086795806885,
"learning_rate": 4.160968197112776e-05,
"loss": 0.13693093299865722,
"step": 1550
},
{
"epoch": 0.5210029306414848,
"grad_norm": 0.7121404409408569,
"learning_rate": 4.1338326278085316e-05,
"loss": 0.13991880416870117,
"step": 1600
},
{
"epoch": 0.5372842722240313,
"grad_norm": 1.8167650699615479,
"learning_rate": 4.1066970585042875e-05,
"loss": 0.13732372283935546,
"step": 1650
},
{
"epoch": 0.5535656138065776,
"grad_norm": 1.801047921180725,
"learning_rate": 4.079561489200044e-05,
"loss": 0.1461949062347412,
"step": 1700
},
{
"epoch": 0.5698469553891241,
"grad_norm": 1.2010151147842407,
"learning_rate": 4.0524259198958e-05,
"loss": 0.16667499542236328,
"step": 1750
},
{
"epoch": 0.5861282969716705,
"grad_norm": 0.31175410747528076,
"learning_rate": 4.025290350591556e-05,
"loss": 0.13411394119262696,
"step": 1800
},
{
"epoch": 0.6024096385542169,
"grad_norm": 1.0062410831451416,
"learning_rate": 3.998154781287312e-05,
"loss": 0.141832914352417,
"step": 1850
},
{
"epoch": 0.6186909801367633,
"grad_norm": 0.5772050619125366,
"learning_rate": 3.9710192119830675e-05,
"loss": 0.14646322250366212,
"step": 1900
},
{
"epoch": 0.6349723217193096,
"grad_norm": 1.1436623334884644,
"learning_rate": 3.9444263540649085e-05,
"loss": 0.11982306480407715,
"step": 1950
},
{
"epoch": 0.6512536633018561,
"grad_norm": 0.6914354562759399,
"learning_rate": 3.9172907847606644e-05,
"loss": 0.13149891853332518,
"step": 2000
},
{
"epoch": 0.6675350048844024,
"grad_norm": 0.5716465711593628,
"learning_rate": 3.89015521545642e-05,
"loss": 0.1392893123626709,
"step": 2050
},
{
"epoch": 0.6838163464669489,
"grad_norm": 1.4796607494354248,
"learning_rate": 3.863019646152177e-05,
"loss": 0.1265252685546875,
"step": 2100
},
{
"epoch": 0.7000976880494952,
"grad_norm": 0.849554717540741,
"learning_rate": 3.835884076847933e-05,
"loss": 0.1300504207611084,
"step": 2150
},
{
"epoch": 0.7163790296320417,
"grad_norm": 0.9229751825332642,
"learning_rate": 3.8087485075436886e-05,
"loss": 0.144088077545166,
"step": 2200
},
{
"epoch": 0.732660371214588,
"grad_norm": 0.6000483632087708,
"learning_rate": 3.7816129382394445e-05,
"loss": 0.1267460823059082,
"step": 2250
},
{
"epoch": 0.7489417127971345,
"grad_norm": 1.584933876991272,
"learning_rate": 3.7544773689352004e-05,
"loss": 0.12461037635803222,
"step": 2300
},
{
"epoch": 0.7652230543796809,
"grad_norm": 0.7694635987281799,
"learning_rate": 3.727341799630956e-05,
"loss": 0.1397037124633789,
"step": 2350
},
{
"epoch": 0.7815043959622273,
"grad_norm": 0.9538297653198242,
"learning_rate": 3.700206230326713e-05,
"loss": 0.09744812965393067,
"step": 2400
},
{
"epoch": 0.7977857375447737,
"grad_norm": 1.10379159450531,
"learning_rate": 3.673070661022469e-05,
"loss": 0.14414773941040038,
"step": 2450
},
{
"epoch": 0.8140670791273201,
"grad_norm": 1.6340835094451904,
"learning_rate": 3.6459350917182246e-05,
"loss": 0.11836291313171386,
"step": 2500
},
{
"epoch": 0.8303484207098665,
"grad_norm": 0.06909910589456558,
"learning_rate": 3.6187995224139805e-05,
"loss": 0.13456206321716307,
"step": 2550
},
{
"epoch": 0.8466297622924129,
"grad_norm": 0.11458413302898407,
"learning_rate": 3.5916639531097364e-05,
"loss": 0.12975069046020507,
"step": 2600
},
{
"epoch": 0.8629111038749593,
"grad_norm": 0.3121241331100464,
"learning_rate": 3.564528383805492e-05,
"loss": 0.10294739723205566,
"step": 2650
},
{
"epoch": 0.8791924454575057,
"grad_norm": 0.7833127975463867,
"learning_rate": 3.537392814501249e-05,
"loss": 0.1058332633972168,
"step": 2700
},
{
"epoch": 0.8954737870400521,
"grad_norm": 1.0220922231674194,
"learning_rate": 3.510257245197005e-05,
"loss": 0.11729028701782226,
"step": 2750
},
{
"epoch": 0.9117551286225986,
"grad_norm": 0.6296119093894958,
"learning_rate": 3.4831216758927606e-05,
"loss": 0.12148540496826171,
"step": 2800
},
{
"epoch": 0.9280364702051449,
"grad_norm": 0.8129004240036011,
"learning_rate": 3.4559861065885164e-05,
"loss": 0.09763257980346679,
"step": 2850
},
{
"epoch": 0.9443178117876913,
"grad_norm": 0.6814725399017334,
"learning_rate": 3.428850537284272e-05,
"loss": 0.10192323684692382,
"step": 2900
},
{
"epoch": 0.9605991533702377,
"grad_norm": 0.19898249208927155,
"learning_rate": 3.401714967980029e-05,
"loss": 0.11552732467651367,
"step": 2950
},
{
"epoch": 0.9768804949527841,
"grad_norm": 0.7032152414321899,
"learning_rate": 3.374579398675785e-05,
"loss": 0.09218964576721192,
"step": 3000
},
{
"epoch": 0.9931618365353305,
"grad_norm": 0.5327423214912415,
"learning_rate": 3.3474438293715407e-05,
"loss": 0.11835557937622071,
"step": 3050
},
{
"epoch": 1.0,
"eval_bertscore_f1": 0.9908905607812545,
"eval_bleu": 0.8857676606120443,
"eval_loss": 0.08205162733793259,
"eval_meteor": 0.9292767478739071,
"eval_rouge1": 0.9454800565736884,
"eval_rouge2": 0.9108168851120266,
"eval_runtime": 61.8203,
"eval_samples_per_second": 20.899,
"eval_steps_per_second": 2.621,
"step": 3071
},
{
"epoch": 1.009443178117877,
"grad_norm": 0.7608644366264343,
"learning_rate": 3.3203082600672965e-05,
"loss": 0.08838626861572266,
"step": 3100
},
{
"epoch": 1.0257245197004232,
"grad_norm": 0.6126351952552795,
"learning_rate": 3.2931726907630524e-05,
"loss": 0.07393273830413818,
"step": 3150
},
{
"epoch": 1.0420058612829697,
"grad_norm": 0.9907364845275879,
"learning_rate": 3.266037121458808e-05,
"loss": 0.1005620002746582,
"step": 3200
},
{
"epoch": 1.0582872028655161,
"grad_norm": 1.0079267024993896,
"learning_rate": 3.238901552154565e-05,
"loss": 0.0909033203125,
"step": 3250
},
{
"epoch": 1.0745685444480626,
"grad_norm": 1.661521315574646,
"learning_rate": 3.211765982850321e-05,
"loss": 0.07444488525390625,
"step": 3300
},
{
"epoch": 1.0908498860306088,
"grad_norm": 0.5184240341186523,
"learning_rate": 3.1846304135460766e-05,
"loss": 0.08309778213500976,
"step": 3350
},
{
"epoch": 1.1071312276131553,
"grad_norm": 1.1483348608016968,
"learning_rate": 3.1574948442418325e-05,
"loss": 0.07855434417724609,
"step": 3400
},
{
"epoch": 1.1234125691957018,
"grad_norm": 1.0581797361373901,
"learning_rate": 3.1303592749375884e-05,
"loss": 0.0779510498046875,
"step": 3450
},
{
"epoch": 1.1396939107782482,
"grad_norm": 0.3960680663585663,
"learning_rate": 3.103223705633344e-05,
"loss": 0.07558696269989014,
"step": 3500
},
{
"epoch": 1.1559752523607945,
"grad_norm": 0.7705583572387695,
"learning_rate": 3.076088136329101e-05,
"loss": 0.07015891551971436,
"step": 3550
},
{
"epoch": 1.172256593943341,
"grad_norm": 0.9814662933349609,
"learning_rate": 3.0489525670248564e-05,
"loss": 0.09184465408325196,
"step": 3600
},
{
"epoch": 1.1885379355258874,
"grad_norm": 0.16037984192371368,
"learning_rate": 3.0218169977206123e-05,
"loss": 0.10087477684020996,
"step": 3650
},
{
"epoch": 1.2048192771084336,
"grad_norm": 0.4658585488796234,
"learning_rate": 2.994681428416368e-05,
"loss": 0.0878927993774414,
"step": 3700
},
{
"epoch": 1.22110061869098,
"grad_norm": 0.6178460717201233,
"learning_rate": 2.967545859112124e-05,
"loss": 0.08248810768127442,
"step": 3750
},
{
"epoch": 1.2373819602735265,
"grad_norm": 0.8095784783363342,
"learning_rate": 2.9404102898078806e-05,
"loss": 0.07741629600524902,
"step": 3800
},
{
"epoch": 1.253663301856073,
"grad_norm": 0.7121015191078186,
"learning_rate": 2.9132747205036365e-05,
"loss": 0.06926633358001709,
"step": 3850
},
{
"epoch": 1.2699446434386195,
"grad_norm": 0.9626070857048035,
"learning_rate": 2.8861391511993923e-05,
"loss": 0.08737580299377441,
"step": 3900
},
{
"epoch": 1.2862259850211657,
"grad_norm": 1.617689847946167,
"learning_rate": 2.8590035818951482e-05,
"loss": 0.08954649925231933,
"step": 3950
},
{
"epoch": 1.3025073266037122,
"grad_norm": 0.17025412619113922,
"learning_rate": 2.831868012590904e-05,
"loss": 0.07303418159484863,
"step": 4000
},
{
"epoch": 1.3187886681862586,
"grad_norm": 0.8474647402763367,
"learning_rate": 2.80473244328666e-05,
"loss": 0.10014421463012696,
"step": 4050
},
{
"epoch": 1.3350700097688049,
"grad_norm": 1.1335641145706177,
"learning_rate": 2.7775968739824165e-05,
"loss": 0.09378931999206543,
"step": 4100
},
{
"epoch": 1.3513513513513513,
"grad_norm": 0.05914885550737381,
"learning_rate": 2.7504613046781724e-05,
"loss": 0.0685378360748291,
"step": 4150
},
{
"epoch": 1.3676326929338978,
"grad_norm": 0.49404996633529663,
"learning_rate": 2.7233257353739283e-05,
"loss": 0.0691972017288208,
"step": 4200
},
{
"epoch": 1.3839140345164442,
"grad_norm": 0.49692803621292114,
"learning_rate": 2.6961901660696842e-05,
"loss": 0.07013116836547852,
"step": 4250
},
{
"epoch": 1.4001953760989905,
"grad_norm": 1.2489663362503052,
"learning_rate": 2.66905459676544e-05,
"loss": 0.06815986156463623,
"step": 4300
},
{
"epoch": 1.416476717681537,
"grad_norm": 1.234505534172058,
"learning_rate": 2.641919027461196e-05,
"loss": 0.07438003540039062,
"step": 4350
},
{
"epoch": 1.4327580592640834,
"grad_norm": 0.5595135688781738,
"learning_rate": 2.6147834581569525e-05,
"loss": 0.08010281562805176,
"step": 4400
},
{
"epoch": 1.4490394008466296,
"grad_norm": 0.713994026184082,
"learning_rate": 2.5876478888527084e-05,
"loss": 0.08089996337890625,
"step": 4450
},
{
"epoch": 1.465320742429176,
"grad_norm": 0.41522467136383057,
"learning_rate": 2.5605123195484643e-05,
"loss": 0.07183042049407959,
"step": 4500
},
{
"epoch": 1.4816020840117226,
"grad_norm": 0.4079296290874481,
"learning_rate": 2.53337675024422e-05,
"loss": 0.07589399337768554,
"step": 4550
},
{
"epoch": 1.497883425594269,
"grad_norm": 0.3075660169124603,
"learning_rate": 2.506241180939976e-05,
"loss": 0.07919666767120362,
"step": 4600
},
{
"epoch": 1.5141647671768155,
"grad_norm": 1.5832964181900024,
"learning_rate": 2.4791056116357323e-05,
"loss": 0.06133227825164795,
"step": 4650
},
{
"epoch": 1.530446108759362,
"grad_norm": 0.32941189408302307,
"learning_rate": 2.451970042331488e-05,
"loss": 0.07278666496276856,
"step": 4700
},
{
"epoch": 1.5467274503419082,
"grad_norm": 0.5237034559249878,
"learning_rate": 2.4248344730272444e-05,
"loss": 0.07373996734619141,
"step": 4750
},
{
"epoch": 1.5630087919244544,
"grad_norm": 0.056225214153528214,
"learning_rate": 2.3976989037230003e-05,
"loss": 0.08032115936279297,
"step": 4800
},
{
"epoch": 1.5792901335070009,
"grad_norm": 0.6325415372848511,
"learning_rate": 2.370563334418756e-05,
"loss": 0.08559741973876953,
"step": 4850
},
{
"epoch": 1.5955714750895473,
"grad_norm": 1.230356216430664,
"learning_rate": 2.3434277651145124e-05,
"loss": 0.07180691242218018,
"step": 4900
},
{
"epoch": 1.6118528166720938,
"grad_norm": 3.57700252532959,
"learning_rate": 2.3162921958102682e-05,
"loss": 0.06951488494873047,
"step": 4950
},
{
"epoch": 1.6281341582546403,
"grad_norm": 1.004461646080017,
"learning_rate": 2.289156626506024e-05,
"loss": 0.057218775749206544,
"step": 5000
},
{
"epoch": 1.6444154998371867,
"grad_norm": 0.44509896636009216,
"learning_rate": 2.2620210572017803e-05,
"loss": 0.08383867263793945,
"step": 5050
},
{
"epoch": 1.660696841419733,
"grad_norm": 0.6665693521499634,
"learning_rate": 2.2348854878975362e-05,
"loss": 0.0708467960357666,
"step": 5100
},
{
"epoch": 1.6769781830022794,
"grad_norm": 0.520028293132782,
"learning_rate": 2.207749918593292e-05,
"loss": 0.07018136024475098,
"step": 5150
},
{
"epoch": 1.6932595245848256,
"grad_norm": 0.7581444382667542,
"learning_rate": 2.1806143492890483e-05,
"loss": 0.06788209915161132,
"step": 5200
},
{
"epoch": 1.709540866167372,
"grad_norm": 0.34040266275405884,
"learning_rate": 2.1534787799848042e-05,
"loss": 0.08334577560424805,
"step": 5250
},
{
"epoch": 1.7258222077499186,
"grad_norm": 0.5161302089691162,
"learning_rate": 2.1263432106805604e-05,
"loss": 0.06911201477050781,
"step": 5300
},
{
"epoch": 1.742103549332465,
"grad_norm": 0.8025581240653992,
"learning_rate": 2.0992076413763163e-05,
"loss": 0.06495306968688964,
"step": 5350
},
{
"epoch": 1.7583848909150115,
"grad_norm": 1.0504302978515625,
"learning_rate": 2.0720720720720722e-05,
"loss": 0.06523369789123536,
"step": 5400
},
{
"epoch": 1.774666232497558,
"grad_norm": 1.5722064971923828,
"learning_rate": 2.0449365027678284e-05,
"loss": 0.06998776435852051,
"step": 5450
},
{
"epoch": 1.7909475740801042,
"grad_norm": 1.4498728513717651,
"learning_rate": 2.0178009334635843e-05,
"loss": 0.07263383388519287,
"step": 5500
},
{
"epoch": 1.8072289156626506,
"grad_norm": 0.1697084903717041,
"learning_rate": 1.9906653641593402e-05,
"loss": 0.06083515644073487,
"step": 5550
},
{
"epoch": 1.8235102572451969,
"grad_norm": 0.043431248515844345,
"learning_rate": 1.9635297948550964e-05,
"loss": 0.0591968297958374,
"step": 5600
},
{
"epoch": 1.8397915988277433,
"grad_norm": 0.9290309548377991,
"learning_rate": 1.9363942255508523e-05,
"loss": 0.060645227432250974,
"step": 5650
},
{
"epoch": 1.8560729404102898,
"grad_norm": 1.0422381162643433,
"learning_rate": 1.9092586562466082e-05,
"loss": 0.07442611217498779,
"step": 5700
},
{
"epoch": 1.8723542819928363,
"grad_norm": 0.3466901183128357,
"learning_rate": 1.8821230869423644e-05,
"loss": 0.07767025470733642,
"step": 5750
},
{
"epoch": 1.8886356235753827,
"grad_norm": 0.39657458662986755,
"learning_rate": 1.8549875176381203e-05,
"loss": 0.06347317218780518,
"step": 5800
},
{
"epoch": 1.904916965157929,
"grad_norm": 1.00450599193573,
"learning_rate": 1.827851948333876e-05,
"loss": 0.06967205524444581,
"step": 5850
},
{
"epoch": 1.9211983067404754,
"grad_norm": 1.1727004051208496,
"learning_rate": 1.800716379029632e-05,
"loss": 0.06747759819030762,
"step": 5900
},
{
"epoch": 1.9374796483230217,
"grad_norm": 0.8829087615013123,
"learning_rate": 1.7735808097253883e-05,
"loss": 0.07414731979370118,
"step": 5950
},
{
"epoch": 1.9537609899055681,
"grad_norm": 1.3967463970184326,
"learning_rate": 1.746445240421144e-05,
"loss": 0.06446901321411133,
"step": 6000
},
{
"epoch": 1.9700423314881146,
"grad_norm": 0.9375430345535278,
"learning_rate": 1.7193096711169e-05,
"loss": 0.06800864696502686,
"step": 6050
},
{
"epoch": 1.986323673070661,
"grad_norm": 0.9516276717185974,
"learning_rate": 1.692174101812656e-05,
"loss": 0.08866607666015625,
"step": 6100
},
{
"epoch": 2.0,
"eval_bertscore_f1": 0.9923369143584934,
"eval_bleu": 0.8956281706064034,
"eval_loss": 0.06535279005765915,
"eval_meteor": 0.938086576675145,
"eval_rouge1": 0.950788798151768,
"eval_rouge2": 0.9176212368118313,
"eval_runtime": 58.3038,
"eval_samples_per_second": 22.16,
"eval_steps_per_second": 2.779,
"step": 6142
},
{
"epoch": 2.0026050146532075,
"grad_norm": 0.3171500861644745,
"learning_rate": 1.665038532508412e-05,
"loss": 0.06093011379241944,
"step": 6150
},
{
"epoch": 2.018886356235754,
"grad_norm": 0.3844246566295624,
"learning_rate": 1.637902963204168e-05,
"loss": 0.05403701782226562,
"step": 6200
},
{
"epoch": 2.0351676978183004,
"grad_norm": 1.5091606378555298,
"learning_rate": 1.610767393899924e-05,
"loss": 0.06063016414642334,
"step": 6250
},
{
"epoch": 2.0514490394008464,
"grad_norm": 0.5060765743255615,
"learning_rate": 1.58363182459568e-05,
"loss": 0.06424860954284668,
"step": 6300
},
{
"epoch": 2.067730380983393,
"grad_norm": 0.5501185059547424,
"learning_rate": 1.556496255291436e-05,
"loss": 0.052588853836059574,
"step": 6350
},
{
"epoch": 2.0840117225659394,
"grad_norm": 0.5140529274940491,
"learning_rate": 1.529360685987192e-05,
"loss": 0.05470933437347412,
"step": 6400
},
{
"epoch": 2.100293064148486,
"grad_norm": 0.13059721887111664,
"learning_rate": 1.5022251166829483e-05,
"loss": 0.05880857944488525,
"step": 6450
},
{
"epoch": 2.1165744057310323,
"grad_norm": 0.5545864701271057,
"learning_rate": 1.4750895473787041e-05,
"loss": 0.05454400062561035,
"step": 6500
},
{
"epoch": 2.1328557473135787,
"grad_norm": 0.7566473484039307,
"learning_rate": 1.44795397807446e-05,
"loss": 0.05996315956115723,
"step": 6550
},
{
"epoch": 2.149137088896125,
"grad_norm": 0.6309687495231628,
"learning_rate": 1.420818408770216e-05,
"loss": 0.06270824909210206,
"step": 6600
},
{
"epoch": 2.165418430478671,
"grad_norm": 0.6882494688034058,
"learning_rate": 1.3936828394659721e-05,
"loss": 0.0579791259765625,
"step": 6650
},
{
"epoch": 2.1816997720612177,
"grad_norm": 0.5102435946464539,
"learning_rate": 1.366547270161728e-05,
"loss": 0.05909278869628906,
"step": 6700
},
{
"epoch": 2.197981113643764,
"grad_norm": 0.5612519979476929,
"learning_rate": 1.339411700857484e-05,
"loss": 0.0631598711013794,
"step": 6750
},
{
"epoch": 2.2142624552263106,
"grad_norm": 0.5335197448730469,
"learning_rate": 1.31227613155324e-05,
"loss": 0.061668686866760254,
"step": 6800
},
{
"epoch": 2.230543796808857,
"grad_norm": 0.26907965540885925,
"learning_rate": 1.285140562248996e-05,
"loss": 0.05813938617706299,
"step": 6850
},
{
"epoch": 2.2468251383914035,
"grad_norm": 0.9871731996536255,
"learning_rate": 1.258004992944752e-05,
"loss": 0.06166846752166748,
"step": 6900
},
{
"epoch": 2.26310647997395,
"grad_norm": 0.7092576622962952,
"learning_rate": 1.230869423640508e-05,
"loss": 0.05214274883270264,
"step": 6950
},
{
"epoch": 2.2793878215564964,
"grad_norm": 0.6084023714065552,
"learning_rate": 1.203733854336264e-05,
"loss": 0.06180807590484619,
"step": 7000
},
{
"epoch": 2.295669163139043,
"grad_norm": 1.0545355081558228,
"learning_rate": 1.17659828503202e-05,
"loss": 0.05810202598571777,
"step": 7050
},
{
"epoch": 2.311950504721589,
"grad_norm": 0.9563855528831482,
"learning_rate": 1.149462715727776e-05,
"loss": 0.05618003368377686,
"step": 7100
},
{
"epoch": 2.3282318463041354,
"grad_norm": 0.6173250079154968,
"learning_rate": 1.122327146423532e-05,
"loss": 0.06320930480957031,
"step": 7150
},
{
"epoch": 2.344513187886682,
"grad_norm": 0.08047935366630554,
"learning_rate": 1.095191577119288e-05,
"loss": 0.058518905639648434,
"step": 7200
},
{
"epoch": 2.3607945294692283,
"grad_norm": 0.6877385973930359,
"learning_rate": 1.0680560078150439e-05,
"loss": 0.07214242458343506,
"step": 7250
},
{
"epoch": 2.3770758710517748,
"grad_norm": 0.7921647429466248,
"learning_rate": 1.0409204385108e-05,
"loss": 0.0554658842086792,
"step": 7300
},
{
"epoch": 2.393357212634321,
"grad_norm": 0.23214460909366608,
"learning_rate": 1.013784869206556e-05,
"loss": 0.05945809364318848,
"step": 7350
},
{
"epoch": 2.4096385542168672,
"grad_norm": 0.23501083254814148,
"learning_rate": 9.86649299902312e-06,
"loss": 0.04184418678283691,
"step": 7400
},
{
"epoch": 2.4259198957994137,
"grad_norm": 0.19243040680885315,
"learning_rate": 9.59513730598068e-06,
"loss": 0.0685301399230957,
"step": 7450
},
{
"epoch": 2.44220123738196,
"grad_norm": 0.03571745380759239,
"learning_rate": 9.32378161293824e-06,
"loss": 0.05060723781585694,
"step": 7500
},
{
"epoch": 2.4584825789645066,
"grad_norm": 0.06310860812664032,
"learning_rate": 9.0524259198958e-06,
"loss": 0.06717358589172363,
"step": 7550
},
{
"epoch": 2.474763920547053,
"grad_norm": 0.4403184950351715,
"learning_rate": 8.78107022685336e-06,
"loss": 0.0508097505569458,
"step": 7600
},
{
"epoch": 2.4910452621295995,
"grad_norm": 0.1725953370332718,
"learning_rate": 8.50971453381092e-06,
"loss": 0.06436698913574218,
"step": 7650
},
{
"epoch": 2.507326603712146,
"grad_norm": 0.560205340385437,
"learning_rate": 8.23835884076848e-06,
"loss": 0.05466559410095215,
"step": 7700
},
{
"epoch": 2.5236079452946925,
"grad_norm": 0.8589635491371155,
"learning_rate": 7.96700314772604e-06,
"loss": 0.052462191581726075,
"step": 7750
},
{
"epoch": 2.539889286877239,
"grad_norm": 0.40856632590293884,
"learning_rate": 7.6956474546836e-06,
"loss": 0.057110257148742676,
"step": 7800
},
{
"epoch": 2.556170628459785,
"grad_norm": 0.2351612001657486,
"learning_rate": 7.424291761641159e-06,
"loss": 0.049067635536193845,
"step": 7850
},
{
"epoch": 2.5724519700423314,
"grad_norm": 0.3204529583454132,
"learning_rate": 7.15293606859872e-06,
"loss": 0.049645824432373045,
"step": 7900
},
{
"epoch": 2.588733311624878,
"grad_norm": 0.43326708674430847,
"learning_rate": 6.881580375556279e-06,
"loss": 0.050102224349975584,
"step": 7950
},
{
"epoch": 2.6050146532074243,
"grad_norm": 1.065234899520874,
"learning_rate": 6.610224682513839e-06,
"loss": 0.06275768280029297,
"step": 8000
},
{
"epoch": 2.6212959947899708,
"grad_norm": 0.26160168647766113,
"learning_rate": 6.3388689894714e-06,
"loss": 0.04881012439727783,
"step": 8050
},
{
"epoch": 2.6375773363725172,
"grad_norm": 0.6686789989471436,
"learning_rate": 6.067513296428959e-06,
"loss": 0.058712401390075684,
"step": 8100
},
{
"epoch": 2.6538586779550632,
"grad_norm": 0.4735671281814575,
"learning_rate": 5.796157603386519e-06,
"loss": 0.05793766498565674,
"step": 8150
},
{
"epoch": 2.6701400195376097,
"grad_norm": 0.9112767577171326,
"learning_rate": 5.5248019103440796e-06,
"loss": 0.05646980285644531,
"step": 8200
},
{
"epoch": 2.686421361120156,
"grad_norm": 0.3665359914302826,
"learning_rate": 5.253446217301639e-06,
"loss": 0.05863104820251465,
"step": 8250
},
{
"epoch": 2.7027027027027026,
"grad_norm": 0.39087387919425964,
"learning_rate": 4.982090524259199e-06,
"loss": 0.04811685085296631,
"step": 8300
},
{
"epoch": 2.718984044285249,
"grad_norm": 0.7103152871131897,
"learning_rate": 4.7107348312167594e-06,
"loss": 0.0660721492767334,
"step": 8350
},
{
"epoch": 2.7352653858677956,
"grad_norm": 0.30644118785858154,
"learning_rate": 4.439379138174319e-06,
"loss": 0.061232595443725585,
"step": 8400
},
{
"epoch": 2.751546727450342,
"grad_norm": 0.6912480592727661,
"learning_rate": 4.16802344513188e-06,
"loss": 0.0465062952041626,
"step": 8450
},
{
"epoch": 2.7678280690328885,
"grad_norm": 0.2372223436832428,
"learning_rate": 3.896667752089439e-06,
"loss": 0.05613251686096191,
"step": 8500
},
{
"epoch": 2.784109410615435,
"grad_norm": 0.3588544428348541,
"learning_rate": 3.6253120590469985e-06,
"loss": 0.06228278636932373,
"step": 8550
},
{
"epoch": 2.800390752197981,
"grad_norm": 0.8760668039321899,
"learning_rate": 3.353956366004559e-06,
"loss": 0.058021135330200195,
"step": 8600
},
{
"epoch": 2.8166720937805274,
"grad_norm": 0.39105167984962463,
"learning_rate": 3.0826006729621187e-06,
"loss": 0.05042066097259521,
"step": 8650
},
{
"epoch": 2.832953435363074,
"grad_norm": 0.8453779816627502,
"learning_rate": 2.811244979919679e-06,
"loss": 0.05310141086578369,
"step": 8700
},
{
"epoch": 2.8492347769456203,
"grad_norm": 0.3931414484977722,
"learning_rate": 2.539889286877239e-06,
"loss": 0.04613284111022949,
"step": 8750
},
{
"epoch": 2.865516118528167,
"grad_norm": 0.601372480392456,
"learning_rate": 2.268533593834799e-06,
"loss": 0.055337414741516114,
"step": 8800
},
{
"epoch": 2.8817974601107132,
"grad_norm": 0.40091976523399353,
"learning_rate": 1.9971779007923587e-06,
"loss": 0.05075720310211182,
"step": 8850
},
{
"epoch": 2.8980788016932593,
"grad_norm": 0.9332064986228943,
"learning_rate": 1.7258222077499185e-06,
"loss": 0.04717796325683594,
"step": 8900
},
{
"epoch": 2.9143601432758057,
"grad_norm": 0.7941976189613342,
"learning_rate": 1.4544665147074786e-06,
"loss": 0.07562547206878661,
"step": 8950
},
{
"epoch": 2.930641484858352,
"grad_norm": 0.6598140597343445,
"learning_rate": 1.1831108216650385e-06,
"loss": 0.05916054248809814,
"step": 9000
},
{
"epoch": 2.9469228264408986,
"grad_norm": 0.07086297869682312,
"learning_rate": 9.117551286225986e-07,
"loss": 0.05104278087615967,
"step": 9050
},
{
"epoch": 2.963204168023445,
"grad_norm": 0.5035263299942017,
"learning_rate": 6.403994355801585e-07,
"loss": 0.04357606887817383,
"step": 9100
},
{
"epoch": 2.9794855096059916,
"grad_norm": 0.28602153062820435,
"learning_rate": 3.690437425377185e-07,
"loss": 0.04314669132232666,
"step": 9150
},
{
"epoch": 2.995766851188538,
"grad_norm": 0.24384021759033203,
"learning_rate": 9.768804949527842e-08,
"loss": 0.04970499038696289,
"step": 9200
},
{
"epoch": 3.0,
"eval_bertscore_f1": 0.9924511363724068,
"eval_bleu": 0.9035076605970417,
"eval_loss": 0.058707889169454575,
"eval_meteor": 0.9399283468508673,
"eval_rouge1": 0.9525840238092467,
"eval_rouge2": 0.9200012975801428,
"eval_runtime": 58.5972,
"eval_samples_per_second": 22.049,
"eval_steps_per_second": 2.765,
"step": 9213
}
],
"logging_steps": 50,
"max_steps": 9213,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.24385758724096e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}