codet5_qlora / checkpoint-14145 /trainer_state.json
gpol13's picture
Upload folder using huggingface_hub
70116e4 verified
{
"best_global_step": 14145,
"best_metric": 0.9660587414250811,
"best_model_checkpoint": "/kaggle/working/codet5-k8s-qlora/checkpoint-14145",
"epoch": 5.0,
"eval_steps": 500,
"global_step": 14145,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.017674089784376106,
"grad_norm": 2.658311605453491,
"learning_rate": 4.983032873806999e-05,
"loss": 4.0893,
"step": 50
},
{
"epoch": 0.03534817956875221,
"grad_norm": 6.100900173187256,
"learning_rate": 4.9657122658183106e-05,
"loss": 3.214,
"step": 100
},
{
"epoch": 0.053022269353128315,
"grad_norm": 2.4247324466705322,
"learning_rate": 4.948391657829622e-05,
"loss": 2.2694,
"step": 150
},
{
"epoch": 0.07069635913750442,
"grad_norm": 2.4390416145324707,
"learning_rate": 4.930717568045246e-05,
"loss": 1.9621,
"step": 200
},
{
"epoch": 0.08837044892188052,
"grad_norm": 3.003971576690674,
"learning_rate": 4.91304347826087e-05,
"loss": 1.8377,
"step": 250
},
{
"epoch": 0.10604453870625663,
"grad_norm": 2.6893651485443115,
"learning_rate": 4.895369388476494e-05,
"loss": 1.7639,
"step": 300
},
{
"epoch": 0.12371862849063273,
"grad_norm": 2.8361988067626953,
"learning_rate": 4.8776952986921177e-05,
"loss": 1.6632,
"step": 350
},
{
"epoch": 0.14139271827500885,
"grad_norm": 2.17179012298584,
"learning_rate": 4.8600212089077416e-05,
"loss": 1.5525,
"step": 400
},
{
"epoch": 0.15906680805938495,
"grad_norm": 4.485565185546875,
"learning_rate": 4.842700600919053e-05,
"loss": 1.4975,
"step": 450
},
{
"epoch": 0.17674089784376104,
"grad_norm": 3.197230577468872,
"learning_rate": 4.825026511134676e-05,
"loss": 1.4888,
"step": 500
},
{
"epoch": 0.19441498762813716,
"grad_norm": 2.8129756450653076,
"learning_rate": 4.807352421350301e-05,
"loss": 1.32,
"step": 550
},
{
"epoch": 0.21208907741251326,
"grad_norm": 2.888892650604248,
"learning_rate": 4.789678331565924e-05,
"loss": 1.4137,
"step": 600
},
{
"epoch": 0.22976316719688936,
"grad_norm": 3.6058623790740967,
"learning_rate": 4.7720042417815487e-05,
"loss": 1.3793,
"step": 650
},
{
"epoch": 0.24743725698126545,
"grad_norm": 3.077688217163086,
"learning_rate": 4.7543301519971726e-05,
"loss": 1.2157,
"step": 700
},
{
"epoch": 0.2651113467656416,
"grad_norm": 4.21675443649292,
"learning_rate": 4.7366560622127965e-05,
"loss": 1.3435,
"step": 750
},
{
"epoch": 0.2827854365500177,
"grad_norm": 3.459958076477051,
"learning_rate": 4.7189819724284204e-05,
"loss": 1.2747,
"step": 800
},
{
"epoch": 0.30045952633439377,
"grad_norm": 3.2092440128326416,
"learning_rate": 4.701307882644044e-05,
"loss": 1.1624,
"step": 850
},
{
"epoch": 0.3181336161187699,
"grad_norm": 3.1231963634490967,
"learning_rate": 4.683633792859668e-05,
"loss": 1.1956,
"step": 900
},
{
"epoch": 0.335807705903146,
"grad_norm": 3.332000970840454,
"learning_rate": 4.6659597030752915e-05,
"loss": 1.118,
"step": 950
},
{
"epoch": 0.3534817956875221,
"grad_norm": 2.992741823196411,
"learning_rate": 4.648285613290916e-05,
"loss": 1.1513,
"step": 1000
},
{
"epoch": 0.3711558854718982,
"grad_norm": 2.8758022785186768,
"learning_rate": 4.630611523506539e-05,
"loss": 1.0998,
"step": 1050
},
{
"epoch": 0.38882997525627433,
"grad_norm": 3.870368480682373,
"learning_rate": 4.612937433722164e-05,
"loss": 1.0723,
"step": 1100
},
{
"epoch": 0.4065040650406504,
"grad_norm": 4.177937030792236,
"learning_rate": 4.595263343937787e-05,
"loss": 1.0612,
"step": 1150
},
{
"epoch": 0.4241781548250265,
"grad_norm": 2.760124921798706,
"learning_rate": 4.577589254153412e-05,
"loss": 1.0086,
"step": 1200
},
{
"epoch": 0.4418522446094026,
"grad_norm": 3.0196070671081543,
"learning_rate": 4.559915164369035e-05,
"loss": 1.06,
"step": 1250
},
{
"epoch": 0.4595263343937787,
"grad_norm": 2.649152994155884,
"learning_rate": 4.542241074584659e-05,
"loss": 0.9997,
"step": 1300
},
{
"epoch": 0.47720042417815484,
"grad_norm": 3.8896467685699463,
"learning_rate": 4.524566984800283e-05,
"loss": 1.0067,
"step": 1350
},
{
"epoch": 0.4948745139625309,
"grad_norm": 3.186890125274658,
"learning_rate": 4.506892895015907e-05,
"loss": 0.9501,
"step": 1400
},
{
"epoch": 0.512548603746907,
"grad_norm": 3.991669178009033,
"learning_rate": 4.489218805231531e-05,
"loss": 1.0347,
"step": 1450
},
{
"epoch": 0.5302226935312832,
"grad_norm": 11.242384910583496,
"learning_rate": 4.4715447154471546e-05,
"loss": 0.9635,
"step": 1500
},
{
"epoch": 0.5478967833156593,
"grad_norm": 2.5245680809020996,
"learning_rate": 4.4538706256627785e-05,
"loss": 0.9248,
"step": 1550
},
{
"epoch": 0.5655708731000354,
"grad_norm": 4.0713114738464355,
"learning_rate": 4.4361965358784024e-05,
"loss": 0.906,
"step": 1600
},
{
"epoch": 0.5832449628844114,
"grad_norm": 3.434156656265259,
"learning_rate": 4.4185224460940264e-05,
"loss": 0.9438,
"step": 1650
},
{
"epoch": 0.6009190526687875,
"grad_norm": 3.6341230869293213,
"learning_rate": 4.40084835630965e-05,
"loss": 0.8156,
"step": 1700
},
{
"epoch": 0.6185931424531637,
"grad_norm": 4.359820365905762,
"learning_rate": 4.383174266525274e-05,
"loss": 0.9052,
"step": 1750
},
{
"epoch": 0.6362672322375398,
"grad_norm": 3.804647445678711,
"learning_rate": 4.365500176740898e-05,
"loss": 0.8758,
"step": 1800
},
{
"epoch": 0.6539413220219159,
"grad_norm": 21.193464279174805,
"learning_rate": 4.347826086956522e-05,
"loss": 0.8776,
"step": 1850
},
{
"epoch": 0.671615411806292,
"grad_norm": 3.002357244491577,
"learning_rate": 4.330151997172146e-05,
"loss": 0.8658,
"step": 1900
},
{
"epoch": 0.689289501590668,
"grad_norm": 4.116176605224609,
"learning_rate": 4.31247790738777e-05,
"loss": 0.8419,
"step": 1950
},
{
"epoch": 0.7069635913750442,
"grad_norm": 6.561131954193115,
"learning_rate": 4.294803817603394e-05,
"loss": 0.8204,
"step": 2000
},
{
"epoch": 0.7246376811594203,
"grad_norm": 3.203460931777954,
"learning_rate": 4.277129727819018e-05,
"loss": 0.7557,
"step": 2050
},
{
"epoch": 0.7423117709437964,
"grad_norm": 4.0467705726623535,
"learning_rate": 4.259455638034641e-05,
"loss": 0.8053,
"step": 2100
},
{
"epoch": 0.7599858607281725,
"grad_norm": 3.575634002685547,
"learning_rate": 4.242135030045953e-05,
"loss": 0.8128,
"step": 2150
},
{
"epoch": 0.7776599505125487,
"grad_norm": 5.7353363037109375,
"learning_rate": 4.224460940261576e-05,
"loss": 0.8339,
"step": 2200
},
{
"epoch": 0.7953340402969247,
"grad_norm": 4.916664123535156,
"learning_rate": 4.206786850477201e-05,
"loss": 0.7758,
"step": 2250
},
{
"epoch": 0.8130081300813008,
"grad_norm": 4.233948230743408,
"learning_rate": 4.189112760692824e-05,
"loss": 0.7485,
"step": 2300
},
{
"epoch": 0.8306822198656769,
"grad_norm": 3.754826545715332,
"learning_rate": 4.171438670908449e-05,
"loss": 0.6988,
"step": 2350
},
{
"epoch": 0.848356309650053,
"grad_norm": 3.185098171234131,
"learning_rate": 4.153764581124072e-05,
"loss": 0.7067,
"step": 2400
},
{
"epoch": 0.8660303994344292,
"grad_norm": 3.515683650970459,
"learning_rate": 4.1360904913396966e-05,
"loss": 0.7559,
"step": 2450
},
{
"epoch": 0.8837044892188052,
"grad_norm": 4.783038139343262,
"learning_rate": 4.11841640155532e-05,
"loss": 0.7444,
"step": 2500
},
{
"epoch": 0.9013785790031813,
"grad_norm": 3.4242937564849854,
"learning_rate": 4.1007423117709444e-05,
"loss": 0.7676,
"step": 2550
},
{
"epoch": 0.9190526687875574,
"grad_norm": 3.3563663959503174,
"learning_rate": 4.0830682219865676e-05,
"loss": 0.7416,
"step": 2600
},
{
"epoch": 0.9367267585719335,
"grad_norm": 21.883926391601562,
"learning_rate": 4.0653941322021916e-05,
"loss": 0.6892,
"step": 2650
},
{
"epoch": 0.9544008483563097,
"grad_norm": 3.8259048461914062,
"learning_rate": 4.0477200424178155e-05,
"loss": 0.7489,
"step": 2700
},
{
"epoch": 0.9720749381406858,
"grad_norm": 3.026655912399292,
"learning_rate": 4.0300459526334394e-05,
"loss": 0.6679,
"step": 2750
},
{
"epoch": 0.9897490279250618,
"grad_norm": 7.62285041809082,
"learning_rate": 4.012371862849063e-05,
"loss": 0.7393,
"step": 2800
},
{
"epoch": 1.0,
"eval_bertscore_f1": 0.9551081777928342,
"eval_bleu": 0.47406093922979725,
"eval_loss": 0.5141507983207703,
"eval_meteor": 0.6443492142009581,
"eval_rouge1": 0.7935683439864762,
"eval_rouge2": 0.6796198647957756,
"eval_runtime": 1335.9702,
"eval_samples_per_second": 4.838,
"eval_steps_per_second": 0.605,
"step": 2829
},
{
"epoch": 1.007423117709438,
"grad_norm": 3.7401936054229736,
"learning_rate": 3.994697773064687e-05,
"loss": 0.7272,
"step": 2850
},
{
"epoch": 1.025097207493814,
"grad_norm": 4.575202941894531,
"learning_rate": 3.977023683280312e-05,
"loss": 0.6891,
"step": 2900
},
{
"epoch": 1.0427712972781902,
"grad_norm": 2.909268379211426,
"learning_rate": 3.959349593495935e-05,
"loss": 0.6751,
"step": 2950
},
{
"epoch": 1.0604453870625663,
"grad_norm": 5.258713722229004,
"learning_rate": 3.941675503711559e-05,
"loss": 0.7308,
"step": 3000
},
{
"epoch": 1.0781194768469424,
"grad_norm": 4.8982462882995605,
"learning_rate": 3.924001413927183e-05,
"loss": 0.5938,
"step": 3050
},
{
"epoch": 1.0957935666313185,
"grad_norm": 3.7401649951934814,
"learning_rate": 3.906327324142807e-05,
"loss": 0.7358,
"step": 3100
},
{
"epoch": 1.1134676564156947,
"grad_norm": 2.2274134159088135,
"learning_rate": 3.888653234358431e-05,
"loss": 0.6251,
"step": 3150
},
{
"epoch": 1.1311417462000706,
"grad_norm": 4.285720348358154,
"learning_rate": 3.870979144574055e-05,
"loss": 0.6773,
"step": 3200
},
{
"epoch": 1.148815835984447,
"grad_norm": 3.1202948093414307,
"learning_rate": 3.8533050547896786e-05,
"loss": 0.6485,
"step": 3250
},
{
"epoch": 1.1664899257688228,
"grad_norm": 2.96162486076355,
"learning_rate": 3.8356309650053025e-05,
"loss": 0.6733,
"step": 3300
},
{
"epoch": 1.184164015553199,
"grad_norm": 6.456724166870117,
"learning_rate": 3.817956875220926e-05,
"loss": 0.6142,
"step": 3350
},
{
"epoch": 1.201838105337575,
"grad_norm": 5.0712690353393555,
"learning_rate": 3.8002827854365503e-05,
"loss": 0.6952,
"step": 3400
},
{
"epoch": 1.2195121951219512,
"grad_norm": 5.074472904205322,
"learning_rate": 3.7826086956521736e-05,
"loss": 0.6147,
"step": 3450
},
{
"epoch": 1.2371862849063273,
"grad_norm": 4.572699546813965,
"learning_rate": 3.764934605867798e-05,
"loss": 0.6172,
"step": 3500
},
{
"epoch": 1.2548603746907034,
"grad_norm": 3.24722957611084,
"learning_rate": 3.747260516083422e-05,
"loss": 0.6657,
"step": 3550
},
{
"epoch": 1.2725344644750796,
"grad_norm": 3.6657183170318604,
"learning_rate": 3.729586426299046e-05,
"loss": 0.6999,
"step": 3600
},
{
"epoch": 1.2902085542594557,
"grad_norm": 3.2770209312438965,
"learning_rate": 3.71191233651467e-05,
"loss": 0.6882,
"step": 3650
},
{
"epoch": 1.3078826440438318,
"grad_norm": 4.611114501953125,
"learning_rate": 3.694238246730294e-05,
"loss": 0.6767,
"step": 3700
},
{
"epoch": 1.3255567338282077,
"grad_norm": 3.4801883697509766,
"learning_rate": 3.676564156945918e-05,
"loss": 0.6503,
"step": 3750
},
{
"epoch": 1.343230823612584,
"grad_norm": 4.582475185394287,
"learning_rate": 3.658890067161541e-05,
"loss": 0.5833,
"step": 3800
},
{
"epoch": 1.36090491339696,
"grad_norm": 3.0982961654663086,
"learning_rate": 3.6412159773771656e-05,
"loss": 0.6271,
"step": 3850
},
{
"epoch": 1.378579003181336,
"grad_norm": 3.592360734939575,
"learning_rate": 3.623541887592789e-05,
"loss": 0.6688,
"step": 3900
},
{
"epoch": 1.3962530929657122,
"grad_norm": 4.296905994415283,
"learning_rate": 3.6058677978084134e-05,
"loss": 0.5931,
"step": 3950
},
{
"epoch": 1.4139271827500883,
"grad_norm": 3.616574764251709,
"learning_rate": 3.588193708024037e-05,
"loss": 0.6297,
"step": 4000
},
{
"epoch": 1.4316012725344645,
"grad_norm": 3.1819770336151123,
"learning_rate": 3.570519618239661e-05,
"loss": 0.5801,
"step": 4050
},
{
"epoch": 1.4492753623188406,
"grad_norm": 3.5812184810638428,
"learning_rate": 3.5528455284552845e-05,
"loss": 0.5826,
"step": 4100
},
{
"epoch": 1.4669494521032167,
"grad_norm": 2.889911651611328,
"learning_rate": 3.5351714386709084e-05,
"loss": 0.5396,
"step": 4150
},
{
"epoch": 1.4846235418875928,
"grad_norm": 3.532849073410034,
"learning_rate": 3.5174973488865324e-05,
"loss": 0.5218,
"step": 4200
},
{
"epoch": 1.502297631671969,
"grad_norm": 2.939161777496338,
"learning_rate": 3.499823259102156e-05,
"loss": 0.5701,
"step": 4250
},
{
"epoch": 1.5199717214563448,
"grad_norm": 3.500262975692749,
"learning_rate": 3.48214916931778e-05,
"loss": 0.5117,
"step": 4300
},
{
"epoch": 1.5376458112407212,
"grad_norm": 3.612431526184082,
"learning_rate": 3.464475079533404e-05,
"loss": 0.5067,
"step": 4350
},
{
"epoch": 1.555319901025097,
"grad_norm": 3.3735318183898926,
"learning_rate": 3.446800989749028e-05,
"loss": 0.5403,
"step": 4400
},
{
"epoch": 1.5729939908094734,
"grad_norm": 28.255231857299805,
"learning_rate": 3.429126899964652e-05,
"loss": 0.5442,
"step": 4450
},
{
"epoch": 1.5906680805938493,
"grad_norm": 4.424487113952637,
"learning_rate": 3.411452810180276e-05,
"loss": 0.5769,
"step": 4500
},
{
"epoch": 1.6083421703782255,
"grad_norm": 4.6517109870910645,
"learning_rate": 3.3937787203959e-05,
"loss": 0.5291,
"step": 4550
},
{
"epoch": 1.6260162601626016,
"grad_norm": 4.276078701019287,
"learning_rate": 3.376104630611524e-05,
"loss": 0.6207,
"step": 4600
},
{
"epoch": 1.6436903499469777,
"grad_norm": 3.1325790882110596,
"learning_rate": 3.3584305408271476e-05,
"loss": 0.5807,
"step": 4650
},
{
"epoch": 1.6613644397313538,
"grad_norm": 3.2780227661132812,
"learning_rate": 3.3407564510427716e-05,
"loss": 0.5487,
"step": 4700
},
{
"epoch": 1.67903852951573,
"grad_norm": 3.9542007446289062,
"learning_rate": 3.3230823612583955e-05,
"loss": 0.6385,
"step": 4750
},
{
"epoch": 1.696712619300106,
"grad_norm": 4.091352462768555,
"learning_rate": 3.3054082714740194e-05,
"loss": 0.5845,
"step": 4800
},
{
"epoch": 1.714386709084482,
"grad_norm": 2.3576905727386475,
"learning_rate": 3.2877341816896426e-05,
"loss": 0.4949,
"step": 4850
},
{
"epoch": 1.7320607988688583,
"grad_norm": 3.200242519378662,
"learning_rate": 3.270060091905267e-05,
"loss": 0.5922,
"step": 4900
},
{
"epoch": 1.7497348886532342,
"grad_norm": 3.1346006393432617,
"learning_rate": 3.2523860021208905e-05,
"loss": 0.5259,
"step": 4950
},
{
"epoch": 1.7674089784376106,
"grad_norm": 3.4066524505615234,
"learning_rate": 3.234711912336515e-05,
"loss": 0.562,
"step": 5000
},
{
"epoch": 1.7850830682219865,
"grad_norm": 5.18930196762085,
"learning_rate": 3.217037822552138e-05,
"loss": 0.5825,
"step": 5050
},
{
"epoch": 1.8027571580063628,
"grad_norm": 4.159862995147705,
"learning_rate": 3.199363732767763e-05,
"loss": 0.5616,
"step": 5100
},
{
"epoch": 1.8204312477907387,
"grad_norm": 4.439573287963867,
"learning_rate": 3.181689642983386e-05,
"loss": 0.5334,
"step": 5150
},
{
"epoch": 1.8381053375751149,
"grad_norm": 6.196533203125,
"learning_rate": 3.164015553199011e-05,
"loss": 0.5887,
"step": 5200
},
{
"epoch": 1.855779427359491,
"grad_norm": 3.715372323989868,
"learning_rate": 3.146341463414634e-05,
"loss": 0.5379,
"step": 5250
},
{
"epoch": 1.873453517143867,
"grad_norm": 4.34264612197876,
"learning_rate": 3.128667373630258e-05,
"loss": 0.4827,
"step": 5300
},
{
"epoch": 1.8911276069282432,
"grad_norm": 2.337557315826416,
"learning_rate": 3.1109932838458825e-05,
"loss": 0.4685,
"step": 5350
},
{
"epoch": 1.9088016967126193,
"grad_norm": 3.325277805328369,
"learning_rate": 3.093319194061506e-05,
"loss": 0.4983,
"step": 5400
},
{
"epoch": 1.9264757864969955,
"grad_norm": 2.976592540740967,
"learning_rate": 3.0756451042771303e-05,
"loss": 0.5814,
"step": 5450
},
{
"epoch": 1.9441498762813714,
"grad_norm": 9.608305931091309,
"learning_rate": 3.0579710144927536e-05,
"loss": 0.5062,
"step": 5500
},
{
"epoch": 1.9618239660657477,
"grad_norm": 3.443791151046753,
"learning_rate": 3.040296924708378e-05,
"loss": 0.5092,
"step": 5550
},
{
"epoch": 1.9794980558501236,
"grad_norm": 3.4817845821380615,
"learning_rate": 3.0226228349240014e-05,
"loss": 0.5584,
"step": 5600
},
{
"epoch": 1.9971721456345,
"grad_norm": 3.2107975482940674,
"learning_rate": 3.0049487451396253e-05,
"loss": 0.506,
"step": 5650
},
{
"epoch": 2.0,
"eval_bertscore_f1": 0.9621683897930059,
"eval_bleu": 0.5494076455991572,
"eval_loss": 0.37199869751930237,
"eval_meteor": 0.7077566730507359,
"eval_rouge1": 0.8237541199852757,
"eval_rouge2": 0.7474098813874757,
"eval_runtime": 1281.6235,
"eval_samples_per_second": 5.044,
"eval_steps_per_second": 0.63,
"step": 5658
},
{
"epoch": 2.014846235418876,
"grad_norm": 3.7251229286193848,
"learning_rate": 2.9872746553552493e-05,
"loss": 0.4928,
"step": 5700
},
{
"epoch": 2.032520325203252,
"grad_norm": 3.801664113998413,
"learning_rate": 2.9696005655708732e-05,
"loss": 0.5748,
"step": 5750
},
{
"epoch": 2.050194414987628,
"grad_norm": 5.817806243896484,
"learning_rate": 2.9519264757864974e-05,
"loss": 0.4844,
"step": 5800
},
{
"epoch": 2.0678685047720045,
"grad_norm": 3.028961658477783,
"learning_rate": 2.934252386002121e-05,
"loss": 0.4626,
"step": 5850
},
{
"epoch": 2.0855425945563804,
"grad_norm": 3.974060297012329,
"learning_rate": 2.9165782962177453e-05,
"loss": 0.5274,
"step": 5900
},
{
"epoch": 2.1032166843407563,
"grad_norm": 2.532444953918457,
"learning_rate": 2.898904206433369e-05,
"loss": 0.4887,
"step": 5950
},
{
"epoch": 2.1208907741251326,
"grad_norm": 2.0569326877593994,
"learning_rate": 2.8812301166489924e-05,
"loss": 0.4353,
"step": 6000
},
{
"epoch": 2.1385648639095085,
"grad_norm": 3.0496156215667725,
"learning_rate": 2.8635560268646167e-05,
"loss": 0.4347,
"step": 6050
},
{
"epoch": 2.156238953693885,
"grad_norm": 2.635395050048828,
"learning_rate": 2.8458819370802403e-05,
"loss": 0.5406,
"step": 6100
},
{
"epoch": 2.1739130434782608,
"grad_norm": 4.091008186340332,
"learning_rate": 2.8282078472958645e-05,
"loss": 0.4885,
"step": 6150
},
{
"epoch": 2.191587133262637,
"grad_norm": 3.228792905807495,
"learning_rate": 2.810533757511488e-05,
"loss": 0.5072,
"step": 6200
},
{
"epoch": 2.209261223047013,
"grad_norm": 2.479149341583252,
"learning_rate": 2.7928596677271124e-05,
"loss": 0.5475,
"step": 6250
},
{
"epoch": 2.2269353128313893,
"grad_norm": 4.617306709289551,
"learning_rate": 2.775185577942736e-05,
"loss": 0.5468,
"step": 6300
},
{
"epoch": 2.2446094026157652,
"grad_norm": 4.416631698608398,
"learning_rate": 2.7575114881583602e-05,
"loss": 0.5125,
"step": 6350
},
{
"epoch": 2.262283492400141,
"grad_norm": 3.7900924682617188,
"learning_rate": 2.7398373983739838e-05,
"loss": 0.5824,
"step": 6400
},
{
"epoch": 2.2799575821845175,
"grad_norm": 3.695364236831665,
"learning_rate": 2.7221633085896077e-05,
"loss": 0.4806,
"step": 6450
},
{
"epoch": 2.297631671968894,
"grad_norm": 2.609520196914673,
"learning_rate": 2.7044892188052316e-05,
"loss": 0.4537,
"step": 6500
},
{
"epoch": 2.3153057617532697,
"grad_norm": 4.006641864776611,
"learning_rate": 2.6868151290208555e-05,
"loss": 0.5172,
"step": 6550
},
{
"epoch": 2.3329798515376456,
"grad_norm": 3.581960439682007,
"learning_rate": 2.6691410392364795e-05,
"loss": 0.5089,
"step": 6600
},
{
"epoch": 2.350653941322022,
"grad_norm": 2.6414718627929688,
"learning_rate": 2.6514669494521034e-05,
"loss": 0.4936,
"step": 6650
},
{
"epoch": 2.368328031106398,
"grad_norm": 3.3889434337615967,
"learning_rate": 2.6337928596677276e-05,
"loss": 0.535,
"step": 6700
},
{
"epoch": 2.3860021208907742,
"grad_norm": 4.371047496795654,
"learning_rate": 2.6161187698833512e-05,
"loss": 0.4651,
"step": 6750
},
{
"epoch": 2.40367621067515,
"grad_norm": 4.057021617889404,
"learning_rate": 2.5984446800989748e-05,
"loss": 0.4369,
"step": 6800
},
{
"epoch": 2.4213503004595265,
"grad_norm": 4.6812615394592285,
"learning_rate": 2.580770590314599e-05,
"loss": 0.5067,
"step": 6850
},
{
"epoch": 2.4390243902439024,
"grad_norm": 6.067279815673828,
"learning_rate": 2.5630965005302226e-05,
"loss": 0.4901,
"step": 6900
},
{
"epoch": 2.4566984800282787,
"grad_norm": 3.8635661602020264,
"learning_rate": 2.545422410745847e-05,
"loss": 0.415,
"step": 6950
},
{
"epoch": 2.4743725698126546,
"grad_norm": 1.7011466026306152,
"learning_rate": 2.5277483209614705e-05,
"loss": 0.4893,
"step": 7000
},
{
"epoch": 2.4920466595970305,
"grad_norm": 3.8497934341430664,
"learning_rate": 2.5100742311770947e-05,
"loss": 0.4504,
"step": 7050
},
{
"epoch": 2.509720749381407,
"grad_norm": 3.670374631881714,
"learning_rate": 2.4924001413927183e-05,
"loss": 0.4883,
"step": 7100
},
{
"epoch": 2.5273948391657832,
"grad_norm": 3.130357503890991,
"learning_rate": 2.4747260516083422e-05,
"loss": 0.5031,
"step": 7150
},
{
"epoch": 2.545068928950159,
"grad_norm": 3.647500514984131,
"learning_rate": 2.457051961823966e-05,
"loss": 0.4368,
"step": 7200
},
{
"epoch": 2.562743018734535,
"grad_norm": 3.6657369136810303,
"learning_rate": 2.43937787203959e-05,
"loss": 0.4686,
"step": 7250
},
{
"epoch": 2.5804171085189114,
"grad_norm": 5.371551036834717,
"learning_rate": 2.421703782255214e-05,
"loss": 0.4433,
"step": 7300
},
{
"epoch": 2.5980911983032873,
"grad_norm": 3.593418598175049,
"learning_rate": 2.404029692470838e-05,
"loss": 0.4901,
"step": 7350
},
{
"epoch": 2.6157652880876636,
"grad_norm": 3.1181206703186035,
"learning_rate": 2.3863556026864618e-05,
"loss": 0.4834,
"step": 7400
},
{
"epoch": 2.6334393778720395,
"grad_norm": 4.218138217926025,
"learning_rate": 2.3686815129020857e-05,
"loss": 0.5013,
"step": 7450
},
{
"epoch": 2.6511134676564154,
"grad_norm": 3.5063066482543945,
"learning_rate": 2.3510074231177097e-05,
"loss": 0.4705,
"step": 7500
},
{
"epoch": 2.6687875574407918,
"grad_norm": 2.8965365886688232,
"learning_rate": 2.3333333333333336e-05,
"loss": 0.464,
"step": 7550
},
{
"epoch": 2.686461647225168,
"grad_norm": 2.336358070373535,
"learning_rate": 2.3156592435489575e-05,
"loss": 0.4591,
"step": 7600
},
{
"epoch": 2.704135737009544,
"grad_norm": 3.5483410358428955,
"learning_rate": 2.2979851537645814e-05,
"loss": 0.4419,
"step": 7650
},
{
"epoch": 2.72180982679392,
"grad_norm": 4.550882816314697,
"learning_rate": 2.280311063980205e-05,
"loss": 0.4181,
"step": 7700
},
{
"epoch": 2.7394839165782963,
"grad_norm": 4.471234321594238,
"learning_rate": 2.262636974195829e-05,
"loss": 0.4558,
"step": 7750
},
{
"epoch": 2.757158006362672,
"grad_norm": 3.0595200061798096,
"learning_rate": 2.244962884411453e-05,
"loss": 0.4188,
"step": 7800
},
{
"epoch": 2.7748320961470485,
"grad_norm": 7.5111403465271,
"learning_rate": 2.2272887946270768e-05,
"loss": 0.4834,
"step": 7850
},
{
"epoch": 2.7925061859314244,
"grad_norm": 2.2414655685424805,
"learning_rate": 2.2096147048427007e-05,
"loss": 0.442,
"step": 7900
},
{
"epoch": 2.8101802757158008,
"grad_norm": 4.036431789398193,
"learning_rate": 2.1919406150583246e-05,
"loss": 0.4254,
"step": 7950
},
{
"epoch": 2.8278543655001767,
"grad_norm": 3.3172266483306885,
"learning_rate": 2.1742665252739485e-05,
"loss": 0.4852,
"step": 8000
},
{
"epoch": 2.845528455284553,
"grad_norm": 4.143049240112305,
"learning_rate": 2.1565924354895724e-05,
"loss": 0.4858,
"step": 8050
},
{
"epoch": 2.863202545068929,
"grad_norm": 5.017402172088623,
"learning_rate": 2.138918345705196e-05,
"loss": 0.3824,
"step": 8100
},
{
"epoch": 2.880876634853305,
"grad_norm": 2.974952459335327,
"learning_rate": 2.1212442559208203e-05,
"loss": 0.4777,
"step": 8150
},
{
"epoch": 2.898550724637681,
"grad_norm": 7.074586868286133,
"learning_rate": 2.1035701661364442e-05,
"loss": 0.4465,
"step": 8200
},
{
"epoch": 2.9162248144220575,
"grad_norm": 3.585792064666748,
"learning_rate": 2.085896076352068e-05,
"loss": 0.4307,
"step": 8250
},
{
"epoch": 2.9338989042064334,
"grad_norm": 1.6561566591262817,
"learning_rate": 2.068221986567692e-05,
"loss": 0.3917,
"step": 8300
},
{
"epoch": 2.9515729939908093,
"grad_norm": 4.920962810516357,
"learning_rate": 2.050547896783316e-05,
"loss": 0.4334,
"step": 8350
},
{
"epoch": 2.9692470837751856,
"grad_norm": 2.6819636821746826,
"learning_rate": 2.03287380699894e-05,
"loss": 0.4679,
"step": 8400
},
{
"epoch": 2.9869211735595615,
"grad_norm": 3.442260265350342,
"learning_rate": 2.0151997172145634e-05,
"loss": 0.4466,
"step": 8450
},
{
"epoch": 3.0,
"eval_bertscore_f1": 0.9646675708510055,
"eval_bleu": 0.5742982540038749,
"eval_loss": 0.3243306279182434,
"eval_meteor": 0.7281699575301964,
"eval_rouge1": 0.8328916554556949,
"eval_rouge2": 0.7666932565109175,
"eval_runtime": 1288.8847,
"eval_samples_per_second": 5.015,
"eval_steps_per_second": 0.627,
"step": 8487
},
{
"epoch": 3.004595263343938,
"grad_norm": 3.7356512546539307,
"learning_rate": 1.9975256274301874e-05,
"loss": 0.394,
"step": 8500
},
{
"epoch": 3.022269353128314,
"grad_norm": 3.7725515365600586,
"learning_rate": 1.9798515376458113e-05,
"loss": 0.4484,
"step": 8550
},
{
"epoch": 3.03994344291269,
"grad_norm": 2.475839138031006,
"learning_rate": 1.9621774478614352e-05,
"loss": 0.4463,
"step": 8600
},
{
"epoch": 3.057617532697066,
"grad_norm": 2.853266716003418,
"learning_rate": 1.944503358077059e-05,
"loss": 0.4398,
"step": 8650
},
{
"epoch": 3.0752916224814424,
"grad_norm": 2.7079474925994873,
"learning_rate": 1.926829268292683e-05,
"loss": 0.4021,
"step": 8700
},
{
"epoch": 3.0929657122658183,
"grad_norm": 5.04539680480957,
"learning_rate": 1.909155178508307e-05,
"loss": 0.3996,
"step": 8750
},
{
"epoch": 3.110639802050194,
"grad_norm": 4.626221656799316,
"learning_rate": 1.8918345705196184e-05,
"loss": 0.4948,
"step": 8800
},
{
"epoch": 3.1283138918345705,
"grad_norm": 4.644408226013184,
"learning_rate": 1.8741604807352423e-05,
"loss": 0.4156,
"step": 8850
},
{
"epoch": 3.1459879816189464,
"grad_norm": 4.299105167388916,
"learning_rate": 1.8564863909508662e-05,
"loss": 0.3977,
"step": 8900
},
{
"epoch": 3.163662071403323,
"grad_norm": 4.650149345397949,
"learning_rate": 1.83881230116649e-05,
"loss": 0.4229,
"step": 8950
},
{
"epoch": 3.1813361611876987,
"grad_norm": 2.89013409614563,
"learning_rate": 1.821138211382114e-05,
"loss": 0.4506,
"step": 9000
},
{
"epoch": 3.199010250972075,
"grad_norm": 2.281370162963867,
"learning_rate": 1.8034641215977376e-05,
"loss": 0.4288,
"step": 9050
},
{
"epoch": 3.216684340756451,
"grad_norm": 4.948707103729248,
"learning_rate": 1.7857900318133615e-05,
"loss": 0.4633,
"step": 9100
},
{
"epoch": 3.2343584305408273,
"grad_norm": 3.5856571197509766,
"learning_rate": 1.7681159420289855e-05,
"loss": 0.3965,
"step": 9150
},
{
"epoch": 3.252032520325203,
"grad_norm": 3.416271686553955,
"learning_rate": 1.7504418522446094e-05,
"loss": 0.4904,
"step": 9200
},
{
"epoch": 3.2697066101095795,
"grad_norm": 3.599717617034912,
"learning_rate": 1.7327677624602333e-05,
"loss": 0.4648,
"step": 9250
},
{
"epoch": 3.2873806998939554,
"grad_norm": 2.8439853191375732,
"learning_rate": 1.7150936726758572e-05,
"loss": 0.3734,
"step": 9300
},
{
"epoch": 3.3050547896783318,
"grad_norm": 4.0927863121032715,
"learning_rate": 1.697419582891481e-05,
"loss": 0.3913,
"step": 9350
},
{
"epoch": 3.3227288794627077,
"grad_norm": 4.16766881942749,
"learning_rate": 1.679745493107105e-05,
"loss": 0.4303,
"step": 9400
},
{
"epoch": 3.3404029692470836,
"grad_norm": 3.417738199234009,
"learning_rate": 1.662071403322729e-05,
"loss": 0.4068,
"step": 9450
},
{
"epoch": 3.35807705903146,
"grad_norm": 4.66575813293457,
"learning_rate": 1.644397313538353e-05,
"loss": 0.5116,
"step": 9500
},
{
"epoch": 3.375751148815836,
"grad_norm": 6.112340927124023,
"learning_rate": 1.6267232237539768e-05,
"loss": 0.4244,
"step": 9550
},
{
"epoch": 3.393425238600212,
"grad_norm": 3.322610378265381,
"learning_rate": 1.6090491339696007e-05,
"loss": 0.4252,
"step": 9600
},
{
"epoch": 3.411099328384588,
"grad_norm": 4.941850185394287,
"learning_rate": 1.5913750441852247e-05,
"loss": 0.402,
"step": 9650
},
{
"epoch": 3.4287734181689644,
"grad_norm": 2.177600860595703,
"learning_rate": 1.5737009544008486e-05,
"loss": 0.3437,
"step": 9700
},
{
"epoch": 3.4464475079533403,
"grad_norm": 2.1570093631744385,
"learning_rate": 1.5560268646164725e-05,
"loss": 0.4871,
"step": 9750
},
{
"epoch": 3.4641215977377167,
"grad_norm": 7.6717305183410645,
"learning_rate": 1.538352774832096e-05,
"loss": 0.4224,
"step": 9800
},
{
"epoch": 3.4817956875220926,
"grad_norm": 3.082805871963501,
"learning_rate": 1.52067868504772e-05,
"loss": 0.4474,
"step": 9850
},
{
"epoch": 3.499469777306469,
"grad_norm": 2.8141167163848877,
"learning_rate": 1.5033580770590316e-05,
"loss": 0.4476,
"step": 9900
},
{
"epoch": 3.517143867090845,
"grad_norm": 3.179436206817627,
"learning_rate": 1.4856839872746553e-05,
"loss": 0.3936,
"step": 9950
},
{
"epoch": 3.534817956875221,
"grad_norm": 3.908020257949829,
"learning_rate": 1.4680098974902792e-05,
"loss": 0.4164,
"step": 10000
},
{
"epoch": 3.552492046659597,
"grad_norm": 4.998553276062012,
"learning_rate": 1.4503358077059032e-05,
"loss": 0.4534,
"step": 10050
},
{
"epoch": 3.570166136443973,
"grad_norm": 4.064126014709473,
"learning_rate": 1.432661717921527e-05,
"loss": 0.458,
"step": 10100
},
{
"epoch": 3.5878402262283493,
"grad_norm": 2.2527036666870117,
"learning_rate": 1.414987628137151e-05,
"loss": 0.4285,
"step": 10150
},
{
"epoch": 3.605514316012725,
"grad_norm": 3.3799755573272705,
"learning_rate": 1.397313538352775e-05,
"loss": 0.5488,
"step": 10200
},
{
"epoch": 3.6231884057971016,
"grad_norm": 3.4317479133605957,
"learning_rate": 1.3796394485683988e-05,
"loss": 0.4326,
"step": 10250
},
{
"epoch": 3.6408624955814775,
"grad_norm": 2.245337724685669,
"learning_rate": 1.3619653587840228e-05,
"loss": 0.4279,
"step": 10300
},
{
"epoch": 3.658536585365854,
"grad_norm": 2.9092109203338623,
"learning_rate": 1.3442912689996465e-05,
"loss": 0.4116,
"step": 10350
},
{
"epoch": 3.6762106751502297,
"grad_norm": 2.79837965965271,
"learning_rate": 1.3266171792152704e-05,
"loss": 0.4506,
"step": 10400
},
{
"epoch": 3.693884764934606,
"grad_norm": 1.416994333267212,
"learning_rate": 1.3089430894308943e-05,
"loss": 0.3532,
"step": 10450
},
{
"epoch": 3.711558854718982,
"grad_norm": 4.927233695983887,
"learning_rate": 1.2912689996465183e-05,
"loss": 0.4302,
"step": 10500
},
{
"epoch": 3.729232944503358,
"grad_norm": 2.069500684738159,
"learning_rate": 1.2735949098621422e-05,
"loss": 0.4608,
"step": 10550
},
{
"epoch": 3.746907034287734,
"grad_norm": 3.3507018089294434,
"learning_rate": 1.2559208200777661e-05,
"loss": 0.4024,
"step": 10600
},
{
"epoch": 3.7645811240721105,
"grad_norm": 2.64599871635437,
"learning_rate": 1.2382467302933899e-05,
"loss": 0.3817,
"step": 10650
},
{
"epoch": 3.7822552138564864,
"grad_norm": 2.3984270095825195,
"learning_rate": 1.2205726405090138e-05,
"loss": 0.4399,
"step": 10700
},
{
"epoch": 3.7999293036408623,
"grad_norm": 5.132211685180664,
"learning_rate": 1.2028985507246379e-05,
"loss": 0.4537,
"step": 10750
},
{
"epoch": 3.8176033934252387,
"grad_norm": 3.9488821029663086,
"learning_rate": 1.1852244609402616e-05,
"loss": 0.4433,
"step": 10800
},
{
"epoch": 3.8352774832096146,
"grad_norm": 4.978783130645752,
"learning_rate": 1.1675503711558855e-05,
"loss": 0.3722,
"step": 10850
},
{
"epoch": 3.852951572993991,
"grad_norm": 2.1942172050476074,
"learning_rate": 1.1498762813715094e-05,
"loss": 0.3641,
"step": 10900
},
{
"epoch": 3.870625662778367,
"grad_norm": 1.962399959564209,
"learning_rate": 1.1322021915871334e-05,
"loss": 0.3966,
"step": 10950
},
{
"epoch": 3.888299752562743,
"grad_norm": 2.3611438274383545,
"learning_rate": 1.1145281018027571e-05,
"loss": 0.3872,
"step": 11000
},
{
"epoch": 3.905973842347119,
"grad_norm": 2.8562467098236084,
"learning_rate": 1.096854012018381e-05,
"loss": 0.3823,
"step": 11050
},
{
"epoch": 3.9236479321314954,
"grad_norm": 3.315880060195923,
"learning_rate": 1.079179922234005e-05,
"loss": 0.4215,
"step": 11100
},
{
"epoch": 3.9413220219158713,
"grad_norm": 4.15437650680542,
"learning_rate": 1.0615058324496289e-05,
"loss": 0.4126,
"step": 11150
},
{
"epoch": 3.9589961117002472,
"grad_norm": 3.9605205059051514,
"learning_rate": 1.0438317426652528e-05,
"loss": 0.3773,
"step": 11200
},
{
"epoch": 3.9766702014846236,
"grad_norm": 3.106764793395996,
"learning_rate": 1.0261576528808767e-05,
"loss": 0.4297,
"step": 11250
},
{
"epoch": 3.9943442912689995,
"grad_norm": 3.4298675060272217,
"learning_rate": 1.0084835630965006e-05,
"loss": 0.4305,
"step": 11300
},
{
"epoch": 4.0,
"eval_bertscore_f1": 0.9656413255425373,
"eval_bleu": 0.5848426882684508,
"eval_loss": 0.3005247414112091,
"eval_meteor": 0.73697495147188,
"eval_rouge1": 0.8370075787215339,
"eval_rouge2": 0.7752220988783712,
"eval_runtime": 1268.4642,
"eval_samples_per_second": 5.096,
"eval_steps_per_second": 0.637,
"step": 11316
},
{
"epoch": 4.012018381053376,
"grad_norm": 4.263380527496338,
"learning_rate": 9.908094733121245e-06,
"loss": 0.4285,
"step": 11350
},
{
"epoch": 4.029692470837752,
"grad_norm": 14.104089736938477,
"learning_rate": 9.731353835277483e-06,
"loss": 0.3837,
"step": 11400
},
{
"epoch": 4.047366560622128,
"grad_norm": 2.5981857776641846,
"learning_rate": 9.554612937433722e-06,
"loss": 0.3773,
"step": 11450
},
{
"epoch": 4.065040650406504,
"grad_norm": 4.44357967376709,
"learning_rate": 9.377872039589961e-06,
"loss": 0.4325,
"step": 11500
},
{
"epoch": 4.08271474019088,
"grad_norm": 3.7187113761901855,
"learning_rate": 9.2011311417462e-06,
"loss": 0.427,
"step": 11550
},
{
"epoch": 4.100388829975256,
"grad_norm": 2.364908218383789,
"learning_rate": 9.02439024390244e-06,
"loss": 0.3617,
"step": 11600
},
{
"epoch": 4.118062919759632,
"grad_norm": 2.663651704788208,
"learning_rate": 8.847649346058679e-06,
"loss": 0.4174,
"step": 11650
},
{
"epoch": 4.135737009544009,
"grad_norm": 3.6699295043945312,
"learning_rate": 8.670908448214918e-06,
"loss": 0.4183,
"step": 11700
},
{
"epoch": 4.153411099328385,
"grad_norm": 4.236429214477539,
"learning_rate": 8.494167550371156e-06,
"loss": 0.4074,
"step": 11750
},
{
"epoch": 4.171085189112761,
"grad_norm": 4.3517632484436035,
"learning_rate": 8.317426652527395e-06,
"loss": 0.3905,
"step": 11800
},
{
"epoch": 4.188759278897137,
"grad_norm": 2.440966844558716,
"learning_rate": 8.140685754683634e-06,
"loss": 0.408,
"step": 11850
},
{
"epoch": 4.2064333686815125,
"grad_norm": 3.0445733070373535,
"learning_rate": 7.963944856839873e-06,
"loss": 0.3646,
"step": 11900
},
{
"epoch": 4.224107458465889,
"grad_norm": 3.174678325653076,
"learning_rate": 7.787203958996112e-06,
"loss": 0.4027,
"step": 11950
},
{
"epoch": 4.241781548250265,
"grad_norm": 4.445051193237305,
"learning_rate": 7.610463061152351e-06,
"loss": 0.4111,
"step": 12000
},
{
"epoch": 4.259455638034641,
"grad_norm": 3.7955079078674316,
"learning_rate": 7.43372216330859e-06,
"loss": 0.3815,
"step": 12050
},
{
"epoch": 4.277129727819017,
"grad_norm": 3.0276503562927246,
"learning_rate": 7.256981265464829e-06,
"loss": 0.3765,
"step": 12100
},
{
"epoch": 4.294803817603394,
"grad_norm": 1.8871873617172241,
"learning_rate": 7.080240367621067e-06,
"loss": 0.3771,
"step": 12150
},
{
"epoch": 4.31247790738777,
"grad_norm": 9.927197456359863,
"learning_rate": 6.903499469777307e-06,
"loss": 0.4112,
"step": 12200
},
{
"epoch": 4.330151997172146,
"grad_norm": 4.721640586853027,
"learning_rate": 6.726758571933546e-06,
"loss": 0.4451,
"step": 12250
},
{
"epoch": 4.3478260869565215,
"grad_norm": 2.7340986728668213,
"learning_rate": 6.550017674089785e-06,
"loss": 0.4254,
"step": 12300
},
{
"epoch": 4.365500176740898,
"grad_norm": 3.780824661254883,
"learning_rate": 6.373276776246023e-06,
"loss": 0.4557,
"step": 12350
},
{
"epoch": 4.383174266525274,
"grad_norm": 3.429931640625,
"learning_rate": 6.1965358784022625e-06,
"loss": 0.3858,
"step": 12400
},
{
"epoch": 4.40084835630965,
"grad_norm": 3.944438934326172,
"learning_rate": 6.019794980558501e-06,
"loss": 0.3569,
"step": 12450
},
{
"epoch": 4.418522446094026,
"grad_norm": 2.19978666305542,
"learning_rate": 5.843054082714741e-06,
"loss": 0.4232,
"step": 12500
},
{
"epoch": 4.436196535878402,
"grad_norm": 1.6702100038528442,
"learning_rate": 5.666313184870979e-06,
"loss": 0.4216,
"step": 12550
},
{
"epoch": 4.453870625662779,
"grad_norm": 5.39310884475708,
"learning_rate": 5.4895722870272184e-06,
"loss": 0.393,
"step": 12600
},
{
"epoch": 4.471544715447155,
"grad_norm": 2.8727235794067383,
"learning_rate": 5.312831389183457e-06,
"loss": 0.4104,
"step": 12650
},
{
"epoch": 4.4892188052315305,
"grad_norm": 1.9998319149017334,
"learning_rate": 5.136090491339696e-06,
"loss": 0.4592,
"step": 12700
},
{
"epoch": 4.506892895015906,
"grad_norm": 3.140760660171509,
"learning_rate": 4.959349593495935e-06,
"loss": 0.3582,
"step": 12750
},
{
"epoch": 4.524566984800282,
"grad_norm": 4.489378929138184,
"learning_rate": 4.782608695652174e-06,
"loss": 0.3891,
"step": 12800
},
{
"epoch": 4.542241074584659,
"grad_norm": 3.2630345821380615,
"learning_rate": 4.605867797808413e-06,
"loss": 0.3703,
"step": 12850
},
{
"epoch": 4.559915164369035,
"grad_norm": 2.898639440536499,
"learning_rate": 4.429126899964652e-06,
"loss": 0.4201,
"step": 12900
},
{
"epoch": 4.577589254153411,
"grad_norm": 3.266235589981079,
"learning_rate": 4.252386002120891e-06,
"loss": 0.4142,
"step": 12950
},
{
"epoch": 4.595263343937788,
"grad_norm": 3.594919204711914,
"learning_rate": 4.07564510427713e-06,
"loss": 0.4224,
"step": 13000
},
{
"epoch": 4.612937433722164,
"grad_norm": 4.3656439781188965,
"learning_rate": 3.898904206433369e-06,
"loss": 0.4155,
"step": 13050
},
{
"epoch": 4.6306115235065395,
"grad_norm": 3.863250494003296,
"learning_rate": 3.722163308589608e-06,
"loss": 0.3933,
"step": 13100
},
{
"epoch": 4.648285613290915,
"grad_norm": 3.6120657920837402,
"learning_rate": 3.5454224107458466e-06,
"loss": 0.4009,
"step": 13150
},
{
"epoch": 4.665959703075291,
"grad_norm": 3.4946892261505127,
"learning_rate": 3.368681512902086e-06,
"loss": 0.4538,
"step": 13200
},
{
"epoch": 4.683633792859668,
"grad_norm": 3.0893940925598145,
"learning_rate": 3.1919406150583245e-06,
"loss": 0.3429,
"step": 13250
},
{
"epoch": 4.701307882644044,
"grad_norm": 3.190537929534912,
"learning_rate": 3.0151997172145637e-06,
"loss": 0.489,
"step": 13300
},
{
"epoch": 4.71898197242842,
"grad_norm": 5.128622531890869,
"learning_rate": 2.8384588193708025e-06,
"loss": 0.3926,
"step": 13350
},
{
"epoch": 4.736656062212796,
"grad_norm": 3.6680729389190674,
"learning_rate": 2.6617179215270417e-06,
"loss": 0.3859,
"step": 13400
},
{
"epoch": 4.754330151997172,
"grad_norm": 2.93373703956604,
"learning_rate": 2.4849770236832804e-06,
"loss": 0.3637,
"step": 13450
},
{
"epoch": 4.7720042417815485,
"grad_norm": 3.649087429046631,
"learning_rate": 2.3082361258395196e-06,
"loss": 0.4155,
"step": 13500
},
{
"epoch": 4.789678331565924,
"grad_norm": 1.2933834791183472,
"learning_rate": 2.1314952279957584e-06,
"loss": 0.3868,
"step": 13550
},
{
"epoch": 4.8073524213503,
"grad_norm": 2.177612781524658,
"learning_rate": 1.9547543301519976e-06,
"loss": 0.4799,
"step": 13600
},
{
"epoch": 4.825026511134676,
"grad_norm": 3.7405126094818115,
"learning_rate": 1.7780134323082363e-06,
"loss": 0.455,
"step": 13650
},
{
"epoch": 4.842700600919053,
"grad_norm": 6.44041633605957,
"learning_rate": 1.6048073524213503e-06,
"loss": 0.3766,
"step": 13700
},
{
"epoch": 4.860374690703429,
"grad_norm": 2.510866165161133,
"learning_rate": 1.4280664545775892e-06,
"loss": 0.3779,
"step": 13750
},
{
"epoch": 4.878048780487805,
"grad_norm": 4.789300441741943,
"learning_rate": 1.2513255567338282e-06,
"loss": 0.3892,
"step": 13800
},
{
"epoch": 4.895722870272181,
"grad_norm": 2.6004765033721924,
"learning_rate": 1.0745846588900672e-06,
"loss": 0.3897,
"step": 13850
},
{
"epoch": 4.9133969600565575,
"grad_norm": 4.115776062011719,
"learning_rate": 8.978437610463062e-07,
"loss": 0.3896,
"step": 13900
},
{
"epoch": 4.931071049840933,
"grad_norm": 9.11878776550293,
"learning_rate": 7.211028632025451e-07,
"loss": 0.3918,
"step": 13950
},
{
"epoch": 4.948745139625309,
"grad_norm": 3.8003361225128174,
"learning_rate": 5.44361965358784e-07,
"loss": 0.476,
"step": 14000
},
{
"epoch": 4.966419229409685,
"grad_norm": 2.117197275161743,
"learning_rate": 3.67621067515023e-07,
"loss": 0.373,
"step": 14050
},
{
"epoch": 4.984093319194061,
"grad_norm": 1.8130935430526733,
"learning_rate": 1.9088016967126194e-07,
"loss": 0.4102,
"step": 14100
},
{
"epoch": 5.0,
"eval_bertscore_f1": 0.9660587414250811,
"eval_bleu": 0.5882761107143478,
"eval_loss": 0.29442909359931946,
"eval_meteor": 0.7392640094761435,
"eval_rouge1": 0.8386605714105622,
"eval_rouge2": 0.7781271007162897,
"eval_runtime": 1266.3046,
"eval_samples_per_second": 5.105,
"eval_steps_per_second": 0.638,
"step": 14145
}
],
"logging_steps": 50,
"max_steps": 14145,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.730048539557888e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}