{ "best_global_step": 14145, "best_metric": 0.9660587414250811, "best_model_checkpoint": "/kaggle/working/codet5-k8s-qlora/checkpoint-14145", "epoch": 5.0, "eval_steps": 500, "global_step": 14145, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.017674089784376106, "grad_norm": 2.658311605453491, "learning_rate": 4.983032873806999e-05, "loss": 4.0893, "step": 50 }, { "epoch": 0.03534817956875221, "grad_norm": 6.100900173187256, "learning_rate": 4.9657122658183106e-05, "loss": 3.214, "step": 100 }, { "epoch": 0.053022269353128315, "grad_norm": 2.4247324466705322, "learning_rate": 4.948391657829622e-05, "loss": 2.2694, "step": 150 }, { "epoch": 0.07069635913750442, "grad_norm": 2.4390416145324707, "learning_rate": 4.930717568045246e-05, "loss": 1.9621, "step": 200 }, { "epoch": 0.08837044892188052, "grad_norm": 3.003971576690674, "learning_rate": 4.91304347826087e-05, "loss": 1.8377, "step": 250 }, { "epoch": 0.10604453870625663, "grad_norm": 2.6893651485443115, "learning_rate": 4.895369388476494e-05, "loss": 1.7639, "step": 300 }, { "epoch": 0.12371862849063273, "grad_norm": 2.8361988067626953, "learning_rate": 4.8776952986921177e-05, "loss": 1.6632, "step": 350 }, { "epoch": 0.14139271827500885, "grad_norm": 2.17179012298584, "learning_rate": 4.8600212089077416e-05, "loss": 1.5525, "step": 400 }, { "epoch": 0.15906680805938495, "grad_norm": 4.485565185546875, "learning_rate": 4.842700600919053e-05, "loss": 1.4975, "step": 450 }, { "epoch": 0.17674089784376104, "grad_norm": 3.197230577468872, "learning_rate": 4.825026511134676e-05, "loss": 1.4888, "step": 500 }, { "epoch": 0.19441498762813716, "grad_norm": 2.8129756450653076, "learning_rate": 4.807352421350301e-05, "loss": 1.32, "step": 550 }, { "epoch": 0.21208907741251326, "grad_norm": 2.888892650604248, "learning_rate": 4.789678331565924e-05, "loss": 1.4137, "step": 600 }, { "epoch": 0.22976316719688936, "grad_norm": 3.6058623790740967, "learning_rate": 4.7720042417815487e-05, "loss": 1.3793, "step": 650 }, { "epoch": 0.24743725698126545, "grad_norm": 3.077688217163086, "learning_rate": 4.7543301519971726e-05, "loss": 1.2157, "step": 700 }, { "epoch": 0.2651113467656416, "grad_norm": 4.21675443649292, "learning_rate": 4.7366560622127965e-05, "loss": 1.3435, "step": 750 }, { "epoch": 0.2827854365500177, "grad_norm": 3.459958076477051, "learning_rate": 4.7189819724284204e-05, "loss": 1.2747, "step": 800 }, { "epoch": 0.30045952633439377, "grad_norm": 3.2092440128326416, "learning_rate": 4.701307882644044e-05, "loss": 1.1624, "step": 850 }, { "epoch": 0.3181336161187699, "grad_norm": 3.1231963634490967, "learning_rate": 4.683633792859668e-05, "loss": 1.1956, "step": 900 }, { "epoch": 0.335807705903146, "grad_norm": 3.332000970840454, "learning_rate": 4.6659597030752915e-05, "loss": 1.118, "step": 950 }, { "epoch": 0.3534817956875221, "grad_norm": 2.992741823196411, "learning_rate": 4.648285613290916e-05, "loss": 1.1513, "step": 1000 }, { "epoch": 0.3711558854718982, "grad_norm": 2.8758022785186768, "learning_rate": 4.630611523506539e-05, "loss": 1.0998, "step": 1050 }, { "epoch": 0.38882997525627433, "grad_norm": 3.870368480682373, "learning_rate": 4.612937433722164e-05, "loss": 1.0723, "step": 1100 }, { "epoch": 0.4065040650406504, "grad_norm": 4.177937030792236, "learning_rate": 4.595263343937787e-05, "loss": 1.0612, "step": 1150 }, { "epoch": 0.4241781548250265, "grad_norm": 2.760124921798706, "learning_rate": 4.577589254153412e-05, "loss": 1.0086, "step": 1200 }, { "epoch": 0.4418522446094026, "grad_norm": 3.0196070671081543, "learning_rate": 4.559915164369035e-05, "loss": 1.06, "step": 1250 }, { "epoch": 0.4595263343937787, "grad_norm": 2.649152994155884, "learning_rate": 4.542241074584659e-05, "loss": 0.9997, "step": 1300 }, { "epoch": 0.47720042417815484, "grad_norm": 3.8896467685699463, "learning_rate": 4.524566984800283e-05, "loss": 1.0067, "step": 1350 }, { "epoch": 0.4948745139625309, "grad_norm": 3.186890125274658, "learning_rate": 4.506892895015907e-05, "loss": 0.9501, "step": 1400 }, { "epoch": 0.512548603746907, "grad_norm": 3.991669178009033, "learning_rate": 4.489218805231531e-05, "loss": 1.0347, "step": 1450 }, { "epoch": 0.5302226935312832, "grad_norm": 11.242384910583496, "learning_rate": 4.4715447154471546e-05, "loss": 0.9635, "step": 1500 }, { "epoch": 0.5478967833156593, "grad_norm": 2.5245680809020996, "learning_rate": 4.4538706256627785e-05, "loss": 0.9248, "step": 1550 }, { "epoch": 0.5655708731000354, "grad_norm": 4.0713114738464355, "learning_rate": 4.4361965358784024e-05, "loss": 0.906, "step": 1600 }, { "epoch": 0.5832449628844114, "grad_norm": 3.434156656265259, "learning_rate": 4.4185224460940264e-05, "loss": 0.9438, "step": 1650 }, { "epoch": 0.6009190526687875, "grad_norm": 3.6341230869293213, "learning_rate": 4.40084835630965e-05, "loss": 0.8156, "step": 1700 }, { "epoch": 0.6185931424531637, "grad_norm": 4.359820365905762, "learning_rate": 4.383174266525274e-05, "loss": 0.9052, "step": 1750 }, { "epoch": 0.6362672322375398, "grad_norm": 3.804647445678711, "learning_rate": 4.365500176740898e-05, "loss": 0.8758, "step": 1800 }, { "epoch": 0.6539413220219159, "grad_norm": 21.193464279174805, "learning_rate": 4.347826086956522e-05, "loss": 0.8776, "step": 1850 }, { "epoch": 0.671615411806292, "grad_norm": 3.002357244491577, "learning_rate": 4.330151997172146e-05, "loss": 0.8658, "step": 1900 }, { "epoch": 0.689289501590668, "grad_norm": 4.116176605224609, "learning_rate": 4.31247790738777e-05, "loss": 0.8419, "step": 1950 }, { "epoch": 0.7069635913750442, "grad_norm": 6.561131954193115, "learning_rate": 4.294803817603394e-05, "loss": 0.8204, "step": 2000 }, { "epoch": 0.7246376811594203, "grad_norm": 3.203460931777954, "learning_rate": 4.277129727819018e-05, "loss": 0.7557, "step": 2050 }, { "epoch": 0.7423117709437964, "grad_norm": 4.0467705726623535, "learning_rate": 4.259455638034641e-05, "loss": 0.8053, "step": 2100 }, { "epoch": 0.7599858607281725, "grad_norm": 3.575634002685547, "learning_rate": 4.242135030045953e-05, "loss": 0.8128, "step": 2150 }, { "epoch": 0.7776599505125487, "grad_norm": 5.7353363037109375, "learning_rate": 4.224460940261576e-05, "loss": 0.8339, "step": 2200 }, { "epoch": 0.7953340402969247, "grad_norm": 4.916664123535156, "learning_rate": 4.206786850477201e-05, "loss": 0.7758, "step": 2250 }, { "epoch": 0.8130081300813008, "grad_norm": 4.233948230743408, "learning_rate": 4.189112760692824e-05, "loss": 0.7485, "step": 2300 }, { "epoch": 0.8306822198656769, "grad_norm": 3.754826545715332, "learning_rate": 4.171438670908449e-05, "loss": 0.6988, "step": 2350 }, { "epoch": 0.848356309650053, "grad_norm": 3.185098171234131, "learning_rate": 4.153764581124072e-05, "loss": 0.7067, "step": 2400 }, { "epoch": 0.8660303994344292, "grad_norm": 3.515683650970459, "learning_rate": 4.1360904913396966e-05, "loss": 0.7559, "step": 2450 }, { "epoch": 0.8837044892188052, "grad_norm": 4.783038139343262, "learning_rate": 4.11841640155532e-05, "loss": 0.7444, "step": 2500 }, { "epoch": 0.9013785790031813, "grad_norm": 3.4242937564849854, "learning_rate": 4.1007423117709444e-05, "loss": 0.7676, "step": 2550 }, { "epoch": 0.9190526687875574, "grad_norm": 3.3563663959503174, "learning_rate": 4.0830682219865676e-05, "loss": 0.7416, "step": 2600 }, { "epoch": 0.9367267585719335, "grad_norm": 21.883926391601562, "learning_rate": 4.0653941322021916e-05, "loss": 0.6892, "step": 2650 }, { "epoch": 0.9544008483563097, "grad_norm": 3.8259048461914062, "learning_rate": 4.0477200424178155e-05, "loss": 0.7489, "step": 2700 }, { "epoch": 0.9720749381406858, "grad_norm": 3.026655912399292, "learning_rate": 4.0300459526334394e-05, "loss": 0.6679, "step": 2750 }, { "epoch": 0.9897490279250618, "grad_norm": 7.62285041809082, "learning_rate": 4.012371862849063e-05, "loss": 0.7393, "step": 2800 }, { "epoch": 1.0, "eval_bertscore_f1": 0.9551081777928342, "eval_bleu": 0.47406093922979725, "eval_loss": 0.5141507983207703, "eval_meteor": 0.6443492142009581, "eval_rouge1": 0.7935683439864762, "eval_rouge2": 0.6796198647957756, "eval_runtime": 1335.9702, "eval_samples_per_second": 4.838, "eval_steps_per_second": 0.605, "step": 2829 }, { "epoch": 1.007423117709438, "grad_norm": 3.7401936054229736, "learning_rate": 3.994697773064687e-05, "loss": 0.7272, "step": 2850 }, { "epoch": 1.025097207493814, "grad_norm": 4.575202941894531, "learning_rate": 3.977023683280312e-05, "loss": 0.6891, "step": 2900 }, { "epoch": 1.0427712972781902, "grad_norm": 2.909268379211426, "learning_rate": 3.959349593495935e-05, "loss": 0.6751, "step": 2950 }, { "epoch": 1.0604453870625663, "grad_norm": 5.258713722229004, "learning_rate": 3.941675503711559e-05, "loss": 0.7308, "step": 3000 }, { "epoch": 1.0781194768469424, "grad_norm": 4.8982462882995605, "learning_rate": 3.924001413927183e-05, "loss": 0.5938, "step": 3050 }, { "epoch": 1.0957935666313185, "grad_norm": 3.7401649951934814, "learning_rate": 3.906327324142807e-05, "loss": 0.7358, "step": 3100 }, { "epoch": 1.1134676564156947, "grad_norm": 2.2274134159088135, "learning_rate": 3.888653234358431e-05, "loss": 0.6251, "step": 3150 }, { "epoch": 1.1311417462000706, "grad_norm": 4.285720348358154, "learning_rate": 3.870979144574055e-05, "loss": 0.6773, "step": 3200 }, { "epoch": 1.148815835984447, "grad_norm": 3.1202948093414307, "learning_rate": 3.8533050547896786e-05, "loss": 0.6485, "step": 3250 }, { "epoch": 1.1664899257688228, "grad_norm": 2.96162486076355, "learning_rate": 3.8356309650053025e-05, "loss": 0.6733, "step": 3300 }, { "epoch": 1.184164015553199, "grad_norm": 6.456724166870117, "learning_rate": 3.817956875220926e-05, "loss": 0.6142, "step": 3350 }, { "epoch": 1.201838105337575, "grad_norm": 5.0712690353393555, "learning_rate": 3.8002827854365503e-05, "loss": 0.6952, "step": 3400 }, { "epoch": 1.2195121951219512, "grad_norm": 5.074472904205322, "learning_rate": 3.7826086956521736e-05, "loss": 0.6147, "step": 3450 }, { "epoch": 1.2371862849063273, "grad_norm": 4.572699546813965, "learning_rate": 3.764934605867798e-05, "loss": 0.6172, "step": 3500 }, { "epoch": 1.2548603746907034, "grad_norm": 3.24722957611084, "learning_rate": 3.747260516083422e-05, "loss": 0.6657, "step": 3550 }, { "epoch": 1.2725344644750796, "grad_norm": 3.6657183170318604, "learning_rate": 3.729586426299046e-05, "loss": 0.6999, "step": 3600 }, { "epoch": 1.2902085542594557, "grad_norm": 3.2770209312438965, "learning_rate": 3.71191233651467e-05, "loss": 0.6882, "step": 3650 }, { "epoch": 1.3078826440438318, "grad_norm": 4.611114501953125, "learning_rate": 3.694238246730294e-05, "loss": 0.6767, "step": 3700 }, { "epoch": 1.3255567338282077, "grad_norm": 3.4801883697509766, "learning_rate": 3.676564156945918e-05, "loss": 0.6503, "step": 3750 }, { "epoch": 1.343230823612584, "grad_norm": 4.582475185394287, "learning_rate": 3.658890067161541e-05, "loss": 0.5833, "step": 3800 }, { "epoch": 1.36090491339696, "grad_norm": 3.0982961654663086, "learning_rate": 3.6412159773771656e-05, "loss": 0.6271, "step": 3850 }, { "epoch": 1.378579003181336, "grad_norm": 3.592360734939575, "learning_rate": 3.623541887592789e-05, "loss": 0.6688, "step": 3900 }, { "epoch": 1.3962530929657122, "grad_norm": 4.296905994415283, "learning_rate": 3.6058677978084134e-05, "loss": 0.5931, "step": 3950 }, { "epoch": 1.4139271827500883, "grad_norm": 3.616574764251709, "learning_rate": 3.588193708024037e-05, "loss": 0.6297, "step": 4000 }, { "epoch": 1.4316012725344645, "grad_norm": 3.1819770336151123, "learning_rate": 3.570519618239661e-05, "loss": 0.5801, "step": 4050 }, { "epoch": 1.4492753623188406, "grad_norm": 3.5812184810638428, "learning_rate": 3.5528455284552845e-05, "loss": 0.5826, "step": 4100 }, { "epoch": 1.4669494521032167, "grad_norm": 2.889911651611328, "learning_rate": 3.5351714386709084e-05, "loss": 0.5396, "step": 4150 }, { "epoch": 1.4846235418875928, "grad_norm": 3.532849073410034, "learning_rate": 3.5174973488865324e-05, "loss": 0.5218, "step": 4200 }, { "epoch": 1.502297631671969, "grad_norm": 2.939161777496338, "learning_rate": 3.499823259102156e-05, "loss": 0.5701, "step": 4250 }, { "epoch": 1.5199717214563448, "grad_norm": 3.500262975692749, "learning_rate": 3.48214916931778e-05, "loss": 0.5117, "step": 4300 }, { "epoch": 1.5376458112407212, "grad_norm": 3.612431526184082, "learning_rate": 3.464475079533404e-05, "loss": 0.5067, "step": 4350 }, { "epoch": 1.555319901025097, "grad_norm": 3.3735318183898926, "learning_rate": 3.446800989749028e-05, "loss": 0.5403, "step": 4400 }, { "epoch": 1.5729939908094734, "grad_norm": 28.255231857299805, "learning_rate": 3.429126899964652e-05, "loss": 0.5442, "step": 4450 }, { "epoch": 1.5906680805938493, "grad_norm": 4.424487113952637, "learning_rate": 3.411452810180276e-05, "loss": 0.5769, "step": 4500 }, { "epoch": 1.6083421703782255, "grad_norm": 4.6517109870910645, "learning_rate": 3.3937787203959e-05, "loss": 0.5291, "step": 4550 }, { "epoch": 1.6260162601626016, "grad_norm": 4.276078701019287, "learning_rate": 3.376104630611524e-05, "loss": 0.6207, "step": 4600 }, { "epoch": 1.6436903499469777, "grad_norm": 3.1325790882110596, "learning_rate": 3.3584305408271476e-05, "loss": 0.5807, "step": 4650 }, { "epoch": 1.6613644397313538, "grad_norm": 3.2780227661132812, "learning_rate": 3.3407564510427716e-05, "loss": 0.5487, "step": 4700 }, { "epoch": 1.67903852951573, "grad_norm": 3.9542007446289062, "learning_rate": 3.3230823612583955e-05, "loss": 0.6385, "step": 4750 }, { "epoch": 1.696712619300106, "grad_norm": 4.091352462768555, "learning_rate": 3.3054082714740194e-05, "loss": 0.5845, "step": 4800 }, { "epoch": 1.714386709084482, "grad_norm": 2.3576905727386475, "learning_rate": 3.2877341816896426e-05, "loss": 0.4949, "step": 4850 }, { "epoch": 1.7320607988688583, "grad_norm": 3.200242519378662, "learning_rate": 3.270060091905267e-05, "loss": 0.5922, "step": 4900 }, { "epoch": 1.7497348886532342, "grad_norm": 3.1346006393432617, "learning_rate": 3.2523860021208905e-05, "loss": 0.5259, "step": 4950 }, { "epoch": 1.7674089784376106, "grad_norm": 3.4066524505615234, "learning_rate": 3.234711912336515e-05, "loss": 0.562, "step": 5000 }, { "epoch": 1.7850830682219865, "grad_norm": 5.18930196762085, "learning_rate": 3.217037822552138e-05, "loss": 0.5825, "step": 5050 }, { "epoch": 1.8027571580063628, "grad_norm": 4.159862995147705, "learning_rate": 3.199363732767763e-05, "loss": 0.5616, "step": 5100 }, { "epoch": 1.8204312477907387, "grad_norm": 4.439573287963867, "learning_rate": 3.181689642983386e-05, "loss": 0.5334, "step": 5150 }, { "epoch": 1.8381053375751149, "grad_norm": 6.196533203125, "learning_rate": 3.164015553199011e-05, "loss": 0.5887, "step": 5200 }, { "epoch": 1.855779427359491, "grad_norm": 3.715372323989868, "learning_rate": 3.146341463414634e-05, "loss": 0.5379, "step": 5250 }, { "epoch": 1.873453517143867, "grad_norm": 4.34264612197876, "learning_rate": 3.128667373630258e-05, "loss": 0.4827, "step": 5300 }, { "epoch": 1.8911276069282432, "grad_norm": 2.337557315826416, "learning_rate": 3.1109932838458825e-05, "loss": 0.4685, "step": 5350 }, { "epoch": 1.9088016967126193, "grad_norm": 3.325277805328369, "learning_rate": 3.093319194061506e-05, "loss": 0.4983, "step": 5400 }, { "epoch": 1.9264757864969955, "grad_norm": 2.976592540740967, "learning_rate": 3.0756451042771303e-05, "loss": 0.5814, "step": 5450 }, { "epoch": 1.9441498762813714, "grad_norm": 9.608305931091309, "learning_rate": 3.0579710144927536e-05, "loss": 0.5062, "step": 5500 }, { "epoch": 1.9618239660657477, "grad_norm": 3.443791151046753, "learning_rate": 3.040296924708378e-05, "loss": 0.5092, "step": 5550 }, { "epoch": 1.9794980558501236, "grad_norm": 3.4817845821380615, "learning_rate": 3.0226228349240014e-05, "loss": 0.5584, "step": 5600 }, { "epoch": 1.9971721456345, "grad_norm": 3.2107975482940674, "learning_rate": 3.0049487451396253e-05, "loss": 0.506, "step": 5650 }, { "epoch": 2.0, "eval_bertscore_f1": 0.9621683897930059, "eval_bleu": 0.5494076455991572, "eval_loss": 0.37199869751930237, "eval_meteor": 0.7077566730507359, "eval_rouge1": 0.8237541199852757, "eval_rouge2": 0.7474098813874757, "eval_runtime": 1281.6235, "eval_samples_per_second": 5.044, "eval_steps_per_second": 0.63, "step": 5658 }, { "epoch": 2.014846235418876, "grad_norm": 3.7251229286193848, "learning_rate": 2.9872746553552493e-05, "loss": 0.4928, "step": 5700 }, { "epoch": 2.032520325203252, "grad_norm": 3.801664113998413, "learning_rate": 2.9696005655708732e-05, "loss": 0.5748, "step": 5750 }, { "epoch": 2.050194414987628, "grad_norm": 5.817806243896484, "learning_rate": 2.9519264757864974e-05, "loss": 0.4844, "step": 5800 }, { "epoch": 2.0678685047720045, "grad_norm": 3.028961658477783, "learning_rate": 2.934252386002121e-05, "loss": 0.4626, "step": 5850 }, { "epoch": 2.0855425945563804, "grad_norm": 3.974060297012329, "learning_rate": 2.9165782962177453e-05, "loss": 0.5274, "step": 5900 }, { "epoch": 2.1032166843407563, "grad_norm": 2.532444953918457, "learning_rate": 2.898904206433369e-05, "loss": 0.4887, "step": 5950 }, { "epoch": 2.1208907741251326, "grad_norm": 2.0569326877593994, "learning_rate": 2.8812301166489924e-05, "loss": 0.4353, "step": 6000 }, { "epoch": 2.1385648639095085, "grad_norm": 3.0496156215667725, "learning_rate": 2.8635560268646167e-05, "loss": 0.4347, "step": 6050 }, { "epoch": 2.156238953693885, "grad_norm": 2.635395050048828, "learning_rate": 2.8458819370802403e-05, "loss": 0.5406, "step": 6100 }, { "epoch": 2.1739130434782608, "grad_norm": 4.091008186340332, "learning_rate": 2.8282078472958645e-05, "loss": 0.4885, "step": 6150 }, { "epoch": 2.191587133262637, "grad_norm": 3.228792905807495, "learning_rate": 2.810533757511488e-05, "loss": 0.5072, "step": 6200 }, { "epoch": 2.209261223047013, "grad_norm": 2.479149341583252, "learning_rate": 2.7928596677271124e-05, "loss": 0.5475, "step": 6250 }, { "epoch": 2.2269353128313893, "grad_norm": 4.617306709289551, "learning_rate": 2.775185577942736e-05, "loss": 0.5468, "step": 6300 }, { "epoch": 2.2446094026157652, "grad_norm": 4.416631698608398, "learning_rate": 2.7575114881583602e-05, "loss": 0.5125, "step": 6350 }, { "epoch": 2.262283492400141, "grad_norm": 3.7900924682617188, "learning_rate": 2.7398373983739838e-05, "loss": 0.5824, "step": 6400 }, { "epoch": 2.2799575821845175, "grad_norm": 3.695364236831665, "learning_rate": 2.7221633085896077e-05, "loss": 0.4806, "step": 6450 }, { "epoch": 2.297631671968894, "grad_norm": 2.609520196914673, "learning_rate": 2.7044892188052316e-05, "loss": 0.4537, "step": 6500 }, { "epoch": 2.3153057617532697, "grad_norm": 4.006641864776611, "learning_rate": 2.6868151290208555e-05, "loss": 0.5172, "step": 6550 }, { "epoch": 2.3329798515376456, "grad_norm": 3.581960439682007, "learning_rate": 2.6691410392364795e-05, "loss": 0.5089, "step": 6600 }, { "epoch": 2.350653941322022, "grad_norm": 2.6414718627929688, "learning_rate": 2.6514669494521034e-05, "loss": 0.4936, "step": 6650 }, { "epoch": 2.368328031106398, "grad_norm": 3.3889434337615967, "learning_rate": 2.6337928596677276e-05, "loss": 0.535, "step": 6700 }, { "epoch": 2.3860021208907742, "grad_norm": 4.371047496795654, "learning_rate": 2.6161187698833512e-05, "loss": 0.4651, "step": 6750 }, { "epoch": 2.40367621067515, "grad_norm": 4.057021617889404, "learning_rate": 2.5984446800989748e-05, "loss": 0.4369, "step": 6800 }, { "epoch": 2.4213503004595265, "grad_norm": 4.6812615394592285, "learning_rate": 2.580770590314599e-05, "loss": 0.5067, "step": 6850 }, { "epoch": 2.4390243902439024, "grad_norm": 6.067279815673828, "learning_rate": 2.5630965005302226e-05, "loss": 0.4901, "step": 6900 }, { "epoch": 2.4566984800282787, "grad_norm": 3.8635661602020264, "learning_rate": 2.545422410745847e-05, "loss": 0.415, "step": 6950 }, { "epoch": 2.4743725698126546, "grad_norm": 1.7011466026306152, "learning_rate": 2.5277483209614705e-05, "loss": 0.4893, "step": 7000 }, { "epoch": 2.4920466595970305, "grad_norm": 3.8497934341430664, "learning_rate": 2.5100742311770947e-05, "loss": 0.4504, "step": 7050 }, { "epoch": 2.509720749381407, "grad_norm": 3.670374631881714, "learning_rate": 2.4924001413927183e-05, "loss": 0.4883, "step": 7100 }, { "epoch": 2.5273948391657832, "grad_norm": 3.130357503890991, "learning_rate": 2.4747260516083422e-05, "loss": 0.5031, "step": 7150 }, { "epoch": 2.545068928950159, "grad_norm": 3.647500514984131, "learning_rate": 2.457051961823966e-05, "loss": 0.4368, "step": 7200 }, { "epoch": 2.562743018734535, "grad_norm": 3.6657369136810303, "learning_rate": 2.43937787203959e-05, "loss": 0.4686, "step": 7250 }, { "epoch": 2.5804171085189114, "grad_norm": 5.371551036834717, "learning_rate": 2.421703782255214e-05, "loss": 0.4433, "step": 7300 }, { "epoch": 2.5980911983032873, "grad_norm": 3.593418598175049, "learning_rate": 2.404029692470838e-05, "loss": 0.4901, "step": 7350 }, { "epoch": 2.6157652880876636, "grad_norm": 3.1181206703186035, "learning_rate": 2.3863556026864618e-05, "loss": 0.4834, "step": 7400 }, { "epoch": 2.6334393778720395, "grad_norm": 4.218138217926025, "learning_rate": 2.3686815129020857e-05, "loss": 0.5013, "step": 7450 }, { "epoch": 2.6511134676564154, "grad_norm": 3.5063066482543945, "learning_rate": 2.3510074231177097e-05, "loss": 0.4705, "step": 7500 }, { "epoch": 2.6687875574407918, "grad_norm": 2.8965365886688232, "learning_rate": 2.3333333333333336e-05, "loss": 0.464, "step": 7550 }, { "epoch": 2.686461647225168, "grad_norm": 2.336358070373535, "learning_rate": 2.3156592435489575e-05, "loss": 0.4591, "step": 7600 }, { "epoch": 2.704135737009544, "grad_norm": 3.5483410358428955, "learning_rate": 2.2979851537645814e-05, "loss": 0.4419, "step": 7650 }, { "epoch": 2.72180982679392, "grad_norm": 4.550882816314697, "learning_rate": 2.280311063980205e-05, "loss": 0.4181, "step": 7700 }, { "epoch": 2.7394839165782963, "grad_norm": 4.471234321594238, "learning_rate": 2.262636974195829e-05, "loss": 0.4558, "step": 7750 }, { "epoch": 2.757158006362672, "grad_norm": 3.0595200061798096, "learning_rate": 2.244962884411453e-05, "loss": 0.4188, "step": 7800 }, { "epoch": 2.7748320961470485, "grad_norm": 7.5111403465271, "learning_rate": 2.2272887946270768e-05, "loss": 0.4834, "step": 7850 }, { "epoch": 2.7925061859314244, "grad_norm": 2.2414655685424805, "learning_rate": 2.2096147048427007e-05, "loss": 0.442, "step": 7900 }, { "epoch": 2.8101802757158008, "grad_norm": 4.036431789398193, "learning_rate": 2.1919406150583246e-05, "loss": 0.4254, "step": 7950 }, { "epoch": 2.8278543655001767, "grad_norm": 3.3172266483306885, "learning_rate": 2.1742665252739485e-05, "loss": 0.4852, "step": 8000 }, { "epoch": 2.845528455284553, "grad_norm": 4.143049240112305, "learning_rate": 2.1565924354895724e-05, "loss": 0.4858, "step": 8050 }, { "epoch": 2.863202545068929, "grad_norm": 5.017402172088623, "learning_rate": 2.138918345705196e-05, "loss": 0.3824, "step": 8100 }, { "epoch": 2.880876634853305, "grad_norm": 2.974952459335327, "learning_rate": 2.1212442559208203e-05, "loss": 0.4777, "step": 8150 }, { "epoch": 2.898550724637681, "grad_norm": 7.074586868286133, "learning_rate": 2.1035701661364442e-05, "loss": 0.4465, "step": 8200 }, { "epoch": 2.9162248144220575, "grad_norm": 3.585792064666748, "learning_rate": 2.085896076352068e-05, "loss": 0.4307, "step": 8250 }, { "epoch": 2.9338989042064334, "grad_norm": 1.6561566591262817, "learning_rate": 2.068221986567692e-05, "loss": 0.3917, "step": 8300 }, { "epoch": 2.9515729939908093, "grad_norm": 4.920962810516357, "learning_rate": 2.050547896783316e-05, "loss": 0.4334, "step": 8350 }, { "epoch": 2.9692470837751856, "grad_norm": 2.6819636821746826, "learning_rate": 2.03287380699894e-05, "loss": 0.4679, "step": 8400 }, { "epoch": 2.9869211735595615, "grad_norm": 3.442260265350342, "learning_rate": 2.0151997172145634e-05, "loss": 0.4466, "step": 8450 }, { "epoch": 3.0, "eval_bertscore_f1": 0.9646675708510055, "eval_bleu": 0.5742982540038749, "eval_loss": 0.3243306279182434, "eval_meteor": 0.7281699575301964, "eval_rouge1": 0.8328916554556949, "eval_rouge2": 0.7666932565109175, "eval_runtime": 1288.8847, "eval_samples_per_second": 5.015, "eval_steps_per_second": 0.627, "step": 8487 }, { "epoch": 3.004595263343938, "grad_norm": 3.7356512546539307, "learning_rate": 1.9975256274301874e-05, "loss": 0.394, "step": 8500 }, { "epoch": 3.022269353128314, "grad_norm": 3.7725515365600586, "learning_rate": 1.9798515376458113e-05, "loss": 0.4484, "step": 8550 }, { "epoch": 3.03994344291269, "grad_norm": 2.475839138031006, "learning_rate": 1.9621774478614352e-05, "loss": 0.4463, "step": 8600 }, { "epoch": 3.057617532697066, "grad_norm": 2.853266716003418, "learning_rate": 1.944503358077059e-05, "loss": 0.4398, "step": 8650 }, { "epoch": 3.0752916224814424, "grad_norm": 2.7079474925994873, "learning_rate": 1.926829268292683e-05, "loss": 0.4021, "step": 8700 }, { "epoch": 3.0929657122658183, "grad_norm": 5.04539680480957, "learning_rate": 1.909155178508307e-05, "loss": 0.3996, "step": 8750 }, { "epoch": 3.110639802050194, "grad_norm": 4.626221656799316, "learning_rate": 1.8918345705196184e-05, "loss": 0.4948, "step": 8800 }, { "epoch": 3.1283138918345705, "grad_norm": 4.644408226013184, "learning_rate": 1.8741604807352423e-05, "loss": 0.4156, "step": 8850 }, { "epoch": 3.1459879816189464, "grad_norm": 4.299105167388916, "learning_rate": 1.8564863909508662e-05, "loss": 0.3977, "step": 8900 }, { "epoch": 3.163662071403323, "grad_norm": 4.650149345397949, "learning_rate": 1.83881230116649e-05, "loss": 0.4229, "step": 8950 }, { "epoch": 3.1813361611876987, "grad_norm": 2.89013409614563, "learning_rate": 1.821138211382114e-05, "loss": 0.4506, "step": 9000 }, { "epoch": 3.199010250972075, "grad_norm": 2.281370162963867, "learning_rate": 1.8034641215977376e-05, "loss": 0.4288, "step": 9050 }, { "epoch": 3.216684340756451, "grad_norm": 4.948707103729248, "learning_rate": 1.7857900318133615e-05, "loss": 0.4633, "step": 9100 }, { "epoch": 3.2343584305408273, "grad_norm": 3.5856571197509766, "learning_rate": 1.7681159420289855e-05, "loss": 0.3965, "step": 9150 }, { "epoch": 3.252032520325203, "grad_norm": 3.416271686553955, "learning_rate": 1.7504418522446094e-05, "loss": 0.4904, "step": 9200 }, { "epoch": 3.2697066101095795, "grad_norm": 3.599717617034912, "learning_rate": 1.7327677624602333e-05, "loss": 0.4648, "step": 9250 }, { "epoch": 3.2873806998939554, "grad_norm": 2.8439853191375732, "learning_rate": 1.7150936726758572e-05, "loss": 0.3734, "step": 9300 }, { "epoch": 3.3050547896783318, "grad_norm": 4.0927863121032715, "learning_rate": 1.697419582891481e-05, "loss": 0.3913, "step": 9350 }, { "epoch": 3.3227288794627077, "grad_norm": 4.16766881942749, "learning_rate": 1.679745493107105e-05, "loss": 0.4303, "step": 9400 }, { "epoch": 3.3404029692470836, "grad_norm": 3.417738199234009, "learning_rate": 1.662071403322729e-05, "loss": 0.4068, "step": 9450 }, { "epoch": 3.35807705903146, "grad_norm": 4.66575813293457, "learning_rate": 1.644397313538353e-05, "loss": 0.5116, "step": 9500 }, { "epoch": 3.375751148815836, "grad_norm": 6.112340927124023, "learning_rate": 1.6267232237539768e-05, "loss": 0.4244, "step": 9550 }, { "epoch": 3.393425238600212, "grad_norm": 3.322610378265381, "learning_rate": 1.6090491339696007e-05, "loss": 0.4252, "step": 9600 }, { "epoch": 3.411099328384588, "grad_norm": 4.941850185394287, "learning_rate": 1.5913750441852247e-05, "loss": 0.402, "step": 9650 }, { "epoch": 3.4287734181689644, "grad_norm": 2.177600860595703, "learning_rate": 1.5737009544008486e-05, "loss": 0.3437, "step": 9700 }, { "epoch": 3.4464475079533403, "grad_norm": 2.1570093631744385, "learning_rate": 1.5560268646164725e-05, "loss": 0.4871, "step": 9750 }, { "epoch": 3.4641215977377167, "grad_norm": 7.6717305183410645, "learning_rate": 1.538352774832096e-05, "loss": 0.4224, "step": 9800 }, { "epoch": 3.4817956875220926, "grad_norm": 3.082805871963501, "learning_rate": 1.52067868504772e-05, "loss": 0.4474, "step": 9850 }, { "epoch": 3.499469777306469, "grad_norm": 2.8141167163848877, "learning_rate": 1.5033580770590316e-05, "loss": 0.4476, "step": 9900 }, { "epoch": 3.517143867090845, "grad_norm": 3.179436206817627, "learning_rate": 1.4856839872746553e-05, "loss": 0.3936, "step": 9950 }, { "epoch": 3.534817956875221, "grad_norm": 3.908020257949829, "learning_rate": 1.4680098974902792e-05, "loss": 0.4164, "step": 10000 }, { "epoch": 3.552492046659597, "grad_norm": 4.998553276062012, "learning_rate": 1.4503358077059032e-05, "loss": 0.4534, "step": 10050 }, { "epoch": 3.570166136443973, "grad_norm": 4.064126014709473, "learning_rate": 1.432661717921527e-05, "loss": 0.458, "step": 10100 }, { "epoch": 3.5878402262283493, "grad_norm": 2.2527036666870117, "learning_rate": 1.414987628137151e-05, "loss": 0.4285, "step": 10150 }, { "epoch": 3.605514316012725, "grad_norm": 3.3799755573272705, "learning_rate": 1.397313538352775e-05, "loss": 0.5488, "step": 10200 }, { "epoch": 3.6231884057971016, "grad_norm": 3.4317479133605957, "learning_rate": 1.3796394485683988e-05, "loss": 0.4326, "step": 10250 }, { "epoch": 3.6408624955814775, "grad_norm": 2.245337724685669, "learning_rate": 1.3619653587840228e-05, "loss": 0.4279, "step": 10300 }, { "epoch": 3.658536585365854, "grad_norm": 2.9092109203338623, "learning_rate": 1.3442912689996465e-05, "loss": 0.4116, "step": 10350 }, { "epoch": 3.6762106751502297, "grad_norm": 2.79837965965271, "learning_rate": 1.3266171792152704e-05, "loss": 0.4506, "step": 10400 }, { "epoch": 3.693884764934606, "grad_norm": 1.416994333267212, "learning_rate": 1.3089430894308943e-05, "loss": 0.3532, "step": 10450 }, { "epoch": 3.711558854718982, "grad_norm": 4.927233695983887, "learning_rate": 1.2912689996465183e-05, "loss": 0.4302, "step": 10500 }, { "epoch": 3.729232944503358, "grad_norm": 2.069500684738159, "learning_rate": 1.2735949098621422e-05, "loss": 0.4608, "step": 10550 }, { "epoch": 3.746907034287734, "grad_norm": 3.3507018089294434, "learning_rate": 1.2559208200777661e-05, "loss": 0.4024, "step": 10600 }, { "epoch": 3.7645811240721105, "grad_norm": 2.64599871635437, "learning_rate": 1.2382467302933899e-05, "loss": 0.3817, "step": 10650 }, { "epoch": 3.7822552138564864, "grad_norm": 2.3984270095825195, "learning_rate": 1.2205726405090138e-05, "loss": 0.4399, "step": 10700 }, { "epoch": 3.7999293036408623, "grad_norm": 5.132211685180664, "learning_rate": 1.2028985507246379e-05, "loss": 0.4537, "step": 10750 }, { "epoch": 3.8176033934252387, "grad_norm": 3.9488821029663086, "learning_rate": 1.1852244609402616e-05, "loss": 0.4433, "step": 10800 }, { "epoch": 3.8352774832096146, "grad_norm": 4.978783130645752, "learning_rate": 1.1675503711558855e-05, "loss": 0.3722, "step": 10850 }, { "epoch": 3.852951572993991, "grad_norm": 2.1942172050476074, "learning_rate": 1.1498762813715094e-05, "loss": 0.3641, "step": 10900 }, { "epoch": 3.870625662778367, "grad_norm": 1.962399959564209, "learning_rate": 1.1322021915871334e-05, "loss": 0.3966, "step": 10950 }, { "epoch": 3.888299752562743, "grad_norm": 2.3611438274383545, "learning_rate": 1.1145281018027571e-05, "loss": 0.3872, "step": 11000 }, { "epoch": 3.905973842347119, "grad_norm": 2.8562467098236084, "learning_rate": 1.096854012018381e-05, "loss": 0.3823, "step": 11050 }, { "epoch": 3.9236479321314954, "grad_norm": 3.315880060195923, "learning_rate": 1.079179922234005e-05, "loss": 0.4215, "step": 11100 }, { "epoch": 3.9413220219158713, "grad_norm": 4.15437650680542, "learning_rate": 1.0615058324496289e-05, "loss": 0.4126, "step": 11150 }, { "epoch": 3.9589961117002472, "grad_norm": 3.9605205059051514, "learning_rate": 1.0438317426652528e-05, "loss": 0.3773, "step": 11200 }, { "epoch": 3.9766702014846236, "grad_norm": 3.106764793395996, "learning_rate": 1.0261576528808767e-05, "loss": 0.4297, "step": 11250 }, { "epoch": 3.9943442912689995, "grad_norm": 3.4298675060272217, "learning_rate": 1.0084835630965006e-05, "loss": 0.4305, "step": 11300 }, { "epoch": 4.0, "eval_bertscore_f1": 0.9656413255425373, "eval_bleu": 0.5848426882684508, "eval_loss": 0.3005247414112091, "eval_meteor": 0.73697495147188, "eval_rouge1": 0.8370075787215339, "eval_rouge2": 0.7752220988783712, "eval_runtime": 1268.4642, "eval_samples_per_second": 5.096, "eval_steps_per_second": 0.637, "step": 11316 }, { "epoch": 4.012018381053376, "grad_norm": 4.263380527496338, "learning_rate": 9.908094733121245e-06, "loss": 0.4285, "step": 11350 }, { "epoch": 4.029692470837752, "grad_norm": 14.104089736938477, "learning_rate": 9.731353835277483e-06, "loss": 0.3837, "step": 11400 }, { "epoch": 4.047366560622128, "grad_norm": 2.5981857776641846, "learning_rate": 9.554612937433722e-06, "loss": 0.3773, "step": 11450 }, { "epoch": 4.065040650406504, "grad_norm": 4.44357967376709, "learning_rate": 9.377872039589961e-06, "loss": 0.4325, "step": 11500 }, { "epoch": 4.08271474019088, "grad_norm": 3.7187113761901855, "learning_rate": 9.2011311417462e-06, "loss": 0.427, "step": 11550 }, { "epoch": 4.100388829975256, "grad_norm": 2.364908218383789, "learning_rate": 9.02439024390244e-06, "loss": 0.3617, "step": 11600 }, { "epoch": 4.118062919759632, "grad_norm": 2.663651704788208, "learning_rate": 8.847649346058679e-06, "loss": 0.4174, "step": 11650 }, { "epoch": 4.135737009544009, "grad_norm": 3.6699295043945312, "learning_rate": 8.670908448214918e-06, "loss": 0.4183, "step": 11700 }, { "epoch": 4.153411099328385, "grad_norm": 4.236429214477539, "learning_rate": 8.494167550371156e-06, "loss": 0.4074, "step": 11750 }, { "epoch": 4.171085189112761, "grad_norm": 4.3517632484436035, "learning_rate": 8.317426652527395e-06, "loss": 0.3905, "step": 11800 }, { "epoch": 4.188759278897137, "grad_norm": 2.440966844558716, "learning_rate": 8.140685754683634e-06, "loss": 0.408, "step": 11850 }, { "epoch": 4.2064333686815125, "grad_norm": 3.0445733070373535, "learning_rate": 7.963944856839873e-06, "loss": 0.3646, "step": 11900 }, { "epoch": 4.224107458465889, "grad_norm": 3.174678325653076, "learning_rate": 7.787203958996112e-06, "loss": 0.4027, "step": 11950 }, { "epoch": 4.241781548250265, "grad_norm": 4.445051193237305, "learning_rate": 7.610463061152351e-06, "loss": 0.4111, "step": 12000 }, { "epoch": 4.259455638034641, "grad_norm": 3.7955079078674316, "learning_rate": 7.43372216330859e-06, "loss": 0.3815, "step": 12050 }, { "epoch": 4.277129727819017, "grad_norm": 3.0276503562927246, "learning_rate": 7.256981265464829e-06, "loss": 0.3765, "step": 12100 }, { "epoch": 4.294803817603394, "grad_norm": 1.8871873617172241, "learning_rate": 7.080240367621067e-06, "loss": 0.3771, "step": 12150 }, { "epoch": 4.31247790738777, "grad_norm": 9.927197456359863, "learning_rate": 6.903499469777307e-06, "loss": 0.4112, "step": 12200 }, { "epoch": 4.330151997172146, "grad_norm": 4.721640586853027, "learning_rate": 6.726758571933546e-06, "loss": 0.4451, "step": 12250 }, { "epoch": 4.3478260869565215, "grad_norm": 2.7340986728668213, "learning_rate": 6.550017674089785e-06, "loss": 0.4254, "step": 12300 }, { "epoch": 4.365500176740898, "grad_norm": 3.780824661254883, "learning_rate": 6.373276776246023e-06, "loss": 0.4557, "step": 12350 }, { "epoch": 4.383174266525274, "grad_norm": 3.429931640625, "learning_rate": 6.1965358784022625e-06, "loss": 0.3858, "step": 12400 }, { "epoch": 4.40084835630965, "grad_norm": 3.944438934326172, "learning_rate": 6.019794980558501e-06, "loss": 0.3569, "step": 12450 }, { "epoch": 4.418522446094026, "grad_norm": 2.19978666305542, "learning_rate": 5.843054082714741e-06, "loss": 0.4232, "step": 12500 }, { "epoch": 4.436196535878402, "grad_norm": 1.6702100038528442, "learning_rate": 5.666313184870979e-06, "loss": 0.4216, "step": 12550 }, { "epoch": 4.453870625662779, "grad_norm": 5.39310884475708, "learning_rate": 5.4895722870272184e-06, "loss": 0.393, "step": 12600 }, { "epoch": 4.471544715447155, "grad_norm": 2.8727235794067383, "learning_rate": 5.312831389183457e-06, "loss": 0.4104, "step": 12650 }, { "epoch": 4.4892188052315305, "grad_norm": 1.9998319149017334, "learning_rate": 5.136090491339696e-06, "loss": 0.4592, "step": 12700 }, { "epoch": 4.506892895015906, "grad_norm": 3.140760660171509, "learning_rate": 4.959349593495935e-06, "loss": 0.3582, "step": 12750 }, { "epoch": 4.524566984800282, "grad_norm": 4.489378929138184, "learning_rate": 4.782608695652174e-06, "loss": 0.3891, "step": 12800 }, { "epoch": 4.542241074584659, "grad_norm": 3.2630345821380615, "learning_rate": 4.605867797808413e-06, "loss": 0.3703, "step": 12850 }, { "epoch": 4.559915164369035, "grad_norm": 2.898639440536499, "learning_rate": 4.429126899964652e-06, "loss": 0.4201, "step": 12900 }, { "epoch": 4.577589254153411, "grad_norm": 3.266235589981079, "learning_rate": 4.252386002120891e-06, "loss": 0.4142, "step": 12950 }, { "epoch": 4.595263343937788, "grad_norm": 3.594919204711914, "learning_rate": 4.07564510427713e-06, "loss": 0.4224, "step": 13000 }, { "epoch": 4.612937433722164, "grad_norm": 4.3656439781188965, "learning_rate": 3.898904206433369e-06, "loss": 0.4155, "step": 13050 }, { "epoch": 4.6306115235065395, "grad_norm": 3.863250494003296, "learning_rate": 3.722163308589608e-06, "loss": 0.3933, "step": 13100 }, { "epoch": 4.648285613290915, "grad_norm": 3.6120657920837402, "learning_rate": 3.5454224107458466e-06, "loss": 0.4009, "step": 13150 }, { "epoch": 4.665959703075291, "grad_norm": 3.4946892261505127, "learning_rate": 3.368681512902086e-06, "loss": 0.4538, "step": 13200 }, { "epoch": 4.683633792859668, "grad_norm": 3.0893940925598145, "learning_rate": 3.1919406150583245e-06, "loss": 0.3429, "step": 13250 }, { "epoch": 4.701307882644044, "grad_norm": 3.190537929534912, "learning_rate": 3.0151997172145637e-06, "loss": 0.489, "step": 13300 }, { "epoch": 4.71898197242842, "grad_norm": 5.128622531890869, "learning_rate": 2.8384588193708025e-06, "loss": 0.3926, "step": 13350 }, { "epoch": 4.736656062212796, "grad_norm": 3.6680729389190674, "learning_rate": 2.6617179215270417e-06, "loss": 0.3859, "step": 13400 }, { "epoch": 4.754330151997172, "grad_norm": 2.93373703956604, "learning_rate": 2.4849770236832804e-06, "loss": 0.3637, "step": 13450 }, { "epoch": 4.7720042417815485, "grad_norm": 3.649087429046631, "learning_rate": 2.3082361258395196e-06, "loss": 0.4155, "step": 13500 }, { "epoch": 4.789678331565924, "grad_norm": 1.2933834791183472, "learning_rate": 2.1314952279957584e-06, "loss": 0.3868, "step": 13550 }, { "epoch": 4.8073524213503, "grad_norm": 2.177612781524658, "learning_rate": 1.9547543301519976e-06, "loss": 0.4799, "step": 13600 }, { "epoch": 4.825026511134676, "grad_norm": 3.7405126094818115, "learning_rate": 1.7780134323082363e-06, "loss": 0.455, "step": 13650 }, { "epoch": 4.842700600919053, "grad_norm": 6.44041633605957, "learning_rate": 1.6048073524213503e-06, "loss": 0.3766, "step": 13700 }, { "epoch": 4.860374690703429, "grad_norm": 2.510866165161133, "learning_rate": 1.4280664545775892e-06, "loss": 0.3779, "step": 13750 }, { "epoch": 4.878048780487805, "grad_norm": 4.789300441741943, "learning_rate": 1.2513255567338282e-06, "loss": 0.3892, "step": 13800 }, { "epoch": 4.895722870272181, "grad_norm": 2.6004765033721924, "learning_rate": 1.0745846588900672e-06, "loss": 0.3897, "step": 13850 }, { "epoch": 4.9133969600565575, "grad_norm": 4.115776062011719, "learning_rate": 8.978437610463062e-07, "loss": 0.3896, "step": 13900 }, { "epoch": 4.931071049840933, "grad_norm": 9.11878776550293, "learning_rate": 7.211028632025451e-07, "loss": 0.3918, "step": 13950 }, { "epoch": 4.948745139625309, "grad_norm": 3.8003361225128174, "learning_rate": 5.44361965358784e-07, "loss": 0.476, "step": 14000 }, { "epoch": 4.966419229409685, "grad_norm": 2.117197275161743, "learning_rate": 3.67621067515023e-07, "loss": 0.373, "step": 14050 }, { "epoch": 4.984093319194061, "grad_norm": 1.8130935430526733, "learning_rate": 1.9088016967126194e-07, "loss": 0.4102, "step": 14100 }, { "epoch": 5.0, "eval_bertscore_f1": 0.9660587414250811, "eval_bleu": 0.5882761107143478, "eval_loss": 0.29442909359931946, "eval_meteor": 0.7392640094761435, "eval_rouge1": 0.8386605714105622, "eval_rouge2": 0.7781271007162897, "eval_runtime": 1266.3046, "eval_samples_per_second": 5.105, "eval_steps_per_second": 0.638, "step": 14145 } ], "logging_steps": 50, "max_steps": 14145, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.730048539557888e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }