| { | |
| "best_global_step": 14145, | |
| "best_metric": 0.9660587414250811, | |
| "best_model_checkpoint": "/kaggle/working/codet5-k8s-qlora/checkpoint-14145", | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 14145, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.017674089784376106, | |
| "grad_norm": 2.658311605453491, | |
| "learning_rate": 4.983032873806999e-05, | |
| "loss": 4.0893, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03534817956875221, | |
| "grad_norm": 6.100900173187256, | |
| "learning_rate": 4.9657122658183106e-05, | |
| "loss": 3.214, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.053022269353128315, | |
| "grad_norm": 2.4247324466705322, | |
| "learning_rate": 4.948391657829622e-05, | |
| "loss": 2.2694, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.07069635913750442, | |
| "grad_norm": 2.4390416145324707, | |
| "learning_rate": 4.930717568045246e-05, | |
| "loss": 1.9621, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.08837044892188052, | |
| "grad_norm": 3.003971576690674, | |
| "learning_rate": 4.91304347826087e-05, | |
| "loss": 1.8377, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.10604453870625663, | |
| "grad_norm": 2.6893651485443115, | |
| "learning_rate": 4.895369388476494e-05, | |
| "loss": 1.7639, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.12371862849063273, | |
| "grad_norm": 2.8361988067626953, | |
| "learning_rate": 4.8776952986921177e-05, | |
| "loss": 1.6632, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.14139271827500885, | |
| "grad_norm": 2.17179012298584, | |
| "learning_rate": 4.8600212089077416e-05, | |
| "loss": 1.5525, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.15906680805938495, | |
| "grad_norm": 4.485565185546875, | |
| "learning_rate": 4.842700600919053e-05, | |
| "loss": 1.4975, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.17674089784376104, | |
| "grad_norm": 3.197230577468872, | |
| "learning_rate": 4.825026511134676e-05, | |
| "loss": 1.4888, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.19441498762813716, | |
| "grad_norm": 2.8129756450653076, | |
| "learning_rate": 4.807352421350301e-05, | |
| "loss": 1.32, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.21208907741251326, | |
| "grad_norm": 2.888892650604248, | |
| "learning_rate": 4.789678331565924e-05, | |
| "loss": 1.4137, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.22976316719688936, | |
| "grad_norm": 3.6058623790740967, | |
| "learning_rate": 4.7720042417815487e-05, | |
| "loss": 1.3793, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.24743725698126545, | |
| "grad_norm": 3.077688217163086, | |
| "learning_rate": 4.7543301519971726e-05, | |
| "loss": 1.2157, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.2651113467656416, | |
| "grad_norm": 4.21675443649292, | |
| "learning_rate": 4.7366560622127965e-05, | |
| "loss": 1.3435, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.2827854365500177, | |
| "grad_norm": 3.459958076477051, | |
| "learning_rate": 4.7189819724284204e-05, | |
| "loss": 1.2747, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.30045952633439377, | |
| "grad_norm": 3.2092440128326416, | |
| "learning_rate": 4.701307882644044e-05, | |
| "loss": 1.1624, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.3181336161187699, | |
| "grad_norm": 3.1231963634490967, | |
| "learning_rate": 4.683633792859668e-05, | |
| "loss": 1.1956, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.335807705903146, | |
| "grad_norm": 3.332000970840454, | |
| "learning_rate": 4.6659597030752915e-05, | |
| "loss": 1.118, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.3534817956875221, | |
| "grad_norm": 2.992741823196411, | |
| "learning_rate": 4.648285613290916e-05, | |
| "loss": 1.1513, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.3711558854718982, | |
| "grad_norm": 2.8758022785186768, | |
| "learning_rate": 4.630611523506539e-05, | |
| "loss": 1.0998, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.38882997525627433, | |
| "grad_norm": 3.870368480682373, | |
| "learning_rate": 4.612937433722164e-05, | |
| "loss": 1.0723, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.4065040650406504, | |
| "grad_norm": 4.177937030792236, | |
| "learning_rate": 4.595263343937787e-05, | |
| "loss": 1.0612, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.4241781548250265, | |
| "grad_norm": 2.760124921798706, | |
| "learning_rate": 4.577589254153412e-05, | |
| "loss": 1.0086, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.4418522446094026, | |
| "grad_norm": 3.0196070671081543, | |
| "learning_rate": 4.559915164369035e-05, | |
| "loss": 1.06, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.4595263343937787, | |
| "grad_norm": 2.649152994155884, | |
| "learning_rate": 4.542241074584659e-05, | |
| "loss": 0.9997, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.47720042417815484, | |
| "grad_norm": 3.8896467685699463, | |
| "learning_rate": 4.524566984800283e-05, | |
| "loss": 1.0067, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.4948745139625309, | |
| "grad_norm": 3.186890125274658, | |
| "learning_rate": 4.506892895015907e-05, | |
| "loss": 0.9501, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.512548603746907, | |
| "grad_norm": 3.991669178009033, | |
| "learning_rate": 4.489218805231531e-05, | |
| "loss": 1.0347, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.5302226935312832, | |
| "grad_norm": 11.242384910583496, | |
| "learning_rate": 4.4715447154471546e-05, | |
| "loss": 0.9635, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.5478967833156593, | |
| "grad_norm": 2.5245680809020996, | |
| "learning_rate": 4.4538706256627785e-05, | |
| "loss": 0.9248, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.5655708731000354, | |
| "grad_norm": 4.0713114738464355, | |
| "learning_rate": 4.4361965358784024e-05, | |
| "loss": 0.906, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.5832449628844114, | |
| "grad_norm": 3.434156656265259, | |
| "learning_rate": 4.4185224460940264e-05, | |
| "loss": 0.9438, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.6009190526687875, | |
| "grad_norm": 3.6341230869293213, | |
| "learning_rate": 4.40084835630965e-05, | |
| "loss": 0.8156, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.6185931424531637, | |
| "grad_norm": 4.359820365905762, | |
| "learning_rate": 4.383174266525274e-05, | |
| "loss": 0.9052, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.6362672322375398, | |
| "grad_norm": 3.804647445678711, | |
| "learning_rate": 4.365500176740898e-05, | |
| "loss": 0.8758, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.6539413220219159, | |
| "grad_norm": 21.193464279174805, | |
| "learning_rate": 4.347826086956522e-05, | |
| "loss": 0.8776, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.671615411806292, | |
| "grad_norm": 3.002357244491577, | |
| "learning_rate": 4.330151997172146e-05, | |
| "loss": 0.8658, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.689289501590668, | |
| "grad_norm": 4.116176605224609, | |
| "learning_rate": 4.31247790738777e-05, | |
| "loss": 0.8419, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.7069635913750442, | |
| "grad_norm": 6.561131954193115, | |
| "learning_rate": 4.294803817603394e-05, | |
| "loss": 0.8204, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.7246376811594203, | |
| "grad_norm": 3.203460931777954, | |
| "learning_rate": 4.277129727819018e-05, | |
| "loss": 0.7557, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.7423117709437964, | |
| "grad_norm": 4.0467705726623535, | |
| "learning_rate": 4.259455638034641e-05, | |
| "loss": 0.8053, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.7599858607281725, | |
| "grad_norm": 3.575634002685547, | |
| "learning_rate": 4.242135030045953e-05, | |
| "loss": 0.8128, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.7776599505125487, | |
| "grad_norm": 5.7353363037109375, | |
| "learning_rate": 4.224460940261576e-05, | |
| "loss": 0.8339, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.7953340402969247, | |
| "grad_norm": 4.916664123535156, | |
| "learning_rate": 4.206786850477201e-05, | |
| "loss": 0.7758, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.8130081300813008, | |
| "grad_norm": 4.233948230743408, | |
| "learning_rate": 4.189112760692824e-05, | |
| "loss": 0.7485, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.8306822198656769, | |
| "grad_norm": 3.754826545715332, | |
| "learning_rate": 4.171438670908449e-05, | |
| "loss": 0.6988, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.848356309650053, | |
| "grad_norm": 3.185098171234131, | |
| "learning_rate": 4.153764581124072e-05, | |
| "loss": 0.7067, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.8660303994344292, | |
| "grad_norm": 3.515683650970459, | |
| "learning_rate": 4.1360904913396966e-05, | |
| "loss": 0.7559, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.8837044892188052, | |
| "grad_norm": 4.783038139343262, | |
| "learning_rate": 4.11841640155532e-05, | |
| "loss": 0.7444, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.9013785790031813, | |
| "grad_norm": 3.4242937564849854, | |
| "learning_rate": 4.1007423117709444e-05, | |
| "loss": 0.7676, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.9190526687875574, | |
| "grad_norm": 3.3563663959503174, | |
| "learning_rate": 4.0830682219865676e-05, | |
| "loss": 0.7416, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.9367267585719335, | |
| "grad_norm": 21.883926391601562, | |
| "learning_rate": 4.0653941322021916e-05, | |
| "loss": 0.6892, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.9544008483563097, | |
| "grad_norm": 3.8259048461914062, | |
| "learning_rate": 4.0477200424178155e-05, | |
| "loss": 0.7489, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.9720749381406858, | |
| "grad_norm": 3.026655912399292, | |
| "learning_rate": 4.0300459526334394e-05, | |
| "loss": 0.6679, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.9897490279250618, | |
| "grad_norm": 7.62285041809082, | |
| "learning_rate": 4.012371862849063e-05, | |
| "loss": 0.7393, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_bertscore_f1": 0.9551081777928342, | |
| "eval_bleu": 0.47406093922979725, | |
| "eval_loss": 0.5141507983207703, | |
| "eval_meteor": 0.6443492142009581, | |
| "eval_rouge1": 0.7935683439864762, | |
| "eval_rouge2": 0.6796198647957756, | |
| "eval_runtime": 1335.9702, | |
| "eval_samples_per_second": 4.838, | |
| "eval_steps_per_second": 0.605, | |
| "step": 2829 | |
| }, | |
| { | |
| "epoch": 1.007423117709438, | |
| "grad_norm": 3.7401936054229736, | |
| "learning_rate": 3.994697773064687e-05, | |
| "loss": 0.7272, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.025097207493814, | |
| "grad_norm": 4.575202941894531, | |
| "learning_rate": 3.977023683280312e-05, | |
| "loss": 0.6891, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.0427712972781902, | |
| "grad_norm": 2.909268379211426, | |
| "learning_rate": 3.959349593495935e-05, | |
| "loss": 0.6751, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.0604453870625663, | |
| "grad_norm": 5.258713722229004, | |
| "learning_rate": 3.941675503711559e-05, | |
| "loss": 0.7308, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.0781194768469424, | |
| "grad_norm": 4.8982462882995605, | |
| "learning_rate": 3.924001413927183e-05, | |
| "loss": 0.5938, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.0957935666313185, | |
| "grad_norm": 3.7401649951934814, | |
| "learning_rate": 3.906327324142807e-05, | |
| "loss": 0.7358, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.1134676564156947, | |
| "grad_norm": 2.2274134159088135, | |
| "learning_rate": 3.888653234358431e-05, | |
| "loss": 0.6251, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.1311417462000706, | |
| "grad_norm": 4.285720348358154, | |
| "learning_rate": 3.870979144574055e-05, | |
| "loss": 0.6773, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.148815835984447, | |
| "grad_norm": 3.1202948093414307, | |
| "learning_rate": 3.8533050547896786e-05, | |
| "loss": 0.6485, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.1664899257688228, | |
| "grad_norm": 2.96162486076355, | |
| "learning_rate": 3.8356309650053025e-05, | |
| "loss": 0.6733, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.184164015553199, | |
| "grad_norm": 6.456724166870117, | |
| "learning_rate": 3.817956875220926e-05, | |
| "loss": 0.6142, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.201838105337575, | |
| "grad_norm": 5.0712690353393555, | |
| "learning_rate": 3.8002827854365503e-05, | |
| "loss": 0.6952, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.2195121951219512, | |
| "grad_norm": 5.074472904205322, | |
| "learning_rate": 3.7826086956521736e-05, | |
| "loss": 0.6147, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.2371862849063273, | |
| "grad_norm": 4.572699546813965, | |
| "learning_rate": 3.764934605867798e-05, | |
| "loss": 0.6172, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.2548603746907034, | |
| "grad_norm": 3.24722957611084, | |
| "learning_rate": 3.747260516083422e-05, | |
| "loss": 0.6657, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.2725344644750796, | |
| "grad_norm": 3.6657183170318604, | |
| "learning_rate": 3.729586426299046e-05, | |
| "loss": 0.6999, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.2902085542594557, | |
| "grad_norm": 3.2770209312438965, | |
| "learning_rate": 3.71191233651467e-05, | |
| "loss": 0.6882, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.3078826440438318, | |
| "grad_norm": 4.611114501953125, | |
| "learning_rate": 3.694238246730294e-05, | |
| "loss": 0.6767, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.3255567338282077, | |
| "grad_norm": 3.4801883697509766, | |
| "learning_rate": 3.676564156945918e-05, | |
| "loss": 0.6503, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.343230823612584, | |
| "grad_norm": 4.582475185394287, | |
| "learning_rate": 3.658890067161541e-05, | |
| "loss": 0.5833, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.36090491339696, | |
| "grad_norm": 3.0982961654663086, | |
| "learning_rate": 3.6412159773771656e-05, | |
| "loss": 0.6271, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.378579003181336, | |
| "grad_norm": 3.592360734939575, | |
| "learning_rate": 3.623541887592789e-05, | |
| "loss": 0.6688, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.3962530929657122, | |
| "grad_norm": 4.296905994415283, | |
| "learning_rate": 3.6058677978084134e-05, | |
| "loss": 0.5931, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.4139271827500883, | |
| "grad_norm": 3.616574764251709, | |
| "learning_rate": 3.588193708024037e-05, | |
| "loss": 0.6297, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.4316012725344645, | |
| "grad_norm": 3.1819770336151123, | |
| "learning_rate": 3.570519618239661e-05, | |
| "loss": 0.5801, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.4492753623188406, | |
| "grad_norm": 3.5812184810638428, | |
| "learning_rate": 3.5528455284552845e-05, | |
| "loss": 0.5826, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.4669494521032167, | |
| "grad_norm": 2.889911651611328, | |
| "learning_rate": 3.5351714386709084e-05, | |
| "loss": 0.5396, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.4846235418875928, | |
| "grad_norm": 3.532849073410034, | |
| "learning_rate": 3.5174973488865324e-05, | |
| "loss": 0.5218, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.502297631671969, | |
| "grad_norm": 2.939161777496338, | |
| "learning_rate": 3.499823259102156e-05, | |
| "loss": 0.5701, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.5199717214563448, | |
| "grad_norm": 3.500262975692749, | |
| "learning_rate": 3.48214916931778e-05, | |
| "loss": 0.5117, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.5376458112407212, | |
| "grad_norm": 3.612431526184082, | |
| "learning_rate": 3.464475079533404e-05, | |
| "loss": 0.5067, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.555319901025097, | |
| "grad_norm": 3.3735318183898926, | |
| "learning_rate": 3.446800989749028e-05, | |
| "loss": 0.5403, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.5729939908094734, | |
| "grad_norm": 28.255231857299805, | |
| "learning_rate": 3.429126899964652e-05, | |
| "loss": 0.5442, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.5906680805938493, | |
| "grad_norm": 4.424487113952637, | |
| "learning_rate": 3.411452810180276e-05, | |
| "loss": 0.5769, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.6083421703782255, | |
| "grad_norm": 4.6517109870910645, | |
| "learning_rate": 3.3937787203959e-05, | |
| "loss": 0.5291, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.6260162601626016, | |
| "grad_norm": 4.276078701019287, | |
| "learning_rate": 3.376104630611524e-05, | |
| "loss": 0.6207, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.6436903499469777, | |
| "grad_norm": 3.1325790882110596, | |
| "learning_rate": 3.3584305408271476e-05, | |
| "loss": 0.5807, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.6613644397313538, | |
| "grad_norm": 3.2780227661132812, | |
| "learning_rate": 3.3407564510427716e-05, | |
| "loss": 0.5487, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.67903852951573, | |
| "grad_norm": 3.9542007446289062, | |
| "learning_rate": 3.3230823612583955e-05, | |
| "loss": 0.6385, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.696712619300106, | |
| "grad_norm": 4.091352462768555, | |
| "learning_rate": 3.3054082714740194e-05, | |
| "loss": 0.5845, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.714386709084482, | |
| "grad_norm": 2.3576905727386475, | |
| "learning_rate": 3.2877341816896426e-05, | |
| "loss": 0.4949, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.7320607988688583, | |
| "grad_norm": 3.200242519378662, | |
| "learning_rate": 3.270060091905267e-05, | |
| "loss": 0.5922, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.7497348886532342, | |
| "grad_norm": 3.1346006393432617, | |
| "learning_rate": 3.2523860021208905e-05, | |
| "loss": 0.5259, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.7674089784376106, | |
| "grad_norm": 3.4066524505615234, | |
| "learning_rate": 3.234711912336515e-05, | |
| "loss": 0.562, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.7850830682219865, | |
| "grad_norm": 5.18930196762085, | |
| "learning_rate": 3.217037822552138e-05, | |
| "loss": 0.5825, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 1.8027571580063628, | |
| "grad_norm": 4.159862995147705, | |
| "learning_rate": 3.199363732767763e-05, | |
| "loss": 0.5616, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.8204312477907387, | |
| "grad_norm": 4.439573287963867, | |
| "learning_rate": 3.181689642983386e-05, | |
| "loss": 0.5334, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 1.8381053375751149, | |
| "grad_norm": 6.196533203125, | |
| "learning_rate": 3.164015553199011e-05, | |
| "loss": 0.5887, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.855779427359491, | |
| "grad_norm": 3.715372323989868, | |
| "learning_rate": 3.146341463414634e-05, | |
| "loss": 0.5379, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 1.873453517143867, | |
| "grad_norm": 4.34264612197876, | |
| "learning_rate": 3.128667373630258e-05, | |
| "loss": 0.4827, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.8911276069282432, | |
| "grad_norm": 2.337557315826416, | |
| "learning_rate": 3.1109932838458825e-05, | |
| "loss": 0.4685, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 1.9088016967126193, | |
| "grad_norm": 3.325277805328369, | |
| "learning_rate": 3.093319194061506e-05, | |
| "loss": 0.4983, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.9264757864969955, | |
| "grad_norm": 2.976592540740967, | |
| "learning_rate": 3.0756451042771303e-05, | |
| "loss": 0.5814, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 1.9441498762813714, | |
| "grad_norm": 9.608305931091309, | |
| "learning_rate": 3.0579710144927536e-05, | |
| "loss": 0.5062, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.9618239660657477, | |
| "grad_norm": 3.443791151046753, | |
| "learning_rate": 3.040296924708378e-05, | |
| "loss": 0.5092, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 1.9794980558501236, | |
| "grad_norm": 3.4817845821380615, | |
| "learning_rate": 3.0226228349240014e-05, | |
| "loss": 0.5584, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.9971721456345, | |
| "grad_norm": 3.2107975482940674, | |
| "learning_rate": 3.0049487451396253e-05, | |
| "loss": 0.506, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_bertscore_f1": 0.9621683897930059, | |
| "eval_bleu": 0.5494076455991572, | |
| "eval_loss": 0.37199869751930237, | |
| "eval_meteor": 0.7077566730507359, | |
| "eval_rouge1": 0.8237541199852757, | |
| "eval_rouge2": 0.7474098813874757, | |
| "eval_runtime": 1281.6235, | |
| "eval_samples_per_second": 5.044, | |
| "eval_steps_per_second": 0.63, | |
| "step": 5658 | |
| }, | |
| { | |
| "epoch": 2.014846235418876, | |
| "grad_norm": 3.7251229286193848, | |
| "learning_rate": 2.9872746553552493e-05, | |
| "loss": 0.4928, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.032520325203252, | |
| "grad_norm": 3.801664113998413, | |
| "learning_rate": 2.9696005655708732e-05, | |
| "loss": 0.5748, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 2.050194414987628, | |
| "grad_norm": 5.817806243896484, | |
| "learning_rate": 2.9519264757864974e-05, | |
| "loss": 0.4844, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.0678685047720045, | |
| "grad_norm": 3.028961658477783, | |
| "learning_rate": 2.934252386002121e-05, | |
| "loss": 0.4626, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 2.0855425945563804, | |
| "grad_norm": 3.974060297012329, | |
| "learning_rate": 2.9165782962177453e-05, | |
| "loss": 0.5274, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 2.1032166843407563, | |
| "grad_norm": 2.532444953918457, | |
| "learning_rate": 2.898904206433369e-05, | |
| "loss": 0.4887, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 2.1208907741251326, | |
| "grad_norm": 2.0569326877593994, | |
| "learning_rate": 2.8812301166489924e-05, | |
| "loss": 0.4353, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.1385648639095085, | |
| "grad_norm": 3.0496156215667725, | |
| "learning_rate": 2.8635560268646167e-05, | |
| "loss": 0.4347, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 2.156238953693885, | |
| "grad_norm": 2.635395050048828, | |
| "learning_rate": 2.8458819370802403e-05, | |
| "loss": 0.5406, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 2.1739130434782608, | |
| "grad_norm": 4.091008186340332, | |
| "learning_rate": 2.8282078472958645e-05, | |
| "loss": 0.4885, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 2.191587133262637, | |
| "grad_norm": 3.228792905807495, | |
| "learning_rate": 2.810533757511488e-05, | |
| "loss": 0.5072, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 2.209261223047013, | |
| "grad_norm": 2.479149341583252, | |
| "learning_rate": 2.7928596677271124e-05, | |
| "loss": 0.5475, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 2.2269353128313893, | |
| "grad_norm": 4.617306709289551, | |
| "learning_rate": 2.775185577942736e-05, | |
| "loss": 0.5468, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 2.2446094026157652, | |
| "grad_norm": 4.416631698608398, | |
| "learning_rate": 2.7575114881583602e-05, | |
| "loss": 0.5125, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 2.262283492400141, | |
| "grad_norm": 3.7900924682617188, | |
| "learning_rate": 2.7398373983739838e-05, | |
| "loss": 0.5824, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 2.2799575821845175, | |
| "grad_norm": 3.695364236831665, | |
| "learning_rate": 2.7221633085896077e-05, | |
| "loss": 0.4806, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 2.297631671968894, | |
| "grad_norm": 2.609520196914673, | |
| "learning_rate": 2.7044892188052316e-05, | |
| "loss": 0.4537, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.3153057617532697, | |
| "grad_norm": 4.006641864776611, | |
| "learning_rate": 2.6868151290208555e-05, | |
| "loss": 0.5172, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 2.3329798515376456, | |
| "grad_norm": 3.581960439682007, | |
| "learning_rate": 2.6691410392364795e-05, | |
| "loss": 0.5089, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 2.350653941322022, | |
| "grad_norm": 2.6414718627929688, | |
| "learning_rate": 2.6514669494521034e-05, | |
| "loss": 0.4936, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 2.368328031106398, | |
| "grad_norm": 3.3889434337615967, | |
| "learning_rate": 2.6337928596677276e-05, | |
| "loss": 0.535, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 2.3860021208907742, | |
| "grad_norm": 4.371047496795654, | |
| "learning_rate": 2.6161187698833512e-05, | |
| "loss": 0.4651, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 2.40367621067515, | |
| "grad_norm": 4.057021617889404, | |
| "learning_rate": 2.5984446800989748e-05, | |
| "loss": 0.4369, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 2.4213503004595265, | |
| "grad_norm": 4.6812615394592285, | |
| "learning_rate": 2.580770590314599e-05, | |
| "loss": 0.5067, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 2.4390243902439024, | |
| "grad_norm": 6.067279815673828, | |
| "learning_rate": 2.5630965005302226e-05, | |
| "loss": 0.4901, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 2.4566984800282787, | |
| "grad_norm": 3.8635661602020264, | |
| "learning_rate": 2.545422410745847e-05, | |
| "loss": 0.415, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 2.4743725698126546, | |
| "grad_norm": 1.7011466026306152, | |
| "learning_rate": 2.5277483209614705e-05, | |
| "loss": 0.4893, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.4920466595970305, | |
| "grad_norm": 3.8497934341430664, | |
| "learning_rate": 2.5100742311770947e-05, | |
| "loss": 0.4504, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 2.509720749381407, | |
| "grad_norm": 3.670374631881714, | |
| "learning_rate": 2.4924001413927183e-05, | |
| "loss": 0.4883, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 2.5273948391657832, | |
| "grad_norm": 3.130357503890991, | |
| "learning_rate": 2.4747260516083422e-05, | |
| "loss": 0.5031, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 2.545068928950159, | |
| "grad_norm": 3.647500514984131, | |
| "learning_rate": 2.457051961823966e-05, | |
| "loss": 0.4368, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 2.562743018734535, | |
| "grad_norm": 3.6657369136810303, | |
| "learning_rate": 2.43937787203959e-05, | |
| "loss": 0.4686, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 2.5804171085189114, | |
| "grad_norm": 5.371551036834717, | |
| "learning_rate": 2.421703782255214e-05, | |
| "loss": 0.4433, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 2.5980911983032873, | |
| "grad_norm": 3.593418598175049, | |
| "learning_rate": 2.404029692470838e-05, | |
| "loss": 0.4901, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 2.6157652880876636, | |
| "grad_norm": 3.1181206703186035, | |
| "learning_rate": 2.3863556026864618e-05, | |
| "loss": 0.4834, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 2.6334393778720395, | |
| "grad_norm": 4.218138217926025, | |
| "learning_rate": 2.3686815129020857e-05, | |
| "loss": 0.5013, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 2.6511134676564154, | |
| "grad_norm": 3.5063066482543945, | |
| "learning_rate": 2.3510074231177097e-05, | |
| "loss": 0.4705, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.6687875574407918, | |
| "grad_norm": 2.8965365886688232, | |
| "learning_rate": 2.3333333333333336e-05, | |
| "loss": 0.464, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 2.686461647225168, | |
| "grad_norm": 2.336358070373535, | |
| "learning_rate": 2.3156592435489575e-05, | |
| "loss": 0.4591, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 2.704135737009544, | |
| "grad_norm": 3.5483410358428955, | |
| "learning_rate": 2.2979851537645814e-05, | |
| "loss": 0.4419, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 2.72180982679392, | |
| "grad_norm": 4.550882816314697, | |
| "learning_rate": 2.280311063980205e-05, | |
| "loss": 0.4181, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 2.7394839165782963, | |
| "grad_norm": 4.471234321594238, | |
| "learning_rate": 2.262636974195829e-05, | |
| "loss": 0.4558, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 2.757158006362672, | |
| "grad_norm": 3.0595200061798096, | |
| "learning_rate": 2.244962884411453e-05, | |
| "loss": 0.4188, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 2.7748320961470485, | |
| "grad_norm": 7.5111403465271, | |
| "learning_rate": 2.2272887946270768e-05, | |
| "loss": 0.4834, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 2.7925061859314244, | |
| "grad_norm": 2.2414655685424805, | |
| "learning_rate": 2.2096147048427007e-05, | |
| "loss": 0.442, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 2.8101802757158008, | |
| "grad_norm": 4.036431789398193, | |
| "learning_rate": 2.1919406150583246e-05, | |
| "loss": 0.4254, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 2.8278543655001767, | |
| "grad_norm": 3.3172266483306885, | |
| "learning_rate": 2.1742665252739485e-05, | |
| "loss": 0.4852, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.845528455284553, | |
| "grad_norm": 4.143049240112305, | |
| "learning_rate": 2.1565924354895724e-05, | |
| "loss": 0.4858, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 2.863202545068929, | |
| "grad_norm": 5.017402172088623, | |
| "learning_rate": 2.138918345705196e-05, | |
| "loss": 0.3824, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 2.880876634853305, | |
| "grad_norm": 2.974952459335327, | |
| "learning_rate": 2.1212442559208203e-05, | |
| "loss": 0.4777, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 2.898550724637681, | |
| "grad_norm": 7.074586868286133, | |
| "learning_rate": 2.1035701661364442e-05, | |
| "loss": 0.4465, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 2.9162248144220575, | |
| "grad_norm": 3.585792064666748, | |
| "learning_rate": 2.085896076352068e-05, | |
| "loss": 0.4307, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 2.9338989042064334, | |
| "grad_norm": 1.6561566591262817, | |
| "learning_rate": 2.068221986567692e-05, | |
| "loss": 0.3917, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 2.9515729939908093, | |
| "grad_norm": 4.920962810516357, | |
| "learning_rate": 2.050547896783316e-05, | |
| "loss": 0.4334, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 2.9692470837751856, | |
| "grad_norm": 2.6819636821746826, | |
| "learning_rate": 2.03287380699894e-05, | |
| "loss": 0.4679, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 2.9869211735595615, | |
| "grad_norm": 3.442260265350342, | |
| "learning_rate": 2.0151997172145634e-05, | |
| "loss": 0.4466, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_bertscore_f1": 0.9646675708510055, | |
| "eval_bleu": 0.5742982540038749, | |
| "eval_loss": 0.3243306279182434, | |
| "eval_meteor": 0.7281699575301964, | |
| "eval_rouge1": 0.8328916554556949, | |
| "eval_rouge2": 0.7666932565109175, | |
| "eval_runtime": 1288.8847, | |
| "eval_samples_per_second": 5.015, | |
| "eval_steps_per_second": 0.627, | |
| "step": 8487 | |
| }, | |
| { | |
| "epoch": 3.004595263343938, | |
| "grad_norm": 3.7356512546539307, | |
| "learning_rate": 1.9975256274301874e-05, | |
| "loss": 0.394, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 3.022269353128314, | |
| "grad_norm": 3.7725515365600586, | |
| "learning_rate": 1.9798515376458113e-05, | |
| "loss": 0.4484, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 3.03994344291269, | |
| "grad_norm": 2.475839138031006, | |
| "learning_rate": 1.9621774478614352e-05, | |
| "loss": 0.4463, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 3.057617532697066, | |
| "grad_norm": 2.853266716003418, | |
| "learning_rate": 1.944503358077059e-05, | |
| "loss": 0.4398, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 3.0752916224814424, | |
| "grad_norm": 2.7079474925994873, | |
| "learning_rate": 1.926829268292683e-05, | |
| "loss": 0.4021, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 3.0929657122658183, | |
| "grad_norm": 5.04539680480957, | |
| "learning_rate": 1.909155178508307e-05, | |
| "loss": 0.3996, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 3.110639802050194, | |
| "grad_norm": 4.626221656799316, | |
| "learning_rate": 1.8918345705196184e-05, | |
| "loss": 0.4948, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 3.1283138918345705, | |
| "grad_norm": 4.644408226013184, | |
| "learning_rate": 1.8741604807352423e-05, | |
| "loss": 0.4156, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 3.1459879816189464, | |
| "grad_norm": 4.299105167388916, | |
| "learning_rate": 1.8564863909508662e-05, | |
| "loss": 0.3977, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 3.163662071403323, | |
| "grad_norm": 4.650149345397949, | |
| "learning_rate": 1.83881230116649e-05, | |
| "loss": 0.4229, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 3.1813361611876987, | |
| "grad_norm": 2.89013409614563, | |
| "learning_rate": 1.821138211382114e-05, | |
| "loss": 0.4506, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 3.199010250972075, | |
| "grad_norm": 2.281370162963867, | |
| "learning_rate": 1.8034641215977376e-05, | |
| "loss": 0.4288, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 3.216684340756451, | |
| "grad_norm": 4.948707103729248, | |
| "learning_rate": 1.7857900318133615e-05, | |
| "loss": 0.4633, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 3.2343584305408273, | |
| "grad_norm": 3.5856571197509766, | |
| "learning_rate": 1.7681159420289855e-05, | |
| "loss": 0.3965, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 3.252032520325203, | |
| "grad_norm": 3.416271686553955, | |
| "learning_rate": 1.7504418522446094e-05, | |
| "loss": 0.4904, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 3.2697066101095795, | |
| "grad_norm": 3.599717617034912, | |
| "learning_rate": 1.7327677624602333e-05, | |
| "loss": 0.4648, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 3.2873806998939554, | |
| "grad_norm": 2.8439853191375732, | |
| "learning_rate": 1.7150936726758572e-05, | |
| "loss": 0.3734, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 3.3050547896783318, | |
| "grad_norm": 4.0927863121032715, | |
| "learning_rate": 1.697419582891481e-05, | |
| "loss": 0.3913, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 3.3227288794627077, | |
| "grad_norm": 4.16766881942749, | |
| "learning_rate": 1.679745493107105e-05, | |
| "loss": 0.4303, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 3.3404029692470836, | |
| "grad_norm": 3.417738199234009, | |
| "learning_rate": 1.662071403322729e-05, | |
| "loss": 0.4068, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 3.35807705903146, | |
| "grad_norm": 4.66575813293457, | |
| "learning_rate": 1.644397313538353e-05, | |
| "loss": 0.5116, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 3.375751148815836, | |
| "grad_norm": 6.112340927124023, | |
| "learning_rate": 1.6267232237539768e-05, | |
| "loss": 0.4244, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 3.393425238600212, | |
| "grad_norm": 3.322610378265381, | |
| "learning_rate": 1.6090491339696007e-05, | |
| "loss": 0.4252, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 3.411099328384588, | |
| "grad_norm": 4.941850185394287, | |
| "learning_rate": 1.5913750441852247e-05, | |
| "loss": 0.402, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 3.4287734181689644, | |
| "grad_norm": 2.177600860595703, | |
| "learning_rate": 1.5737009544008486e-05, | |
| "loss": 0.3437, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 3.4464475079533403, | |
| "grad_norm": 2.1570093631744385, | |
| "learning_rate": 1.5560268646164725e-05, | |
| "loss": 0.4871, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 3.4641215977377167, | |
| "grad_norm": 7.6717305183410645, | |
| "learning_rate": 1.538352774832096e-05, | |
| "loss": 0.4224, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 3.4817956875220926, | |
| "grad_norm": 3.082805871963501, | |
| "learning_rate": 1.52067868504772e-05, | |
| "loss": 0.4474, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 3.499469777306469, | |
| "grad_norm": 2.8141167163848877, | |
| "learning_rate": 1.5033580770590316e-05, | |
| "loss": 0.4476, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 3.517143867090845, | |
| "grad_norm": 3.179436206817627, | |
| "learning_rate": 1.4856839872746553e-05, | |
| "loss": 0.3936, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 3.534817956875221, | |
| "grad_norm": 3.908020257949829, | |
| "learning_rate": 1.4680098974902792e-05, | |
| "loss": 0.4164, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 3.552492046659597, | |
| "grad_norm": 4.998553276062012, | |
| "learning_rate": 1.4503358077059032e-05, | |
| "loss": 0.4534, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 3.570166136443973, | |
| "grad_norm": 4.064126014709473, | |
| "learning_rate": 1.432661717921527e-05, | |
| "loss": 0.458, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 3.5878402262283493, | |
| "grad_norm": 2.2527036666870117, | |
| "learning_rate": 1.414987628137151e-05, | |
| "loss": 0.4285, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 3.605514316012725, | |
| "grad_norm": 3.3799755573272705, | |
| "learning_rate": 1.397313538352775e-05, | |
| "loss": 0.5488, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 3.6231884057971016, | |
| "grad_norm": 3.4317479133605957, | |
| "learning_rate": 1.3796394485683988e-05, | |
| "loss": 0.4326, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 3.6408624955814775, | |
| "grad_norm": 2.245337724685669, | |
| "learning_rate": 1.3619653587840228e-05, | |
| "loss": 0.4279, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 3.658536585365854, | |
| "grad_norm": 2.9092109203338623, | |
| "learning_rate": 1.3442912689996465e-05, | |
| "loss": 0.4116, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 3.6762106751502297, | |
| "grad_norm": 2.79837965965271, | |
| "learning_rate": 1.3266171792152704e-05, | |
| "loss": 0.4506, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 3.693884764934606, | |
| "grad_norm": 1.416994333267212, | |
| "learning_rate": 1.3089430894308943e-05, | |
| "loss": 0.3532, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 3.711558854718982, | |
| "grad_norm": 4.927233695983887, | |
| "learning_rate": 1.2912689996465183e-05, | |
| "loss": 0.4302, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 3.729232944503358, | |
| "grad_norm": 2.069500684738159, | |
| "learning_rate": 1.2735949098621422e-05, | |
| "loss": 0.4608, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 3.746907034287734, | |
| "grad_norm": 3.3507018089294434, | |
| "learning_rate": 1.2559208200777661e-05, | |
| "loss": 0.4024, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 3.7645811240721105, | |
| "grad_norm": 2.64599871635437, | |
| "learning_rate": 1.2382467302933899e-05, | |
| "loss": 0.3817, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 3.7822552138564864, | |
| "grad_norm": 2.3984270095825195, | |
| "learning_rate": 1.2205726405090138e-05, | |
| "loss": 0.4399, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 3.7999293036408623, | |
| "grad_norm": 5.132211685180664, | |
| "learning_rate": 1.2028985507246379e-05, | |
| "loss": 0.4537, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 3.8176033934252387, | |
| "grad_norm": 3.9488821029663086, | |
| "learning_rate": 1.1852244609402616e-05, | |
| "loss": 0.4433, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 3.8352774832096146, | |
| "grad_norm": 4.978783130645752, | |
| "learning_rate": 1.1675503711558855e-05, | |
| "loss": 0.3722, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 3.852951572993991, | |
| "grad_norm": 2.1942172050476074, | |
| "learning_rate": 1.1498762813715094e-05, | |
| "loss": 0.3641, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 3.870625662778367, | |
| "grad_norm": 1.962399959564209, | |
| "learning_rate": 1.1322021915871334e-05, | |
| "loss": 0.3966, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 3.888299752562743, | |
| "grad_norm": 2.3611438274383545, | |
| "learning_rate": 1.1145281018027571e-05, | |
| "loss": 0.3872, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 3.905973842347119, | |
| "grad_norm": 2.8562467098236084, | |
| "learning_rate": 1.096854012018381e-05, | |
| "loss": 0.3823, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 3.9236479321314954, | |
| "grad_norm": 3.315880060195923, | |
| "learning_rate": 1.079179922234005e-05, | |
| "loss": 0.4215, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 3.9413220219158713, | |
| "grad_norm": 4.15437650680542, | |
| "learning_rate": 1.0615058324496289e-05, | |
| "loss": 0.4126, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 3.9589961117002472, | |
| "grad_norm": 3.9605205059051514, | |
| "learning_rate": 1.0438317426652528e-05, | |
| "loss": 0.3773, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 3.9766702014846236, | |
| "grad_norm": 3.106764793395996, | |
| "learning_rate": 1.0261576528808767e-05, | |
| "loss": 0.4297, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 3.9943442912689995, | |
| "grad_norm": 3.4298675060272217, | |
| "learning_rate": 1.0084835630965006e-05, | |
| "loss": 0.4305, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_bertscore_f1": 0.9656413255425373, | |
| "eval_bleu": 0.5848426882684508, | |
| "eval_loss": 0.3005247414112091, | |
| "eval_meteor": 0.73697495147188, | |
| "eval_rouge1": 0.8370075787215339, | |
| "eval_rouge2": 0.7752220988783712, | |
| "eval_runtime": 1268.4642, | |
| "eval_samples_per_second": 5.096, | |
| "eval_steps_per_second": 0.637, | |
| "step": 11316 | |
| }, | |
| { | |
| "epoch": 4.012018381053376, | |
| "grad_norm": 4.263380527496338, | |
| "learning_rate": 9.908094733121245e-06, | |
| "loss": 0.4285, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 4.029692470837752, | |
| "grad_norm": 14.104089736938477, | |
| "learning_rate": 9.731353835277483e-06, | |
| "loss": 0.3837, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 4.047366560622128, | |
| "grad_norm": 2.5981857776641846, | |
| "learning_rate": 9.554612937433722e-06, | |
| "loss": 0.3773, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 4.065040650406504, | |
| "grad_norm": 4.44357967376709, | |
| "learning_rate": 9.377872039589961e-06, | |
| "loss": 0.4325, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 4.08271474019088, | |
| "grad_norm": 3.7187113761901855, | |
| "learning_rate": 9.2011311417462e-06, | |
| "loss": 0.427, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 4.100388829975256, | |
| "grad_norm": 2.364908218383789, | |
| "learning_rate": 9.02439024390244e-06, | |
| "loss": 0.3617, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 4.118062919759632, | |
| "grad_norm": 2.663651704788208, | |
| "learning_rate": 8.847649346058679e-06, | |
| "loss": 0.4174, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 4.135737009544009, | |
| "grad_norm": 3.6699295043945312, | |
| "learning_rate": 8.670908448214918e-06, | |
| "loss": 0.4183, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 4.153411099328385, | |
| "grad_norm": 4.236429214477539, | |
| "learning_rate": 8.494167550371156e-06, | |
| "loss": 0.4074, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 4.171085189112761, | |
| "grad_norm": 4.3517632484436035, | |
| "learning_rate": 8.317426652527395e-06, | |
| "loss": 0.3905, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 4.188759278897137, | |
| "grad_norm": 2.440966844558716, | |
| "learning_rate": 8.140685754683634e-06, | |
| "loss": 0.408, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 4.2064333686815125, | |
| "grad_norm": 3.0445733070373535, | |
| "learning_rate": 7.963944856839873e-06, | |
| "loss": 0.3646, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 4.224107458465889, | |
| "grad_norm": 3.174678325653076, | |
| "learning_rate": 7.787203958996112e-06, | |
| "loss": 0.4027, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 4.241781548250265, | |
| "grad_norm": 4.445051193237305, | |
| "learning_rate": 7.610463061152351e-06, | |
| "loss": 0.4111, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 4.259455638034641, | |
| "grad_norm": 3.7955079078674316, | |
| "learning_rate": 7.43372216330859e-06, | |
| "loss": 0.3815, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 4.277129727819017, | |
| "grad_norm": 3.0276503562927246, | |
| "learning_rate": 7.256981265464829e-06, | |
| "loss": 0.3765, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 4.294803817603394, | |
| "grad_norm": 1.8871873617172241, | |
| "learning_rate": 7.080240367621067e-06, | |
| "loss": 0.3771, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 4.31247790738777, | |
| "grad_norm": 9.927197456359863, | |
| "learning_rate": 6.903499469777307e-06, | |
| "loss": 0.4112, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 4.330151997172146, | |
| "grad_norm": 4.721640586853027, | |
| "learning_rate": 6.726758571933546e-06, | |
| "loss": 0.4451, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 4.3478260869565215, | |
| "grad_norm": 2.7340986728668213, | |
| "learning_rate": 6.550017674089785e-06, | |
| "loss": 0.4254, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 4.365500176740898, | |
| "grad_norm": 3.780824661254883, | |
| "learning_rate": 6.373276776246023e-06, | |
| "loss": 0.4557, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 4.383174266525274, | |
| "grad_norm": 3.429931640625, | |
| "learning_rate": 6.1965358784022625e-06, | |
| "loss": 0.3858, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 4.40084835630965, | |
| "grad_norm": 3.944438934326172, | |
| "learning_rate": 6.019794980558501e-06, | |
| "loss": 0.3569, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 4.418522446094026, | |
| "grad_norm": 2.19978666305542, | |
| "learning_rate": 5.843054082714741e-06, | |
| "loss": 0.4232, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 4.436196535878402, | |
| "grad_norm": 1.6702100038528442, | |
| "learning_rate": 5.666313184870979e-06, | |
| "loss": 0.4216, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 4.453870625662779, | |
| "grad_norm": 5.39310884475708, | |
| "learning_rate": 5.4895722870272184e-06, | |
| "loss": 0.393, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 4.471544715447155, | |
| "grad_norm": 2.8727235794067383, | |
| "learning_rate": 5.312831389183457e-06, | |
| "loss": 0.4104, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 4.4892188052315305, | |
| "grad_norm": 1.9998319149017334, | |
| "learning_rate": 5.136090491339696e-06, | |
| "loss": 0.4592, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 4.506892895015906, | |
| "grad_norm": 3.140760660171509, | |
| "learning_rate": 4.959349593495935e-06, | |
| "loss": 0.3582, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 4.524566984800282, | |
| "grad_norm": 4.489378929138184, | |
| "learning_rate": 4.782608695652174e-06, | |
| "loss": 0.3891, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 4.542241074584659, | |
| "grad_norm": 3.2630345821380615, | |
| "learning_rate": 4.605867797808413e-06, | |
| "loss": 0.3703, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 4.559915164369035, | |
| "grad_norm": 2.898639440536499, | |
| "learning_rate": 4.429126899964652e-06, | |
| "loss": 0.4201, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 4.577589254153411, | |
| "grad_norm": 3.266235589981079, | |
| "learning_rate": 4.252386002120891e-06, | |
| "loss": 0.4142, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 4.595263343937788, | |
| "grad_norm": 3.594919204711914, | |
| "learning_rate": 4.07564510427713e-06, | |
| "loss": 0.4224, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 4.612937433722164, | |
| "grad_norm": 4.3656439781188965, | |
| "learning_rate": 3.898904206433369e-06, | |
| "loss": 0.4155, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 4.6306115235065395, | |
| "grad_norm": 3.863250494003296, | |
| "learning_rate": 3.722163308589608e-06, | |
| "loss": 0.3933, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 4.648285613290915, | |
| "grad_norm": 3.6120657920837402, | |
| "learning_rate": 3.5454224107458466e-06, | |
| "loss": 0.4009, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 4.665959703075291, | |
| "grad_norm": 3.4946892261505127, | |
| "learning_rate": 3.368681512902086e-06, | |
| "loss": 0.4538, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 4.683633792859668, | |
| "grad_norm": 3.0893940925598145, | |
| "learning_rate": 3.1919406150583245e-06, | |
| "loss": 0.3429, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 4.701307882644044, | |
| "grad_norm": 3.190537929534912, | |
| "learning_rate": 3.0151997172145637e-06, | |
| "loss": 0.489, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 4.71898197242842, | |
| "grad_norm": 5.128622531890869, | |
| "learning_rate": 2.8384588193708025e-06, | |
| "loss": 0.3926, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 4.736656062212796, | |
| "grad_norm": 3.6680729389190674, | |
| "learning_rate": 2.6617179215270417e-06, | |
| "loss": 0.3859, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 4.754330151997172, | |
| "grad_norm": 2.93373703956604, | |
| "learning_rate": 2.4849770236832804e-06, | |
| "loss": 0.3637, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 4.7720042417815485, | |
| "grad_norm": 3.649087429046631, | |
| "learning_rate": 2.3082361258395196e-06, | |
| "loss": 0.4155, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 4.789678331565924, | |
| "grad_norm": 1.2933834791183472, | |
| "learning_rate": 2.1314952279957584e-06, | |
| "loss": 0.3868, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 4.8073524213503, | |
| "grad_norm": 2.177612781524658, | |
| "learning_rate": 1.9547543301519976e-06, | |
| "loss": 0.4799, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 4.825026511134676, | |
| "grad_norm": 3.7405126094818115, | |
| "learning_rate": 1.7780134323082363e-06, | |
| "loss": 0.455, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 4.842700600919053, | |
| "grad_norm": 6.44041633605957, | |
| "learning_rate": 1.6048073524213503e-06, | |
| "loss": 0.3766, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 4.860374690703429, | |
| "grad_norm": 2.510866165161133, | |
| "learning_rate": 1.4280664545775892e-06, | |
| "loss": 0.3779, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 4.878048780487805, | |
| "grad_norm": 4.789300441741943, | |
| "learning_rate": 1.2513255567338282e-06, | |
| "loss": 0.3892, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 4.895722870272181, | |
| "grad_norm": 2.6004765033721924, | |
| "learning_rate": 1.0745846588900672e-06, | |
| "loss": 0.3897, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 4.9133969600565575, | |
| "grad_norm": 4.115776062011719, | |
| "learning_rate": 8.978437610463062e-07, | |
| "loss": 0.3896, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 4.931071049840933, | |
| "grad_norm": 9.11878776550293, | |
| "learning_rate": 7.211028632025451e-07, | |
| "loss": 0.3918, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 4.948745139625309, | |
| "grad_norm": 3.8003361225128174, | |
| "learning_rate": 5.44361965358784e-07, | |
| "loss": 0.476, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 4.966419229409685, | |
| "grad_norm": 2.117197275161743, | |
| "learning_rate": 3.67621067515023e-07, | |
| "loss": 0.373, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 4.984093319194061, | |
| "grad_norm": 1.8130935430526733, | |
| "learning_rate": 1.9088016967126194e-07, | |
| "loss": 0.4102, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_bertscore_f1": 0.9660587414250811, | |
| "eval_bleu": 0.5882761107143478, | |
| "eval_loss": 0.29442909359931946, | |
| "eval_meteor": 0.7392640094761435, | |
| "eval_rouge1": 0.8386605714105622, | |
| "eval_rouge2": 0.7781271007162897, | |
| "eval_runtime": 1266.3046, | |
| "eval_samples_per_second": 5.105, | |
| "eval_steps_per_second": 0.638, | |
| "step": 14145 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 14145, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.730048539557888e+16, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |