{
  "best_global_step": 8487,
  "best_metric": 0.9710737692450385,
  "best_model_checkpoint": "./codet5-qlora-k8s/checkpoint-8487",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 8487,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.017674089784376106,
      "grad_norm": 1.1207759380340576,
      "learning_rate": 0.00029898197242841994,
      "loss": 3.2886,
      "step": 50
    },
    {
      "epoch": 0.03534817956875221,
      "grad_norm": 1.2978123426437378,
      "learning_rate": 0.0002979215270413573,
      "loss": 1.8567,
      "step": 100
    },
    {
      "epoch": 0.053022269353128315,
      "grad_norm": 1.624740719795227,
      "learning_rate": 0.0002968610816542948,
      "loss": 1.5695,
      "step": 150
    },
    {
      "epoch": 0.07069635913750442,
      "grad_norm": 1.7711330652236938,
      "learning_rate": 0.0002958006362672322,
      "loss": 1.4205,
      "step": 200
    },
    {
      "epoch": 0.08837044892188052,
      "grad_norm": 1.62517511844635,
      "learning_rate": 0.0002947401908801697,
      "loss": 1.2732,
      "step": 250
    },
    {
      "epoch": 0.10604453870625663,
      "grad_norm": 2.038139820098877,
      "learning_rate": 0.00029367974549310706,
      "loss": 1.1913,
      "step": 300
    },
    {
      "epoch": 0.12371862849063273,
      "grad_norm": 2.262789487838745,
      "learning_rate": 0.00029264050901378576,
      "loss": 1.117,
      "step": 350
    },
    {
      "epoch": 0.14139271827500885,
      "grad_norm": 3.121687650680542,
      "learning_rate": 0.0002915800636267232,
      "loss": 1.0202,
      "step": 400
    },
    {
      "epoch": 0.15906680805938495,
      "grad_norm": 2.0951812267303467,
      "learning_rate": 0.0002905196182396606,
      "loss": 0.9499,
      "step": 450
    },
    {
      "epoch": 0.17674089784376104,
      "grad_norm": 2.670121192932129,
      "learning_rate": 0.00028945917285259806,
      "loss": 0.9707,
      "step": 500
    },
    {
      "epoch": 0.19441498762813716,
      "grad_norm": 2.3631107807159424,
      "learning_rate": 0.00028841993637327676,
      "loss": 0.7961,
      "step": 550
    },
    {
      "epoch": 0.21208907741251326,
      "grad_norm": 2.10772705078125,
      "learning_rate": 0.0002873594909862142,
      "loss": 0.8912,
      "step": 600
    },
    {
      "epoch": 0.22976316719688936,
      "grad_norm": 2.360686779022217,
      "learning_rate": 0.00028629904559915163,
      "loss": 0.871,
      "step": 650
    },
    {
      "epoch": 0.24743725698126545,
      "grad_norm": 2.191119432449341,
      "learning_rate": 0.0002852598091198303,
      "loss": 0.758,
      "step": 700
    },
    {
      "epoch": 0.2651113467656416,
      "grad_norm": 1.7646818161010742,
      "learning_rate": 0.00028419936373276776,
      "loss": 0.8244,
      "step": 750
    },
    {
      "epoch": 0.2827854365500177,
      "grad_norm": 2.3776354789733887,
      "learning_rate": 0.00028313891834570514,
      "loss": 0.7664,
      "step": 800
    },
    {
      "epoch": 0.30045952633439377,
      "grad_norm": 2.8682475090026855,
      "learning_rate": 0.00028207847295864263,
      "loss": 0.6942,
      "step": 850
    },
    {
      "epoch": 0.3181336161187699,
      "grad_norm": 2.353091239929199,
      "learning_rate": 0.00028101802757158,
      "loss": 0.7323,
      "step": 900
    },
    {
      "epoch": 0.335807705903146,
      "grad_norm": 1.9457337856292725,
      "learning_rate": 0.00027995758218451745,
      "loss": 0.6474,
      "step": 950
    },
    {
      "epoch": 0.3534817956875221,
      "grad_norm": 2.510075330734253,
      "learning_rate": 0.00027889713679745494,
      "loss": 0.6801,
      "step": 1000
    },
    {
      "epoch": 0.3711558854718982,
      "grad_norm": 1.7497014999389648,
      "learning_rate": 0.0002778366914103923,
      "loss": 0.656,
      "step": 1050
    },
    {
      "epoch": 0.38882997525627433,
      "grad_norm": 2.862682342529297,
      "learning_rate": 0.0002767762460233298,
      "loss": 0.6238,
      "step": 1100
    },
    {
      "epoch": 0.4065040650406504,
      "grad_norm": 1.998961091041565,
      "learning_rate": 0.0002757158006362672,
      "loss": 0.6306,
      "step": 1150
    },
    {
      "epoch": 0.4241781548250265,
      "grad_norm": 1.854942798614502,
      "learning_rate": 0.0002746553552492047,
      "loss": 0.5689,
      "step": 1200
    },
    {
      "epoch": 0.4418522446094026,
      "grad_norm": 1.8994203805923462,
      "learning_rate": 0.00027359490986214206,
      "loss": 0.6595,
      "step": 1250
    },
    {
      "epoch": 0.4595263343937787,
      "grad_norm": 1.6235908269882202,
      "learning_rate": 0.0002725344644750795,
      "loss": 0.5665,
      "step": 1300
    },
    {
      "epoch": 0.47720042417815484,
      "grad_norm": 2.291989803314209,
      "learning_rate": 0.00027147401908801693,
      "loss": 0.5761,
      "step": 1350
    },
    {
      "epoch": 0.4948745139625309,
      "grad_norm": 1.4632915258407593,
      "learning_rate": 0.00027041357370095437,
      "loss": 0.5171,
      "step": 1400
    },
    {
      "epoch": 0.512548603746907,
      "grad_norm": 2.1687259674072266,
      "learning_rate": 0.0002693531283138918,
      "loss": 0.6183,
      "step": 1450
    },
    {
      "epoch": 0.5302226935312832,
      "grad_norm": 1.734108805656433,
      "learning_rate": 0.00026829268292682924,
      "loss": 0.5411,
      "step": 1500
    },
    {
      "epoch": 0.5478967833156593,
      "grad_norm": 1.3890644311904907,
      "learning_rate": 0.00026723223753976667,
      "loss": 0.5092,
      "step": 1550
    },
    {
      "epoch": 0.5655708731000354,
      "grad_norm": 1.98700749874115,
      "learning_rate": 0.0002661717921527041,
      "loss": 0.4804,
      "step": 1600
    },
    {
      "epoch": 0.5832449628844114,
      "grad_norm": 1.1181468963623047,
      "learning_rate": 0.00026511134676564154,
      "loss": 0.5148,
      "step": 1650
    },
    {
      "epoch": 0.6009190526687875,
      "grad_norm": 1.7994420528411865,
      "learning_rate": 0.000264050901378579,
      "loss": 0.4231,
      "step": 1700
    },
    {
      "epoch": 0.6185931424531637,
      "grad_norm": 2.032198667526245,
      "learning_rate": 0.0002629904559915164,
      "loss": 0.5106,
      "step": 1750
    },
    {
      "epoch": 0.6362672322375398,
      "grad_norm": 3.585948944091797,
      "learning_rate": 0.00026193001060445385,
      "loss": 0.4717,
      "step": 1800
    },
    {
      "epoch": 0.6539413220219159,
      "grad_norm": 1.8610371351242065,
      "learning_rate": 0.0002608695652173913,
      "loss": 0.4765,
      "step": 1850
    },
    {
      "epoch": 0.671615411806292,
      "grad_norm": 1.2324624061584473,
      "learning_rate": 0.0002598091198303287,
      "loss": 0.4643,
      "step": 1900
    },
    {
      "epoch": 0.689289501590668,
      "grad_norm": 2.391714572906494,
      "learning_rate": 0.00025874867444326615,
      "loss": 0.4512,
      "step": 1950
    },
    {
      "epoch": 0.7069635913750442,
      "grad_norm": 1.8863242864608765,
      "learning_rate": 0.0002576882290562036,
      "loss": 0.4115,
      "step": 2000
    },
    {
      "epoch": 0.7246376811594203,
      "grad_norm": 0.7850649356842041,
      "learning_rate": 0.000256627783669141,
      "loss": 0.4341,
      "step": 2050
    },
    {
      "epoch": 0.7423117709437964,
      "grad_norm": 1.5869959592819214,
      "learning_rate": 0.00025556733828207846,
      "loss": 0.4172,
      "step": 2100
    },
    {
      "epoch": 0.7599858607281725,
      "grad_norm": 1.2584971189498901,
      "learning_rate": 0.0002545068928950159,
      "loss": 0.4384,
      "step": 2150
    },
    {
      "epoch": 0.7776599505125487,
      "grad_norm": 2.560710906982422,
      "learning_rate": 0.00025344644750795333,
      "loss": 0.4558,
      "step": 2200
    },
    {
      "epoch": 0.7953340402969247,
      "grad_norm": 2.2893359661102295,
      "learning_rate": 0.00025238600212089076,
      "loss": 0.4345,
      "step": 2250
    },
    {
      "epoch": 0.8130081300813008,
      "grad_norm": 1.5244982242584229,
      "learning_rate": 0.0002513255567338282,
      "loss": 0.4071,
      "step": 2300
    },
    {
      "epoch": 0.8306822198656769,
      "grad_norm": 1.384102463722229,
      "learning_rate": 0.00025026511134676563,
      "loss": 0.3612,
      "step": 2350
    },
    {
      "epoch": 0.848356309650053,
      "grad_norm": 1.3080965280532837,
      "learning_rate": 0.00024920466595970307,
      "loss": 0.3556,
      "step": 2400
    },
    {
      "epoch": 0.8660303994344292,
      "grad_norm": 1.3324400186538696,
      "learning_rate": 0.00024814422057264045,
      "loss": 0.3985,
      "step": 2450
    },
    {
      "epoch": 0.8837044892188052,
      "grad_norm": 1.7705445289611816,
      "learning_rate": 0.00024708377518557794,
      "loss": 0.3895,
      "step": 2500
    },
    {
      "epoch": 0.9013785790031813,
      "grad_norm": 1.352480173110962,
      "learning_rate": 0.0002460233297985153,
      "loss": 0.426,
      "step": 2550
    },
    {
      "epoch": 0.9190526687875574,
      "grad_norm": 1.479979157447815,
      "learning_rate": 0.0002449628844114528,
      "loss": 0.4057,
      "step": 2600
    },
    {
      "epoch": 0.9367267585719335,
      "grad_norm": 2.1380653381347656,
      "learning_rate": 0.00024390243902439022,
      "loss": 0.3689,
      "step": 2650
    },
    {
      "epoch": 0.9544008483563097,
      "grad_norm": 1.9099682569503784,
      "learning_rate": 0.00024284199363732768,
      "loss": 0.3991,
      "step": 2700
    },
    {
      "epoch": 0.9720749381406858,
      "grad_norm": 1.399566411972046,
      "learning_rate": 0.0002417815482502651,
      "loss": 0.3412,
      "step": 2750
    },
    {
      "epoch": 0.9897490279250618,
      "grad_norm": 2.508267879486084,
      "learning_rate": 0.00024072110286320252,
      "loss": 0.3828,
      "step": 2800
    },
    {
      "epoch": 1.0,
      "eval_bertscore_f1": 0.9667777874331811,
      "eval_bleu": 0.5973566262792636,
      "eval_loss": 0.27053505182266235,
      "eval_runtime": 1054.1237,
      "eval_samples_per_second": 6.132,
      "eval_steps_per_second": 0.767,
      "step": 2829
    },
    {
      "epoch": 1.007423117709438,
      "grad_norm": 1.6967344284057617,
      "learning_rate": 0.00023966065747613996,
      "loss": 0.3787,
      "step": 2850
    },
    {
      "epoch": 1.025097207493814,
      "grad_norm": 1.7119196653366089,
      "learning_rate": 0.0002386002120890774,
      "loss": 0.3507,
      "step": 2900
    },
    {
      "epoch": 1.0427712972781902,
      "grad_norm": 1.5456138849258423,
      "learning_rate": 0.00023753976670201483,
      "loss": 0.333,
      "step": 2950
    },
    {
      "epoch": 1.0604453870625663,
      "grad_norm": 1.3519443273544312,
      "learning_rate": 0.00023647932131495226,
      "loss": 0.3897,
      "step": 3000
    },
    {
      "epoch": 1.0781194768469424,
      "grad_norm": 1.4092153310775757,
      "learning_rate": 0.0002354188759278897,
      "loss": 0.3069,
      "step": 3050
    },
    {
      "epoch": 1.0957935666313185,
      "grad_norm": 1.67427659034729,
      "learning_rate": 0.00023435843054082713,
      "loss": 0.3876,
      "step": 3100
    },
    {
      "epoch": 1.1134676564156947,
      "grad_norm": 0.9288003444671631,
      "learning_rate": 0.00023329798515376457,
      "loss": 0.3052,
      "step": 3150
    },
    {
      "epoch": 1.1311417462000706,
      "grad_norm": 2.0493695735931396,
      "learning_rate": 0.000232237539766702,
      "loss": 0.3419,
      "step": 3200
    },
    {
      "epoch": 1.148815835984447,
      "grad_norm": 1.3473105430603027,
      "learning_rate": 0.0002311770943796394,
      "loss": 0.351,
      "step": 3250
    },
    {
      "epoch": 1.1664899257688228,
      "grad_norm": 2.2063777446746826,
      "learning_rate": 0.00023011664899257687,
      "loss": 0.3732,
      "step": 3300
    },
    {
      "epoch": 1.184164015553199,
      "grad_norm": 0.7194732427597046,
      "learning_rate": 0.00022905620360551428,
      "loss": 0.3098,
      "step": 3350
    },
    {
      "epoch": 1.201838105337575,
      "grad_norm": 1.8693958520889282,
      "learning_rate": 0.00022799575821845174,
      "loss": 0.3623,
      "step": 3400
    },
    {
      "epoch": 1.2195121951219512,
      "grad_norm": 1.7452648878097534,
      "learning_rate": 0.00022693531283138915,
      "loss": 0.2985,
      "step": 3450
    },
    {
      "epoch": 1.2371862849063273,
      "grad_norm": 2.7502336502075195,
      "learning_rate": 0.00022587486744432661,
      "loss": 0.2938,
      "step": 3500
    },
    {
      "epoch": 1.2548603746907034,
      "grad_norm": 1.0220433473587036,
      "learning_rate": 0.00022481442205726402,
      "loss": 0.3263,
      "step": 3550
    },
    {
      "epoch": 1.2725344644750796,
      "grad_norm": 1.1841455698013306,
      "learning_rate": 0.00022375397667020146,
      "loss": 0.3456,
      "step": 3600
    },
    {
      "epoch": 1.2902085542594557,
      "grad_norm": 1.1220083236694336,
      "learning_rate": 0.0002226935312831389,
      "loss": 0.3749,
      "step": 3650
    },
    {
      "epoch": 1.3078826440438318,
      "grad_norm": 2.557077646255493,
      "learning_rate": 0.00022163308589607633,
      "loss": 0.3479,
      "step": 3700
    },
    {
      "epoch": 1.3255567338282077,
      "grad_norm": 1.672131061553955,
      "learning_rate": 0.00022057264050901376,
      "loss": 0.3371,
      "step": 3750
    },
    {
      "epoch": 1.343230823612584,
      "grad_norm": 1.5530970096588135,
      "learning_rate": 0.0002195121951219512,
      "loss": 0.3062,
      "step": 3800
    },
    {
      "epoch": 1.36090491339696,
      "grad_norm": 0.8587738871574402,
      "learning_rate": 0.00021845174973488866,
      "loss": 0.3458,
      "step": 3850
    },
    {
      "epoch": 1.378579003181336,
      "grad_norm": 1.2779722213745117,
      "learning_rate": 0.00021739130434782607,
      "loss": 0.3582,
      "step": 3900
    },
    {
      "epoch": 1.3962530929657122,
      "grad_norm": 1.7616783380508423,
      "learning_rate": 0.00021633085896076348,
      "loss": 0.2999,
      "step": 3950
    },
    {
      "epoch": 1.4139271827500883,
      "grad_norm": 1.2923225164413452,
      "learning_rate": 0.00021527041357370094,
      "loss": 0.3079,
      "step": 4000
    },
    {
      "epoch": 1.4316012725344645,
      "grad_norm": 0.7930673360824585,
      "learning_rate": 0.00021420996818663835,
      "loss": 0.2973,
      "step": 4050
    },
    {
      "epoch": 1.4492753623188406,
      "grad_norm": 1.5622656345367432,
      "learning_rate": 0.0002131495227995758,
      "loss": 0.291,
      "step": 4100
    },
    {
      "epoch": 1.4669494521032167,
      "grad_norm": 0.8834390640258789,
      "learning_rate": 0.00021208907741251324,
      "loss": 0.2691,
      "step": 4150
    },
    {
      "epoch": 1.4846235418875928,
      "grad_norm": 1.2596232891082764,
      "learning_rate": 0.00021102863202545068,
      "loss": 0.247,
      "step": 4200
    },
    {
      "epoch": 1.502297631671969,
      "grad_norm": 0.7010456323623657,
      "learning_rate": 0.00020996818663838811,
      "loss": 0.3019,
      "step": 4250
    },
    {
      "epoch": 1.5199717214563448,
      "grad_norm": 1.071253776550293,
      "learning_rate": 0.00020890774125132552,
      "loss": 0.2447,
      "step": 4300
    },
    {
      "epoch": 1.5376458112407212,
      "grad_norm": 0.63275545835495,
      "learning_rate": 0.00020784729586426298,
      "loss": 0.246,
      "step": 4350
    },
    {
      "epoch": 1.555319901025097,
      "grad_norm": 1.0832668542861938,
      "learning_rate": 0.0002067868504772004,
      "loss": 0.249,
      "step": 4400
    },
    {
      "epoch": 1.5729939908094734,
      "grad_norm": 1.0748353004455566,
      "learning_rate": 0.00020572640509013785,
      "loss": 0.2585,
      "step": 4450
    },
    {
      "epoch": 1.5906680805938493,
      "grad_norm": 1.2410573959350586,
      "learning_rate": 0.00020466595970307526,
      "loss": 0.2821,
      "step": 4500
    },
    {
      "epoch": 1.6083421703782255,
      "grad_norm": 1.8322285413742065,
      "learning_rate": 0.00020360551431601272,
      "loss": 0.2642,
      "step": 4550
    },
    {
      "epoch": 1.6260162601626016,
      "grad_norm": 1.5231540203094482,
      "learning_rate": 0.00020254506892895013,
      "loss": 0.329,
      "step": 4600
    },
    {
      "epoch": 1.6436903499469777,
      "grad_norm": 0.8996387124061584,
      "learning_rate": 0.0002014846235418876,
      "loss": 0.2822,
      "step": 4650
    },
    {
      "epoch": 1.6613644397313538,
      "grad_norm": 1.107340693473816,
      "learning_rate": 0.000200424178154825,
      "loss": 0.2647,
      "step": 4700
    },
    {
      "epoch": 1.67903852951573,
      "grad_norm": 1.44370698928833,
      "learning_rate": 0.00019936373276776244,
      "loss": 0.3281,
      "step": 4750
    },
    {
      "epoch": 1.696712619300106,
      "grad_norm": 1.433866024017334,
      "learning_rate": 0.00019830328738069987,
      "loss": 0.2867,
      "step": 4800
    },
    {
      "epoch": 1.714386709084482,
      "grad_norm": 0.7778879404067993,
      "learning_rate": 0.0001972428419936373,
      "loss": 0.2363,
      "step": 4850
    },
    {
      "epoch": 1.7320607988688583,
      "grad_norm": 1.0693784952163696,
      "learning_rate": 0.00019618239660657474,
      "loss": 0.2989,
      "step": 4900
    },
    {
      "epoch": 1.7497348886532342,
      "grad_norm": 0.9680020213127136,
      "learning_rate": 0.00019512195121951218,
      "loss": 0.2512,
      "step": 4950
    },
    {
      "epoch": 1.7674089784376106,
      "grad_norm": 0.9300338625907898,
      "learning_rate": 0.00019406150583244961,
      "loss": 0.2814,
      "step": 5000
    },
    {
      "epoch": 1.7850830682219865,
      "grad_norm": 1.6086584329605103,
      "learning_rate": 0.00019300106044538705,
      "loss": 0.2895,
      "step": 5050
    },
    {
      "epoch": 1.8027571580063628,
      "grad_norm": 1.522153615951538,
      "learning_rate": 0.00019194061505832446,
      "loss": 0.2804,
      "step": 5100
    },
    {
      "epoch": 1.8204312477907387,
      "grad_norm": 1.3292605876922607,
      "learning_rate": 0.00019088016967126192,
      "loss": 0.2676,
      "step": 5150
    },
    {
      "epoch": 1.8381053375751149,
      "grad_norm": 1.0950225591659546,
      "learning_rate": 0.00018981972428419933,
      "loss": 0.2991,
      "step": 5200
    },
    {
      "epoch": 1.855779427359491,
      "grad_norm": 1.8333910703659058,
      "learning_rate": 0.0001887592788971368,
      "loss": 0.2742,
      "step": 5250
    },
    {
      "epoch": 1.873453517143867,
      "grad_norm": 1.447016716003418,
      "learning_rate": 0.0001876988335100742,
      "loss": 0.2125,
      "step": 5300
    },
    {
      "epoch": 1.8911276069282432,
      "grad_norm": 1.0409213304519653,
      "learning_rate": 0.00018663838812301166,
      "loss": 0.2372,
      "step": 5350
    },
    {
      "epoch": 1.9088016967126193,
      "grad_norm": 0.5701714158058167,
      "learning_rate": 0.00018557794273594907,
      "loss": 0.2332,
      "step": 5400
    },
    {
      "epoch": 1.9264757864969955,
      "grad_norm": 1.0092428922653198,
      "learning_rate": 0.0001845174973488865,
      "loss": 0.2897,
      "step": 5450
    },
    {
      "epoch": 1.9441498762813714,
      "grad_norm": 1.031217098236084,
      "learning_rate": 0.00018345705196182397,
      "loss": 0.2722,
      "step": 5500
    },
    {
      "epoch": 1.9618239660657477,
      "grad_norm": 1.2638362646102905,
      "learning_rate": 0.00018239660657476137,
      "loss": 0.2505,
      "step": 5550
    },
    {
      "epoch": 1.9794980558501236,
      "grad_norm": 1.3998290300369263,
      "learning_rate": 0.00018133616118769884,
      "loss": 0.2772,
      "step": 5600
    },
    {
      "epoch": 1.9971721456345,
      "grad_norm": 1.4681320190429688,
      "learning_rate": 0.00018027571580063624,
      "loss": 0.25,
      "step": 5650
    },
    {
      "epoch": 2.0,
      "eval_bertscore_f1": 0.970109825833968,
      "eval_bleu": 0.6229920961802436,
      "eval_loss": 0.1904931217432022,
      "eval_runtime": 1018.8038,
      "eval_samples_per_second": 6.345,
      "eval_steps_per_second": 0.793,
      "step": 5658
    },
    {
      "epoch": 2.014846235418876,
      "grad_norm": 1.3499983549118042,
      "learning_rate": 0.0001792152704135737,
      "loss": 0.2412,
      "step": 5700
    },
    {
      "epoch": 2.032520325203252,
      "grad_norm": 1.2155545949935913,
      "learning_rate": 0.00017815482502651111,
      "loss": 0.2919,
      "step": 5750
    },
    {
      "epoch": 2.050194414987628,
      "grad_norm": 0.9294681549072266,
      "learning_rate": 0.00017709437963944858,
      "loss": 0.2191,
      "step": 5800
    },
    {
      "epoch": 2.0678685047720045,
      "grad_norm": 0.8069599270820618,
      "learning_rate": 0.00017603393425238598,
      "loss": 0.228,
      "step": 5850
    },
    {
      "epoch": 2.0855425945563804,
      "grad_norm": 1.1825474500656128,
      "learning_rate": 0.00017497348886532342,
      "loss": 0.2422,
      "step": 5900
    },
    {
      "epoch": 2.1032166843407563,
      "grad_norm": 1.2947015762329102,
      "learning_rate": 0.00017391304347826085,
      "loss": 0.2333,
      "step": 5950
    },
    {
      "epoch": 2.1208907741251326,
      "grad_norm": 1.0622906684875488,
      "learning_rate": 0.0001728525980911983,
      "loss": 0.2029,
      "step": 6000
    },
    {
      "epoch": 2.1385648639095085,
      "grad_norm": 0.8785162568092346,
      "learning_rate": 0.00017179215270413572,
      "loss": 0.2039,
      "step": 6050
    },
    {
      "epoch": 2.156238953693885,
      "grad_norm": 0.3702610433101654,
      "learning_rate": 0.00017073170731707316,
      "loss": 0.2631,
      "step": 6100
    },
    {
      "epoch": 2.1739130434782608,
      "grad_norm": 1.0092154741287231,
      "learning_rate": 0.0001696712619300106,
      "loss": 0.2325,
      "step": 6150
    },
    {
      "epoch": 2.191587133262637,
      "grad_norm": 1.648000955581665,
      "learning_rate": 0.00016861081654294803,
      "loss": 0.2501,
      "step": 6200
    },
    {
      "epoch": 2.209261223047013,
      "grad_norm": 0.979069173336029,
      "learning_rate": 0.00016755037115588544,
      "loss": 0.256,
      "step": 6250
    },
    {
      "epoch": 2.2269353128313893,
      "grad_norm": 1.459558129310608,
      "learning_rate": 0.0001664899257688229,
      "loss": 0.2603,
      "step": 6300
    },
    {
      "epoch": 2.2446094026157652,
      "grad_norm": 1.5793472528457642,
      "learning_rate": 0.0001654294803817603,
      "loss": 0.2564,
      "step": 6350
    },
    {
      "epoch": 2.262283492400141,
      "grad_norm": 1.1787140369415283,
      "learning_rate": 0.00016436903499469777,
      "loss": 0.2782,
      "step": 6400
    },
    {
      "epoch": 2.2799575821845175,
      "grad_norm": 1.041374683380127,
      "learning_rate": 0.00016330858960763518,
      "loss": 0.2331,
      "step": 6450
    },
    {
      "epoch": 2.297631671968894,
      "grad_norm": 0.7799555063247681,
      "learning_rate": 0.00016224814422057264,
      "loss": 0.2338,
      "step": 6500
    },
    {
      "epoch": 2.3153057617532697,
      "grad_norm": 1.4405689239501953,
      "learning_rate": 0.00016118769883351005,
      "loss": 0.2737,
      "step": 6550
    },
    {
      "epoch": 2.3329798515376456,
      "grad_norm": 0.979608416557312,
      "learning_rate": 0.00016012725344644748,
      "loss": 0.2495,
      "step": 6600
    },
    {
      "epoch": 2.350653941322022,
      "grad_norm": 0.9300618171691895,
      "learning_rate": 0.00015906680805938492,
      "loss": 0.2157,
      "step": 6650
    },
    {
      "epoch": 2.368328031106398,
      "grad_norm": 0.8745370507240295,
      "learning_rate": 0.00015800636267232235,
      "loss": 0.2837,
      "step": 6700
    },
    {
      "epoch": 2.3860021208907742,
      "grad_norm": 0.9898168444633484,
      "learning_rate": 0.00015694591728525982,
      "loss": 0.221,
      "step": 6750
    },
    {
      "epoch": 2.40367621067515,
      "grad_norm": 0.8933513760566711,
      "learning_rate": 0.00015588547189819722,
      "loss": 0.1994,
      "step": 6800
    },
    {
      "epoch": 2.4213503004595265,
      "grad_norm": 1.7144904136657715,
      "learning_rate": 0.0001548250265111347,
      "loss": 0.2429,
      "step": 6850
    },
    {
      "epoch": 2.4390243902439024,
      "grad_norm": 1.5800135135650635,
      "learning_rate": 0.0001537645811240721,
      "loss": 0.2284,
      "step": 6900
    },
    {
      "epoch": 2.4566984800282787,
      "grad_norm": 1.0567731857299805,
      "learning_rate": 0.0001527041357370095,
      "loss": 0.2028,
      "step": 6950
    },
    {
      "epoch": 2.4743725698126546,
      "grad_norm": 0.59196537733078,
      "learning_rate": 0.00015164369034994697,
      "loss": 0.228,
      "step": 7000
    },
    {
      "epoch": 2.4920466595970305,
      "grad_norm": 1.0257049798965454,
      "learning_rate": 0.00015058324496288437,
      "loss": 0.2196,
      "step": 7050
    },
    {
      "epoch": 2.509720749381407,
      "grad_norm": 1.500623345375061,
      "learning_rate": 0.00014952279957582184,
      "loss": 0.2351,
      "step": 7100
    },
    {
      "epoch": 2.5273948391657832,
      "grad_norm": 1.1046085357666016,
      "learning_rate": 0.00014846235418875927,
      "loss": 0.2595,
      "step": 7150
    },
    {
      "epoch": 2.545068928950159,
      "grad_norm": 1.2226991653442383,
      "learning_rate": 0.0001474019088016967,
      "loss": 0.1914,
      "step": 7200
    },
    {
      "epoch": 2.562743018734535,
      "grad_norm": 0.6742298007011414,
      "learning_rate": 0.00014634146341463414,
      "loss": 0.2096,
      "step": 7250
    },
    {
      "epoch": 2.5804171085189114,
      "grad_norm": 1.5504461526870728,
      "learning_rate": 0.00014528101802757158,
      "loss": 0.2051,
      "step": 7300
    },
    {
      "epoch": 2.5980911983032873,
      "grad_norm": 0.9681800603866577,
      "learning_rate": 0.000144220572640509,
      "loss": 0.2327,
      "step": 7350
    },
    {
      "epoch": 2.6157652880876636,
      "grad_norm": 0.9383839964866638,
      "learning_rate": 0.00014316012725344645,
      "loss": 0.2344,
      "step": 7400
    },
    {
      "epoch": 2.6334393778720395,
      "grad_norm": 0.6154807209968567,
      "learning_rate": 0.00014209968186638388,
      "loss": 0.2383,
      "step": 7450
    },
    {
      "epoch": 2.6511134676564154,
      "grad_norm": 1.2676986455917358,
      "learning_rate": 0.00014103923647932132,
      "loss": 0.2257,
      "step": 7500
    },
    {
      "epoch": 2.6687875574407918,
      "grad_norm": 1.183440089225769,
      "learning_rate": 0.00013997879109225872,
      "loss": 0.2102,
      "step": 7550
    },
    {
      "epoch": 2.686461647225168,
      "grad_norm": 0.7244306802749634,
      "learning_rate": 0.00013891834570519616,
      "loss": 0.2146,
      "step": 7600
    },
    {
      "epoch": 2.704135737009544,
      "grad_norm": 1.187232494354248,
      "learning_rate": 0.0001378579003181336,
      "loss": 0.2119,
      "step": 7650
    },
    {
      "epoch": 2.72180982679392,
      "grad_norm": 1.4510794878005981,
      "learning_rate": 0.00013679745493107103,
      "loss": 0.1916,
      "step": 7700
    },
    {
      "epoch": 2.7394839165782963,
      "grad_norm": 1.383832335472107,
      "learning_rate": 0.00013573700954400847,
      "loss": 0.2179,
      "step": 7750
    },
    {
      "epoch": 2.757158006362672,
      "grad_norm": 0.9274504780769348,
      "learning_rate": 0.0001346765641569459,
      "loss": 0.199,
      "step": 7800
    },
    {
      "epoch": 2.7748320961470485,
      "grad_norm": 2.6429216861724854,
      "learning_rate": 0.00013361611876988334,
      "loss": 0.2407,
      "step": 7850
    },
    {
      "epoch": 2.7925061859314244,
      "grad_norm": 1.3947652578353882,
      "learning_rate": 0.00013255567338282077,
      "loss": 0.2019,
      "step": 7900
    },
    {
      "epoch": 2.8101802757158008,
      "grad_norm": 1.148478627204895,
      "learning_rate": 0.0001314952279957582,
      "loss": 0.205,
      "step": 7950
    },
    {
      "epoch": 2.8278543655001767,
      "grad_norm": 1.1087610721588135,
      "learning_rate": 0.00013043478260869564,
      "loss": 0.2527,
      "step": 8000
    },
    {
      "epoch": 2.845528455284553,
      "grad_norm": 1.4348084926605225,
      "learning_rate": 0.00012937433722163308,
      "loss": 0.2465,
      "step": 8050
    },
    {
      "epoch": 2.863202545068929,
      "grad_norm": 1.2600926160812378,
      "learning_rate": 0.0001283138918345705,
      "loss": 0.1699,
      "step": 8100
    },
    {
      "epoch": 2.880876634853305,
      "grad_norm": 0.8724793195724487,
      "learning_rate": 0.00012725344644750795,
      "loss": 0.2257,
      "step": 8150
    },
    {
      "epoch": 2.898550724637681,
      "grad_norm": 1.5324125289916992,
      "learning_rate": 0.00012619300106044538,
      "loss": 0.2002,
      "step": 8200
    },
    {
      "epoch": 2.9162248144220575,
      "grad_norm": 1.0066156387329102,
      "learning_rate": 0.00012513255567338282,
      "loss": 0.192,
      "step": 8250
    },
    {
      "epoch": 2.9338989042064334,
      "grad_norm": 0.4273667633533478,
      "learning_rate": 0.00012407211028632022,
      "loss": 0.1758,
      "step": 8300
    },
    {
      "epoch": 2.9515729939908093,
      "grad_norm": 0.6536590456962585,
      "learning_rate": 0.00012301166489925766,
      "loss": 0.1905,
      "step": 8350
    },
    {
      "epoch": 2.9692470837751856,
      "grad_norm": 0.6973742246627808,
      "learning_rate": 0.00012195121951219511,
      "loss": 0.2116,
      "step": 8400
    },
    {
      "epoch": 2.9869211735595615,
      "grad_norm": 0.9764792919158936,
      "learning_rate": 0.00012089077412513254,
      "loss": 0.2098,
      "step": 8450
    },
    {
      "epoch": 3.0,
      "eval_bertscore_f1": 0.9710737692450385,
      "eval_bleu": 0.6330045835354601,
      "eval_loss": 0.15814107656478882,
      "eval_runtime": 1020.076,
      "eval_samples_per_second": 6.337,
      "eval_steps_per_second": 0.792,
      "step": 8487
    }
  ],
  "logging_steps": 50,
  "max_steps": 14145,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0380291237347328e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}