| { | |
| "best_global_step": 2829, | |
| "best_metric": 0.9667777874331811, | |
| "best_model_checkpoint": "./codet5-qlora-k8s/checkpoint-2829", | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 2829, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.017674089784376106, | |
| "grad_norm": 1.1207759380340576, | |
| "learning_rate": 0.00029898197242841994, | |
| "loss": 3.2886, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03534817956875221, | |
| "grad_norm": 1.2978123426437378, | |
| "learning_rate": 0.0002979215270413573, | |
| "loss": 1.8567, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.053022269353128315, | |
| "grad_norm": 1.624740719795227, | |
| "learning_rate": 0.0002968610816542948, | |
| "loss": 1.5695, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.07069635913750442, | |
| "grad_norm": 1.7711330652236938, | |
| "learning_rate": 0.0002958006362672322, | |
| "loss": 1.4205, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.08837044892188052, | |
| "grad_norm": 1.62517511844635, | |
| "learning_rate": 0.0002947401908801697, | |
| "loss": 1.2732, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.10604453870625663, | |
| "grad_norm": 2.038139820098877, | |
| "learning_rate": 0.00029367974549310706, | |
| "loss": 1.1913, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.12371862849063273, | |
| "grad_norm": 2.262789487838745, | |
| "learning_rate": 0.00029264050901378576, | |
| "loss": 1.117, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.14139271827500885, | |
| "grad_norm": 3.121687650680542, | |
| "learning_rate": 0.0002915800636267232, | |
| "loss": 1.0202, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.15906680805938495, | |
| "grad_norm": 2.0951812267303467, | |
| "learning_rate": 0.0002905196182396606, | |
| "loss": 0.9499, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.17674089784376104, | |
| "grad_norm": 2.670121192932129, | |
| "learning_rate": 0.00028945917285259806, | |
| "loss": 0.9707, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.19441498762813716, | |
| "grad_norm": 2.3631107807159424, | |
| "learning_rate": 0.00028841993637327676, | |
| "loss": 0.7961, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.21208907741251326, | |
| "grad_norm": 2.10772705078125, | |
| "learning_rate": 0.0002873594909862142, | |
| "loss": 0.8912, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.22976316719688936, | |
| "grad_norm": 2.360686779022217, | |
| "learning_rate": 0.00028629904559915163, | |
| "loss": 0.871, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.24743725698126545, | |
| "grad_norm": 2.191119432449341, | |
| "learning_rate": 0.0002852598091198303, | |
| "loss": 0.758, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.2651113467656416, | |
| "grad_norm": 1.7646818161010742, | |
| "learning_rate": 0.00028419936373276776, | |
| "loss": 0.8244, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.2827854365500177, | |
| "grad_norm": 2.3776354789733887, | |
| "learning_rate": 0.00028313891834570514, | |
| "loss": 0.7664, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.30045952633439377, | |
| "grad_norm": 2.8682475090026855, | |
| "learning_rate": 0.00028207847295864263, | |
| "loss": 0.6942, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.3181336161187699, | |
| "grad_norm": 2.353091239929199, | |
| "learning_rate": 0.00028101802757158, | |
| "loss": 0.7323, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.335807705903146, | |
| "grad_norm": 1.9457337856292725, | |
| "learning_rate": 0.00027995758218451745, | |
| "loss": 0.6474, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.3534817956875221, | |
| "grad_norm": 2.510075330734253, | |
| "learning_rate": 0.00027889713679745494, | |
| "loss": 0.6801, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.3711558854718982, | |
| "grad_norm": 1.7497014999389648, | |
| "learning_rate": 0.0002778366914103923, | |
| "loss": 0.656, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.38882997525627433, | |
| "grad_norm": 2.862682342529297, | |
| "learning_rate": 0.0002767762460233298, | |
| "loss": 0.6238, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.4065040650406504, | |
| "grad_norm": 1.998961091041565, | |
| "learning_rate": 0.0002757158006362672, | |
| "loss": 0.6306, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.4241781548250265, | |
| "grad_norm": 1.854942798614502, | |
| "learning_rate": 0.0002746553552492047, | |
| "loss": 0.5689, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.4418522446094026, | |
| "grad_norm": 1.8994203805923462, | |
| "learning_rate": 0.00027359490986214206, | |
| "loss": 0.6595, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.4595263343937787, | |
| "grad_norm": 1.6235908269882202, | |
| "learning_rate": 0.0002725344644750795, | |
| "loss": 0.5665, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.47720042417815484, | |
| "grad_norm": 2.291989803314209, | |
| "learning_rate": 0.00027147401908801693, | |
| "loss": 0.5761, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.4948745139625309, | |
| "grad_norm": 1.4632915258407593, | |
| "learning_rate": 0.00027041357370095437, | |
| "loss": 0.5171, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.512548603746907, | |
| "grad_norm": 2.1687259674072266, | |
| "learning_rate": 0.0002693531283138918, | |
| "loss": 0.6183, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.5302226935312832, | |
| "grad_norm": 1.734108805656433, | |
| "learning_rate": 0.00026829268292682924, | |
| "loss": 0.5411, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.5478967833156593, | |
| "grad_norm": 1.3890644311904907, | |
| "learning_rate": 0.00026723223753976667, | |
| "loss": 0.5092, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.5655708731000354, | |
| "grad_norm": 1.98700749874115, | |
| "learning_rate": 0.0002661717921527041, | |
| "loss": 0.4804, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.5832449628844114, | |
| "grad_norm": 1.1181468963623047, | |
| "learning_rate": 0.00026511134676564154, | |
| "loss": 0.5148, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.6009190526687875, | |
| "grad_norm": 1.7994420528411865, | |
| "learning_rate": 0.000264050901378579, | |
| "loss": 0.4231, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.6185931424531637, | |
| "grad_norm": 2.032198667526245, | |
| "learning_rate": 0.0002629904559915164, | |
| "loss": 0.5106, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.6362672322375398, | |
| "grad_norm": 3.585948944091797, | |
| "learning_rate": 0.00026193001060445385, | |
| "loss": 0.4717, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.6539413220219159, | |
| "grad_norm": 1.8610371351242065, | |
| "learning_rate": 0.0002608695652173913, | |
| "loss": 0.4765, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.671615411806292, | |
| "grad_norm": 1.2324624061584473, | |
| "learning_rate": 0.0002598091198303287, | |
| "loss": 0.4643, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.689289501590668, | |
| "grad_norm": 2.391714572906494, | |
| "learning_rate": 0.00025874867444326615, | |
| "loss": 0.4512, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.7069635913750442, | |
| "grad_norm": 1.8863242864608765, | |
| "learning_rate": 0.0002576882290562036, | |
| "loss": 0.4115, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.7246376811594203, | |
| "grad_norm": 0.7850649356842041, | |
| "learning_rate": 0.000256627783669141, | |
| "loss": 0.4341, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.7423117709437964, | |
| "grad_norm": 1.5869959592819214, | |
| "learning_rate": 0.00025556733828207846, | |
| "loss": 0.4172, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.7599858607281725, | |
| "grad_norm": 1.2584971189498901, | |
| "learning_rate": 0.0002545068928950159, | |
| "loss": 0.4384, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.7776599505125487, | |
| "grad_norm": 2.560710906982422, | |
| "learning_rate": 0.00025344644750795333, | |
| "loss": 0.4558, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.7953340402969247, | |
| "grad_norm": 2.2893359661102295, | |
| "learning_rate": 0.00025238600212089076, | |
| "loss": 0.4345, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.8130081300813008, | |
| "grad_norm": 1.5244982242584229, | |
| "learning_rate": 0.0002513255567338282, | |
| "loss": 0.4071, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.8306822198656769, | |
| "grad_norm": 1.384102463722229, | |
| "learning_rate": 0.00025026511134676563, | |
| "loss": 0.3612, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.848356309650053, | |
| "grad_norm": 1.3080965280532837, | |
| "learning_rate": 0.00024920466595970307, | |
| "loss": 0.3556, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.8660303994344292, | |
| "grad_norm": 1.3324400186538696, | |
| "learning_rate": 0.00024814422057264045, | |
| "loss": 0.3985, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.8837044892188052, | |
| "grad_norm": 1.7705445289611816, | |
| "learning_rate": 0.00024708377518557794, | |
| "loss": 0.3895, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.9013785790031813, | |
| "grad_norm": 1.352480173110962, | |
| "learning_rate": 0.0002460233297985153, | |
| "loss": 0.426, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.9190526687875574, | |
| "grad_norm": 1.479979157447815, | |
| "learning_rate": 0.0002449628844114528, | |
| "loss": 0.4057, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.9367267585719335, | |
| "grad_norm": 2.1380653381347656, | |
| "learning_rate": 0.00024390243902439022, | |
| "loss": 0.3689, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.9544008483563097, | |
| "grad_norm": 1.9099682569503784, | |
| "learning_rate": 0.00024284199363732768, | |
| "loss": 0.3991, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.9720749381406858, | |
| "grad_norm": 1.399566411972046, | |
| "learning_rate": 0.0002417815482502651, | |
| "loss": 0.3412, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.9897490279250618, | |
| "grad_norm": 2.508267879486084, | |
| "learning_rate": 0.00024072110286320252, | |
| "loss": 0.3828, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_bertscore_f1": 0.9667777874331811, | |
| "eval_bleu": 0.5973566262792636, | |
| "eval_loss": 0.27053505182266235, | |
| "eval_runtime": 1054.1237, | |
| "eval_samples_per_second": 6.132, | |
| "eval_steps_per_second": 0.767, | |
| "step": 2829 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 14145, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3460097079115776.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |