{ "best_global_step": 2829, "best_metric": 0.9667777874331811, "best_model_checkpoint": "./codet5-qlora-k8s/checkpoint-2829", "epoch": 1.0, "eval_steps": 500, "global_step": 2829, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.017674089784376106, "grad_norm": 1.1207759380340576, "learning_rate": 0.00029898197242841994, "loss": 3.2886, "step": 50 }, { "epoch": 0.03534817956875221, "grad_norm": 1.2978123426437378, "learning_rate": 0.0002979215270413573, "loss": 1.8567, "step": 100 }, { "epoch": 0.053022269353128315, "grad_norm": 1.624740719795227, "learning_rate": 0.0002968610816542948, "loss": 1.5695, "step": 150 }, { "epoch": 0.07069635913750442, "grad_norm": 1.7711330652236938, "learning_rate": 0.0002958006362672322, "loss": 1.4205, "step": 200 }, { "epoch": 0.08837044892188052, "grad_norm": 1.62517511844635, "learning_rate": 0.0002947401908801697, "loss": 1.2732, "step": 250 }, { "epoch": 0.10604453870625663, "grad_norm": 2.038139820098877, "learning_rate": 0.00029367974549310706, "loss": 1.1913, "step": 300 }, { "epoch": 0.12371862849063273, "grad_norm": 2.262789487838745, "learning_rate": 0.00029264050901378576, "loss": 1.117, "step": 350 }, { "epoch": 0.14139271827500885, "grad_norm": 3.121687650680542, "learning_rate": 0.0002915800636267232, "loss": 1.0202, "step": 400 }, { "epoch": 0.15906680805938495, "grad_norm": 2.0951812267303467, "learning_rate": 0.0002905196182396606, "loss": 0.9499, "step": 450 }, { "epoch": 0.17674089784376104, "grad_norm": 2.670121192932129, "learning_rate": 0.00028945917285259806, "loss": 0.9707, "step": 500 }, { "epoch": 0.19441498762813716, "grad_norm": 2.3631107807159424, "learning_rate": 0.00028841993637327676, "loss": 0.7961, "step": 550 }, { "epoch": 0.21208907741251326, "grad_norm": 2.10772705078125, "learning_rate": 0.0002873594909862142, "loss": 0.8912, "step": 600 }, { "epoch": 0.22976316719688936, "grad_norm": 2.360686779022217, "learning_rate": 0.00028629904559915163, "loss": 0.871, "step": 650 }, { "epoch": 0.24743725698126545, "grad_norm": 2.191119432449341, "learning_rate": 0.0002852598091198303, "loss": 0.758, "step": 700 }, { "epoch": 0.2651113467656416, "grad_norm": 1.7646818161010742, "learning_rate": 0.00028419936373276776, "loss": 0.8244, "step": 750 }, { "epoch": 0.2827854365500177, "grad_norm": 2.3776354789733887, "learning_rate": 0.00028313891834570514, "loss": 0.7664, "step": 800 }, { "epoch": 0.30045952633439377, "grad_norm": 2.8682475090026855, "learning_rate": 0.00028207847295864263, "loss": 0.6942, "step": 850 }, { "epoch": 0.3181336161187699, "grad_norm": 2.353091239929199, "learning_rate": 0.00028101802757158, "loss": 0.7323, "step": 900 }, { "epoch": 0.335807705903146, "grad_norm": 1.9457337856292725, "learning_rate": 0.00027995758218451745, "loss": 0.6474, "step": 950 }, { "epoch": 0.3534817956875221, "grad_norm": 2.510075330734253, "learning_rate": 0.00027889713679745494, "loss": 0.6801, "step": 1000 }, { "epoch": 0.3711558854718982, "grad_norm": 1.7497014999389648, "learning_rate": 0.0002778366914103923, "loss": 0.656, "step": 1050 }, { "epoch": 0.38882997525627433, "grad_norm": 2.862682342529297, "learning_rate": 0.0002767762460233298, "loss": 0.6238, "step": 1100 }, { "epoch": 0.4065040650406504, "grad_norm": 1.998961091041565, "learning_rate": 0.0002757158006362672, "loss": 0.6306, "step": 1150 }, { "epoch": 0.4241781548250265, "grad_norm": 1.854942798614502, "learning_rate": 0.0002746553552492047, "loss": 0.5689, "step": 1200 }, { "epoch": 0.4418522446094026, "grad_norm": 1.8994203805923462, "learning_rate": 0.00027359490986214206, "loss": 0.6595, "step": 1250 }, { "epoch": 0.4595263343937787, "grad_norm": 1.6235908269882202, "learning_rate": 0.0002725344644750795, "loss": 0.5665, "step": 1300 }, { "epoch": 0.47720042417815484, "grad_norm": 2.291989803314209, "learning_rate": 0.00027147401908801693, "loss": 0.5761, "step": 1350 }, { "epoch": 0.4948745139625309, "grad_norm": 1.4632915258407593, "learning_rate": 0.00027041357370095437, "loss": 0.5171, "step": 1400 }, { "epoch": 0.512548603746907, "grad_norm": 2.1687259674072266, "learning_rate": 0.0002693531283138918, "loss": 0.6183, "step": 1450 }, { "epoch": 0.5302226935312832, "grad_norm": 1.734108805656433, "learning_rate": 0.00026829268292682924, "loss": 0.5411, "step": 1500 }, { "epoch": 0.5478967833156593, "grad_norm": 1.3890644311904907, "learning_rate": 0.00026723223753976667, "loss": 0.5092, "step": 1550 }, { "epoch": 0.5655708731000354, "grad_norm": 1.98700749874115, "learning_rate": 0.0002661717921527041, "loss": 0.4804, "step": 1600 }, { "epoch": 0.5832449628844114, "grad_norm": 1.1181468963623047, "learning_rate": 0.00026511134676564154, "loss": 0.5148, "step": 1650 }, { "epoch": 0.6009190526687875, "grad_norm": 1.7994420528411865, "learning_rate": 0.000264050901378579, "loss": 0.4231, "step": 1700 }, { "epoch": 0.6185931424531637, "grad_norm": 2.032198667526245, "learning_rate": 0.0002629904559915164, "loss": 0.5106, "step": 1750 }, { "epoch": 0.6362672322375398, "grad_norm": 3.585948944091797, "learning_rate": 0.00026193001060445385, "loss": 0.4717, "step": 1800 }, { "epoch": 0.6539413220219159, "grad_norm": 1.8610371351242065, "learning_rate": 0.0002608695652173913, "loss": 0.4765, "step": 1850 }, { "epoch": 0.671615411806292, "grad_norm": 1.2324624061584473, "learning_rate": 0.0002598091198303287, "loss": 0.4643, "step": 1900 }, { "epoch": 0.689289501590668, "grad_norm": 2.391714572906494, "learning_rate": 0.00025874867444326615, "loss": 0.4512, "step": 1950 }, { "epoch": 0.7069635913750442, "grad_norm": 1.8863242864608765, "learning_rate": 0.0002576882290562036, "loss": 0.4115, "step": 2000 }, { "epoch": 0.7246376811594203, "grad_norm": 0.7850649356842041, "learning_rate": 0.000256627783669141, "loss": 0.4341, "step": 2050 }, { "epoch": 0.7423117709437964, "grad_norm": 1.5869959592819214, "learning_rate": 0.00025556733828207846, "loss": 0.4172, "step": 2100 }, { "epoch": 0.7599858607281725, "grad_norm": 1.2584971189498901, "learning_rate": 0.0002545068928950159, "loss": 0.4384, "step": 2150 }, { "epoch": 0.7776599505125487, "grad_norm": 2.560710906982422, "learning_rate": 0.00025344644750795333, "loss": 0.4558, "step": 2200 }, { "epoch": 0.7953340402969247, "grad_norm": 2.2893359661102295, "learning_rate": 0.00025238600212089076, "loss": 0.4345, "step": 2250 }, { "epoch": 0.8130081300813008, "grad_norm": 1.5244982242584229, "learning_rate": 0.0002513255567338282, "loss": 0.4071, "step": 2300 }, { "epoch": 0.8306822198656769, "grad_norm": 1.384102463722229, "learning_rate": 0.00025026511134676563, "loss": 0.3612, "step": 2350 }, { "epoch": 0.848356309650053, "grad_norm": 1.3080965280532837, "learning_rate": 0.00024920466595970307, "loss": 0.3556, "step": 2400 }, { "epoch": 0.8660303994344292, "grad_norm": 1.3324400186538696, "learning_rate": 0.00024814422057264045, "loss": 0.3985, "step": 2450 }, { "epoch": 0.8837044892188052, "grad_norm": 1.7705445289611816, "learning_rate": 0.00024708377518557794, "loss": 0.3895, "step": 2500 }, { "epoch": 0.9013785790031813, "grad_norm": 1.352480173110962, "learning_rate": 0.0002460233297985153, "loss": 0.426, "step": 2550 }, { "epoch": 0.9190526687875574, "grad_norm": 1.479979157447815, "learning_rate": 0.0002449628844114528, "loss": 0.4057, "step": 2600 }, { "epoch": 0.9367267585719335, "grad_norm": 2.1380653381347656, "learning_rate": 0.00024390243902439022, "loss": 0.3689, "step": 2650 }, { "epoch": 0.9544008483563097, "grad_norm": 1.9099682569503784, "learning_rate": 0.00024284199363732768, "loss": 0.3991, "step": 2700 }, { "epoch": 0.9720749381406858, "grad_norm": 1.399566411972046, "learning_rate": 0.0002417815482502651, "loss": 0.3412, "step": 2750 }, { "epoch": 0.9897490279250618, "grad_norm": 2.508267879486084, "learning_rate": 0.00024072110286320252, "loss": 0.3828, "step": 2800 }, { "epoch": 1.0, "eval_bertscore_f1": 0.9667777874331811, "eval_bleu": 0.5973566262792636, "eval_loss": 0.27053505182266235, "eval_runtime": 1054.1237, "eval_samples_per_second": 6.132, "eval_steps_per_second": 0.767, "step": 2829 } ], "logging_steps": 50, "max_steps": 14145, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3460097079115776.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }