{
"best_global_step": 2829,
"best_metric": 0.9667777874331811,
"best_model_checkpoint": "./codet5-qlora-k8s/checkpoint-2829",
"epoch": 1.0,
"eval_steps": 500,
"global_step": 2829,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.017674089784376106,
"grad_norm": 1.1207759380340576,
"learning_rate": 0.00029898197242841994,
"loss": 3.2886,
"step": 50
},
{
"epoch": 0.03534817956875221,
"grad_norm": 1.2978123426437378,
"learning_rate": 0.0002979215270413573,
"loss": 1.8567,
"step": 100
},
{
"epoch": 0.053022269353128315,
"grad_norm": 1.624740719795227,
"learning_rate": 0.0002968610816542948,
"loss": 1.5695,
"step": 150
},
{
"epoch": 0.07069635913750442,
"grad_norm": 1.7711330652236938,
"learning_rate": 0.0002958006362672322,
"loss": 1.4205,
"step": 200
},
{
"epoch": 0.08837044892188052,
"grad_norm": 1.62517511844635,
"learning_rate": 0.0002947401908801697,
"loss": 1.2732,
"step": 250
},
{
"epoch": 0.10604453870625663,
"grad_norm": 2.038139820098877,
"learning_rate": 0.00029367974549310706,
"loss": 1.1913,
"step": 300
},
{
"epoch": 0.12371862849063273,
"grad_norm": 2.262789487838745,
"learning_rate": 0.00029264050901378576,
"loss": 1.117,
"step": 350
},
{
"epoch": 0.14139271827500885,
"grad_norm": 3.121687650680542,
"learning_rate": 0.0002915800636267232,
"loss": 1.0202,
"step": 400
},
{
"epoch": 0.15906680805938495,
"grad_norm": 2.0951812267303467,
"learning_rate": 0.0002905196182396606,
"loss": 0.9499,
"step": 450
},
{
"epoch": 0.17674089784376104,
"grad_norm": 2.670121192932129,
"learning_rate": 0.00028945917285259806,
"loss": 0.9707,
"step": 500
},
{
"epoch": 0.19441498762813716,
"grad_norm": 2.3631107807159424,
"learning_rate": 0.00028841993637327676,
"loss": 0.7961,
"step": 550
},
{
"epoch": 0.21208907741251326,
"grad_norm": 2.10772705078125,
"learning_rate": 0.0002873594909862142,
"loss": 0.8912,
"step": 600
},
{
"epoch": 0.22976316719688936,
"grad_norm": 2.360686779022217,
"learning_rate": 0.00028629904559915163,
"loss": 0.871,
"step": 650
},
{
"epoch": 0.24743725698126545,
"grad_norm": 2.191119432449341,
"learning_rate": 0.0002852598091198303,
"loss": 0.758,
"step": 700
},
{
"epoch": 0.2651113467656416,
"grad_norm": 1.7646818161010742,
"learning_rate": 0.00028419936373276776,
"loss": 0.8244,
"step": 750
},
{
"epoch": 0.2827854365500177,
"grad_norm": 2.3776354789733887,
"learning_rate": 0.00028313891834570514,
"loss": 0.7664,
"step": 800
},
{
"epoch": 0.30045952633439377,
"grad_norm": 2.8682475090026855,
"learning_rate": 0.00028207847295864263,
"loss": 0.6942,
"step": 850
},
{
"epoch": 0.3181336161187699,
"grad_norm": 2.353091239929199,
"learning_rate": 0.00028101802757158,
"loss": 0.7323,
"step": 900
},
{
"epoch": 0.335807705903146,
"grad_norm": 1.9457337856292725,
"learning_rate": 0.00027995758218451745,
"loss": 0.6474,
"step": 950
},
{
"epoch": 0.3534817956875221,
"grad_norm": 2.510075330734253,
"learning_rate": 0.00027889713679745494,
"loss": 0.6801,
"step": 1000
},
{
"epoch": 0.3711558854718982,
"grad_norm": 1.7497014999389648,
"learning_rate": 0.0002778366914103923,
"loss": 0.656,
"step": 1050
},
{
"epoch": 0.38882997525627433,
"grad_norm": 2.862682342529297,
"learning_rate": 0.0002767762460233298,
"loss": 0.6238,
"step": 1100
},
{
"epoch": 0.4065040650406504,
"grad_norm": 1.998961091041565,
"learning_rate": 0.0002757158006362672,
"loss": 0.6306,
"step": 1150
},
{
"epoch": 0.4241781548250265,
"grad_norm": 1.854942798614502,
"learning_rate": 0.0002746553552492047,
"loss": 0.5689,
"step": 1200
},
{
"epoch": 0.4418522446094026,
"grad_norm": 1.8994203805923462,
"learning_rate": 0.00027359490986214206,
"loss": 0.6595,
"step": 1250
},
{
"epoch": 0.4595263343937787,
"grad_norm": 1.6235908269882202,
"learning_rate": 0.0002725344644750795,
"loss": 0.5665,
"step": 1300
},
{
"epoch": 0.47720042417815484,
"grad_norm": 2.291989803314209,
"learning_rate": 0.00027147401908801693,
"loss": 0.5761,
"step": 1350
},
{
"epoch": 0.4948745139625309,
"grad_norm": 1.4632915258407593,
"learning_rate": 0.00027041357370095437,
"loss": 0.5171,
"step": 1400
},
{
"epoch": 0.512548603746907,
"grad_norm": 2.1687259674072266,
"learning_rate": 0.0002693531283138918,
"loss": 0.6183,
"step": 1450
},
{
"epoch": 0.5302226935312832,
"grad_norm": 1.734108805656433,
"learning_rate": 0.00026829268292682924,
"loss": 0.5411,
"step": 1500
},
{
"epoch": 0.5478967833156593,
"grad_norm": 1.3890644311904907,
"learning_rate": 0.00026723223753976667,
"loss": 0.5092,
"step": 1550
},
{
"epoch": 0.5655708731000354,
"grad_norm": 1.98700749874115,
"learning_rate": 0.0002661717921527041,
"loss": 0.4804,
"step": 1600
},
{
"epoch": 0.5832449628844114,
"grad_norm": 1.1181468963623047,
"learning_rate": 0.00026511134676564154,
"loss": 0.5148,
"step": 1650
},
{
"epoch": 0.6009190526687875,
"grad_norm": 1.7994420528411865,
"learning_rate": 0.000264050901378579,
"loss": 0.4231,
"step": 1700
},
{
"epoch": 0.6185931424531637,
"grad_norm": 2.032198667526245,
"learning_rate": 0.0002629904559915164,
"loss": 0.5106,
"step": 1750
},
{
"epoch": 0.6362672322375398,
"grad_norm": 3.585948944091797,
"learning_rate": 0.00026193001060445385,
"loss": 0.4717,
"step": 1800
},
{
"epoch": 0.6539413220219159,
"grad_norm": 1.8610371351242065,
"learning_rate": 0.0002608695652173913,
"loss": 0.4765,
"step": 1850
},
{
"epoch": 0.671615411806292,
"grad_norm": 1.2324624061584473,
"learning_rate": 0.0002598091198303287,
"loss": 0.4643,
"step": 1900
},
{
"epoch": 0.689289501590668,
"grad_norm": 2.391714572906494,
"learning_rate": 0.00025874867444326615,
"loss": 0.4512,
"step": 1950
},
{
"epoch": 0.7069635913750442,
"grad_norm": 1.8863242864608765,
"learning_rate": 0.0002576882290562036,
"loss": 0.4115,
"step": 2000
},
{
"epoch": 0.7246376811594203,
"grad_norm": 0.7850649356842041,
"learning_rate": 0.000256627783669141,
"loss": 0.4341,
"step": 2050
},
{
"epoch": 0.7423117709437964,
"grad_norm": 1.5869959592819214,
"learning_rate": 0.00025556733828207846,
"loss": 0.4172,
"step": 2100
},
{
"epoch": 0.7599858607281725,
"grad_norm": 1.2584971189498901,
"learning_rate": 0.0002545068928950159,
"loss": 0.4384,
"step": 2150
},
{
"epoch": 0.7776599505125487,
"grad_norm": 2.560710906982422,
"learning_rate": 0.00025344644750795333,
"loss": 0.4558,
"step": 2200
},
{
"epoch": 0.7953340402969247,
"grad_norm": 2.2893359661102295,
"learning_rate": 0.00025238600212089076,
"loss": 0.4345,
"step": 2250
},
{
"epoch": 0.8130081300813008,
"grad_norm": 1.5244982242584229,
"learning_rate": 0.0002513255567338282,
"loss": 0.4071,
"step": 2300
},
{
"epoch": 0.8306822198656769,
"grad_norm": 1.384102463722229,
"learning_rate": 0.00025026511134676563,
"loss": 0.3612,
"step": 2350
},
{
"epoch": 0.848356309650053,
"grad_norm": 1.3080965280532837,
"learning_rate": 0.00024920466595970307,
"loss": 0.3556,
"step": 2400
},
{
"epoch": 0.8660303994344292,
"grad_norm": 1.3324400186538696,
"learning_rate": 0.00024814422057264045,
"loss": 0.3985,
"step": 2450
},
{
"epoch": 0.8837044892188052,
"grad_norm": 1.7705445289611816,
"learning_rate": 0.00024708377518557794,
"loss": 0.3895,
"step": 2500
},
{
"epoch": 0.9013785790031813,
"grad_norm": 1.352480173110962,
"learning_rate": 0.0002460233297985153,
"loss": 0.426,
"step": 2550
},
{
"epoch": 0.9190526687875574,
"grad_norm": 1.479979157447815,
"learning_rate": 0.0002449628844114528,
"loss": 0.4057,
"step": 2600
},
{
"epoch": 0.9367267585719335,
"grad_norm": 2.1380653381347656,
"learning_rate": 0.00024390243902439022,
"loss": 0.3689,
"step": 2650
},
{
"epoch": 0.9544008483563097,
"grad_norm": 1.9099682569503784,
"learning_rate": 0.00024284199363732768,
"loss": 0.3991,
"step": 2700
},
{
"epoch": 0.9720749381406858,
"grad_norm": 1.399566411972046,
"learning_rate": 0.0002417815482502651,
"loss": 0.3412,
"step": 2750
},
{
"epoch": 0.9897490279250618,
"grad_norm": 2.508267879486084,
"learning_rate": 0.00024072110286320252,
"loss": 0.3828,
"step": 2800
},
{
"epoch": 1.0,
"eval_bertscore_f1": 0.9667777874331811,
"eval_bleu": 0.5973566262792636,
"eval_loss": 0.27053505182266235,
"eval_runtime": 1054.1237,
"eval_samples_per_second": 6.132,
"eval_steps_per_second": 0.767,
"step": 2829
}
],
"logging_steps": 50,
"max_steps": 14145,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3460097079115776.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}