{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 2252,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08888888888888889,
      "grad_norm": 0.045956578105688095,
      "learning_rate": 0.00019564831261101244,
      "loss": 0.322,
      "step": 50
    },
    {
      "epoch": 0.17777777777777778,
      "grad_norm": 0.03815697133541107,
      "learning_rate": 0.00019120781527531084,
      "loss": 0.1842,
      "step": 100
    },
    {
      "epoch": 0.26666666666666666,
      "grad_norm": 0.061464082449674606,
      "learning_rate": 0.00018676731793960925,
      "loss": 0.1801,
      "step": 150
    },
    {
      "epoch": 0.35555555555555557,
      "grad_norm": 0.0637449324131012,
      "learning_rate": 0.00018232682060390765,
      "loss": 0.1773,
      "step": 200
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 0.053989194333553314,
      "learning_rate": 0.00017788632326820605,
      "loss": 0.17,
      "step": 250
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 0.06381799280643463,
      "learning_rate": 0.00017344582593250445,
      "loss": 0.1527,
      "step": 300
    },
    {
      "epoch": 0.6222222222222222,
      "grad_norm": 0.07270950824022293,
      "learning_rate": 0.00016900532859680286,
      "loss": 0.1594,
      "step": 350
    },
    {
      "epoch": 0.7111111111111111,
      "grad_norm": 0.051719773560762405,
      "learning_rate": 0.00016456483126110126,
      "loss": 0.1535,
      "step": 400
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.06734076142311096,
      "learning_rate": 0.00016012433392539966,
      "loss": 0.1431,
      "step": 450
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 0.04238704591989517,
      "learning_rate": 0.00015568383658969806,
      "loss": 0.1439,
      "step": 500
    },
    {
      "epoch": 0.9777777777777777,
      "grad_norm": 0.056586671620607376,
      "learning_rate": 0.00015124333925399647,
      "loss": 0.1333,
      "step": 550
    },
    {
      "epoch": 1.0657777777777777,
      "grad_norm": 0.05698104947805405,
      "learning_rate": 0.00014680284191829484,
      "loss": 0.1474,
      "step": 600
    },
    {
      "epoch": 1.1546666666666667,
      "grad_norm": 0.054345980286598206,
      "learning_rate": 0.00014236234458259324,
      "loss": 0.1253,
      "step": 650
    },
    {
      "epoch": 1.2435555555555555,
      "grad_norm": 0.04708417132496834,
      "learning_rate": 0.00013792184724689165,
      "loss": 0.1349,
      "step": 700
    },
    {
      "epoch": 1.3324444444444445,
      "grad_norm": 0.06845518201589584,
      "learning_rate": 0.00013348134991119005,
      "loss": 0.1144,
      "step": 750
    },
    {
      "epoch": 1.4213333333333333,
      "grad_norm": 0.0530928298830986,
      "learning_rate": 0.00012904085257548845,
      "loss": 0.1168,
      "step": 800
    },
    {
      "epoch": 1.5102222222222221,
      "grad_norm": 0.08338514715433121,
      "learning_rate": 0.00012460035523978685,
      "loss": 0.1237,
      "step": 850
    },
    {
      "epoch": 1.5991111111111111,
      "grad_norm": 0.06609191745519638,
      "learning_rate": 0.00012015985790408525,
      "loss": 0.141,
      "step": 900
    },
    {
      "epoch": 1.688,
      "grad_norm": 0.08270981907844543,
      "learning_rate": 0.00011571936056838366,
      "loss": 0.1256,
      "step": 950
    },
    {
      "epoch": 1.7768888888888887,
      "grad_norm": 0.048950765281915665,
      "learning_rate": 0.00011127886323268206,
      "loss": 0.1175,
      "step": 1000
    },
    {
      "epoch": 1.8657777777777778,
      "grad_norm": 0.06318267434835434,
      "learning_rate": 0.00010683836589698046,
      "loss": 0.1275,
      "step": 1050
    },
    {
      "epoch": 1.9546666666666668,
      "grad_norm": 0.07041537761688232,
      "learning_rate": 0.00010239786856127886,
      "loss": 0.124,
      "step": 1100
    },
    {
      "epoch": 2.042666666666667,
      "grad_norm": 0.0656278058886528,
      "learning_rate": 9.795737122557727e-05,
      "loss": 0.1327,
      "step": 1150
    },
    {
      "epoch": 2.1315555555555554,
      "grad_norm": 0.06331595033407211,
      "learning_rate": 9.351687388987567e-05,
      "loss": 0.1069,
      "step": 1200
    },
    {
      "epoch": 2.2204444444444444,
      "grad_norm": 0.04332101345062256,
      "learning_rate": 8.907637655417407e-05,
      "loss": 0.1024,
      "step": 1250
    },
    {
      "epoch": 2.3093333333333335,
      "grad_norm": 0.06444103270769119,
      "learning_rate": 8.463587921847247e-05,
      "loss": 0.1026,
      "step": 1300
    },
    {
      "epoch": 2.398222222222222,
      "grad_norm": 0.06379590928554535,
      "learning_rate": 8.019538188277088e-05,
      "loss": 0.1189,
      "step": 1350
    },
    {
      "epoch": 2.487111111111111,
      "grad_norm": 0.0656711533665657,
      "learning_rate": 7.575488454706926e-05,
      "loss": 0.1085,
      "step": 1400
    },
    {
      "epoch": 2.576,
      "grad_norm": 0.07427001744508743,
      "learning_rate": 7.131438721136767e-05,
      "loss": 0.1245,
      "step": 1450
    },
    {
      "epoch": 2.664888888888889,
      "grad_norm": 0.06245647370815277,
      "learning_rate": 6.687388987566607e-05,
      "loss": 0.1118,
      "step": 1500
    },
    {
      "epoch": 2.7537777777777777,
      "grad_norm": 0.08108062297105789,
      "learning_rate": 6.243339253996447e-05,
      "loss": 0.1137,
      "step": 1550
    },
    {
      "epoch": 2.8426666666666667,
      "grad_norm": 0.061958227306604385,
      "learning_rate": 5.7992895204262874e-05,
      "loss": 0.1001,
      "step": 1600
    },
    {
      "epoch": 2.9315555555555557,
      "grad_norm": 0.056670840829610825,
      "learning_rate": 5.3552397868561276e-05,
      "loss": 0.1172,
      "step": 1650
    },
    {
      "epoch": 3.0195555555555558,
      "grad_norm": 0.07005661725997925,
      "learning_rate": 4.9111900532859686e-05,
      "loss": 0.1099,
      "step": 1700
    },
    {
      "epoch": 3.1084444444444443,
      "grad_norm": 0.0712517499923706,
      "learning_rate": 4.467140319715809e-05,
      "loss": 0.0932,
      "step": 1750
    },
    {
      "epoch": 3.1973333333333334,
      "grad_norm": 0.07057236135005951,
      "learning_rate": 4.0230905861456483e-05,
      "loss": 0.0982,
      "step": 1800
    },
    {
      "epoch": 3.2862222222222224,
      "grad_norm": 0.07291365414857864,
      "learning_rate": 3.5790408525754886e-05,
      "loss": 0.1008,
      "step": 1850
    },
    {
      "epoch": 3.375111111111111,
      "grad_norm": 0.11394993960857391,
      "learning_rate": 3.134991119005329e-05,
      "loss": 0.0933,
      "step": 1900
    },
    {
      "epoch": 3.464,
      "grad_norm": 0.09188514947891235,
      "learning_rate": 2.690941385435169e-05,
      "loss": 0.0969,
      "step": 1950
    },
    {
      "epoch": 3.552888888888889,
      "grad_norm": 0.08308542519807816,
      "learning_rate": 2.246891651865009e-05,
      "loss": 0.1067,
      "step": 2000
    },
    {
      "epoch": 3.6417777777777776,
      "grad_norm": 0.10817820578813553,
      "learning_rate": 1.8028419182948492e-05,
      "loss": 0.0931,
      "step": 2050
    },
    {
      "epoch": 3.7306666666666666,
      "grad_norm": 0.06676340103149414,
      "learning_rate": 1.3587921847246892e-05,
      "loss": 0.0984,
      "step": 2100
    },
    {
      "epoch": 3.8195555555555556,
      "grad_norm": 0.0792100727558136,
      "learning_rate": 9.147424511545293e-06,
      "loss": 0.1007,
      "step": 2150
    },
    {
      "epoch": 3.9084444444444446,
      "grad_norm": 0.06362316757440567,
      "learning_rate": 4.706927175843695e-06,
      "loss": 0.0935,
      "step": 2200
    },
    {
      "epoch": 3.997333333333333,
      "grad_norm": 0.08906163275241852,
      "learning_rate": 2.6642984014209596e-07,
      "loss": 0.0958,
      "step": 2250
    }
  ],
  "logging_steps": 50,
  "max_steps": 2252,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.468874889363456e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}