{
"best_global_step": 100,
"best_metric": 0.1666431427001953,
"best_model_checkpoint": "/output/evoxtral-lora/checkpoint-100",
"epoch": 1.9702970297029703,
"eval_steps": 50,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09900990099009901,
"grad_norm": 5.802285194396973,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.9557,
"step": 5
},
{
"epoch": 0.19801980198019803,
"grad_norm": 2.306023597717285,
"learning_rate": 3.6e-05,
"loss": 0.461,
"step": 10
},
{
"epoch": 0.297029702970297,
"grad_norm": 2.0266661643981934,
"learning_rate": 5.6000000000000006e-05,
"loss": 0.3281,
"step": 15
},
{
"epoch": 0.39603960396039606,
"grad_norm": 1.5811512470245361,
"learning_rate": 7.6e-05,
"loss": 0.2656,
"step": 20
},
{
"epoch": 0.49504950495049505,
"grad_norm": 1.5266460180282593,
"learning_rate": 9.6e-05,
"loss": 0.2477,
"step": 25
},
{
"epoch": 0.594059405940594,
"grad_norm": 1.0605043172836304,
"learning_rate": 0.000116,
"loss": 0.2199,
"step": 30
},
{
"epoch": 0.693069306930693,
"grad_norm": 1.4974918365478516,
"learning_rate": 0.00013600000000000003,
"loss": 0.1935,
"step": 35
},
{
"epoch": 0.7920792079207921,
"grad_norm": 1.1257297992706299,
"learning_rate": 0.00015600000000000002,
"loss": 0.1739,
"step": 40
},
{
"epoch": 0.8910891089108911,
"grad_norm": 0.9485541582107544,
"learning_rate": 0.00017600000000000002,
"loss": 0.1829,
"step": 45
},
{
"epoch": 0.9900990099009901,
"grad_norm": 0.6643406748771667,
"learning_rate": 0.000196,
"loss": 0.1887,
"step": 50
},
{
"epoch": 0.9900990099009901,
"eval_loss": 0.1841340959072113,
"eval_runtime": 17.7494,
"eval_samples_per_second": 5.69,
"eval_steps_per_second": 2.873,
"step": 50
},
{
"epoch": 1.0792079207920793,
"grad_norm": 0.7675807476043701,
"learning_rate": 0.0001992566788083908,
"loss": 0.1535,
"step": 55
},
{
"epoch": 1.1782178217821782,
"grad_norm": 0.6500945687294006,
"learning_rate": 0.0001962558656223516,
"loss": 0.15,
"step": 60
},
{
"epoch": 1.2772277227722773,
"grad_norm": 3.4771132469177246,
"learning_rate": 0.00019102070542141328,
"loss": 0.1058,
"step": 65
},
{
"epoch": 1.3762376237623761,
"grad_norm": 6.509069919586182,
"learning_rate": 0.0001836727197823842,
"loss": 0.148,
"step": 70
},
{
"epoch": 1.4752475247524752,
"grad_norm": 1.21564519405365,
"learning_rate": 0.0001743824744123196,
"loss": 0.154,
"step": 75
},
{
"epoch": 1.5742574257425743,
"grad_norm": 0.9891815185546875,
"learning_rate": 0.00016336561987834153,
"loss": 0.1472,
"step": 80
},
{
"epoch": 1.6732673267326734,
"grad_norm": 0.8960129022598267,
"learning_rate": 0.00015087788580152206,
"loss": 0.1388,
"step": 85
},
{
"epoch": 1.7722772277227723,
"grad_norm": 0.6417158842086792,
"learning_rate": 0.00013720914471250644,
"loss": 0.1544,
"step": 90
},
{
"epoch": 1.8712871287128712,
"grad_norm": 0.6971271634101868,
"learning_rate": 0.00012267668336210413,
"loss": 0.1216,
"step": 95
},
{
"epoch": 1.9702970297029703,
"grad_norm": 0.6400907039642334,
"learning_rate": 0.00010761783767709182,
"loss": 0.1347,
"step": 100
},
{
"epoch": 1.9702970297029703,
"eval_loss": 0.1666431427001953,
"eval_runtime": 17.7221,
"eval_samples_per_second": 5.699,
"eval_steps_per_second": 2.878,
"step": 100
}
],
"logging_steps": 5,
"max_steps": 153,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.8528360565456896e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}