white-bird
/

1_sft_label1_200

Model card Files Files and versions

1_sft_label1_200 / trainer_state.json

zhenzhe's picture

11

5b53d1a about 1 year ago

history blame contribute delete

4.19 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.5649717514124294,
	"eval_steps": 500,
	"global_step": 200,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.02824858757062147,
	"grad_norm": 5.842401568474987,
	"learning_rate": 5e-06,
	"loss": 2.0396,
	"step": 10
	},
	{
	"epoch": 0.05649717514124294,
	"grad_norm": 3.6388768595262957,
	"learning_rate": 4.997468222143782e-06,
	"loss": 1.7131,
	"step": 20
	},
	{
	"epoch": 0.0847457627118644,
	"grad_norm": 1.8710689080902667,
	"learning_rate": 4.989878016494418e-06,
	"loss": 1.5907,
	"step": 30
	},
	{
	"epoch": 0.11299435028248588,
	"grad_norm": 1.880666572553662,
	"learning_rate": 4.977244756423578e-06,
	"loss": 1.502,
	"step": 40
	},
	{
	"epoch": 0.14124293785310735,
	"grad_norm": 1.6520564729299156,
	"learning_rate": 4.959594029617741e-06,
	"loss": 1.4357,
	"step": 50
	},
	{
	"epoch": 0.1694915254237288,
	"grad_norm": 1.7284626155394651,
	"learning_rate": 4.9369615862523266e-06,
	"loss": 1.3794,
	"step": 60
	},
	{
	"epoch": 0.1977401129943503,
	"grad_norm": 1.7131842535751252,
	"learning_rate": 4.90939326658249e-06,
	"loss": 1.3396,
	"step": 70
	},
	{
	"epoch": 0.22598870056497175,
	"grad_norm": 1.6872621752920653,
	"learning_rate": 4.876944908097249e-06,
	"loss": 1.3119,
	"step": 80
	},
	{
	"epoch": 0.2542372881355932,
	"grad_norm": 1.6781047658941155,
	"learning_rate": 4.8396822324249915e-06,
	"loss": 1.2932,
	"step": 90
	},
	{
	"epoch": 0.2824858757062147,
	"grad_norm": 1.8225514648316858,
	"learning_rate": 4.797680712219421e-06,
	"loss": 1.2533,
	"step": 100
	},
	{
	"epoch": 0.3107344632768362,
	"grad_norm": 1.4304345855914073,
	"learning_rate": 4.751025418295565e-06,
	"loss": 1.2581,
	"step": 110
	},
	{
	"epoch": 0.3389830508474576,
	"grad_norm": 2.489693154849688,
	"learning_rate": 4.699810847325449e-06,
	"loss": 1.2615,
	"step": 120
	},
	{
	"epoch": 0.3672316384180791,
	"grad_norm": 1.497470889635536,
	"learning_rate": 4.644140730442432e-06,
	"loss": 1.2385,
	"step": 130
	},
	{
	"epoch": 0.3954802259887006,
	"grad_norm": 1.4852922561551125,
	"learning_rate": 4.584127823141855e-06,
	"loss": 1.2228,
	"step": 140
	},
	{
	"epoch": 0.423728813559322,
	"grad_norm": 1.4374567982916069,
	"learning_rate": 4.5198936769035504e-06,
	"loss": 1.2254,
	"step": 150
	},
	{
	"epoch": 0.4519774011299435,
	"grad_norm": 1.4496375733325404,
	"learning_rate": 4.451568392998767e-06,
	"loss": 1.2265,
	"step": 160
	},
	{
	"epoch": 0.480225988700565,
	"grad_norm": 1.2754388779607286,
	"learning_rate": 4.3792903589801515e-06,
	"loss": 1.1846,
	"step": 170
	},
	{
	"epoch": 0.5084745762711864,
	"grad_norm": 1.3975405752824914,
	"learning_rate": 4.30320596838852e-06,
	"loss": 1.178,
	"step": 180
	},
	{
	"epoch": 0.536723163841808,
	"grad_norm": 1.3749489429241677,
	"learning_rate": 4.223469324244115e-06,
	"loss": 1.1717,
	"step": 190
	},
	{
	"epoch": 0.5649717514124294,
	"grad_norm": 1.393315372114993,
	"learning_rate": 4.140241926922916e-06,
	"loss": 1.181,
	"step": 200
	}
	],
	"logging_steps": 10,
	"max_steps": 708,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 2,
	"save_steps": 200,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 65229815808000.0,
	"train_batch_size": 8,
	"trial_name": null,
	"trial_params": null
	}