{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "grad_norm": 0.42000654339790344,
      "learning_rate": 0.00019,
      "loss": 2.3405,
      "step": 20
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.3462536931037903,
      "learning_rate": 0.00039000000000000005,
      "loss": 2.0338,
      "step": 40
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.3165397346019745,
      "learning_rate": 0.0004995066821070679,
      "loss": 1.9899,
      "step": 60
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.3208169937133789,
      "learning_rate": 0.000494893812399836,
      "loss": 1.9271,
      "step": 80
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.3150009214878082,
      "learning_rate": 0.00048551436319682417,
      "loss": 1.9116,
      "step": 100
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.3124900758266449,
      "learning_rate": 0.00047155089480780364,
      "loss": 1.8997,
      "step": 120
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.3002833127975464,
      "learning_rate": 0.0004532751902617569,
      "loss": 1.8786,
      "step": 140
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.31171324849128723,
      "learning_rate": 0.00043104296535936696,
      "loss": 1.8725,
      "step": 160
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.29542508721351624,
      "learning_rate": 0.0004052869450695776,
      "loss": 1.8492,
      "step": 180
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2971261143684387,
      "learning_rate": 0.0003765084410302909,
      "loss": 1.8369,
      "step": 200
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.30833086371421814,
      "learning_rate": 0.0003452675940875686,
      "loss": 1.8455,
      "step": 220
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.29420724511146545,
      "learning_rate": 0.0003121724717912138,
      "loss": 1.8386,
      "step": 240
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.3092733919620514,
      "learning_rate": 0.00027786723305158134,
      "loss": 1.8389,
      "step": 260
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.30236175656318665,
      "learning_rate": 0.00024301959031910783,
      "loss": 1.8244,
      "step": 280
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.31684431433677673,
      "learning_rate": 0.00020830781332097445,
      "loss": 1.8188,
      "step": 300
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.3082987666130066,
      "learning_rate": 0.00017440752731238892,
      "loss": 1.8117,
      "step": 320
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.28097477555274963,
      "learning_rate": 0.0001419785627995044,
      "loss": 1.8093,
      "step": 340
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.30032917857170105,
      "learning_rate": 0.000111652112689164,
      "loss": 1.8271,
      "step": 360
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.30186182260513306,
      "learning_rate": 8.40184468369396e-05,
      "loss": 1.7926,
      "step": 380
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.30022040009498596,
      "learning_rate": 5.961542311581586e-05,
      "loss": 1.797,
      "step": 400
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.31262123584747314,
      "learning_rate": 3.891801862449629e-05,
      "loss": 1.8048,
      "step": 420
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.2855285704135895,
      "learning_rate": 2.2329084798455746e-05,
      "loss": 1.8056,
      "step": 440
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.3000660240650177,
      "learning_rate": 1.0171506364985622e-05,
      "loss": 1.7877,
      "step": 460
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.2893758714199066,
      "learning_rate": 2.681916759252917e-06,
      "loss": 1.7942,
      "step": 480
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.2988986372947693,
      "learning_rate": 6.092323651313292e-09,
      "loss": 1.7917,
      "step": 500
    }
  ],
  "logging_steps": 20,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.4071568135757824e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}