End of training

9228397 verified about 1 year ago

6.35 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 2.9927360774818403,
	"eval_steps": 500,
	"global_step": 309,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.09685230024213075,
	"grad_norm": 1.6106698543432953,
	"learning_rate": 5e-06,
	"loss": 0.649,
	"step": 10
	},
	{
	"epoch": 0.1937046004842615,
	"grad_norm": 0.9911008535692477,
	"learning_rate": 5e-06,
	"loss": 0.5864,
	"step": 20
	},
	{
	"epoch": 0.29055690072639223,
	"grad_norm": 0.6386745349148736,
	"learning_rate": 5e-06,
	"loss": 0.5582,
	"step": 30
	},
	{
	"epoch": 0.387409200968523,
	"grad_norm": 1.0205474281151061,
	"learning_rate": 5e-06,
	"loss": 0.5464,
	"step": 40
	},
	{
	"epoch": 0.48426150121065376,
	"grad_norm": 0.741493469056709,
	"learning_rate": 5e-06,
	"loss": 0.5311,
	"step": 50
	},
	{
	"epoch": 0.5811138014527845,
	"grad_norm": 0.7786501028026512,
	"learning_rate": 5e-06,
	"loss": 0.5181,
	"step": 60
	},
	{
	"epoch": 0.6779661016949152,
	"grad_norm": 0.6711455522499474,
	"learning_rate": 5e-06,
	"loss": 0.5158,
	"step": 70
	},
	{
	"epoch": 0.774818401937046,
	"grad_norm": 0.6218873174772125,
	"learning_rate": 5e-06,
	"loss": 0.5126,
	"step": 80
	},
	{
	"epoch": 0.8716707021791767,
	"grad_norm": 0.6652769231147759,
	"learning_rate": 5e-06,
	"loss": 0.5105,
	"step": 90
	},
	{
	"epoch": 0.9685230024213075,
	"grad_norm": 1.4354098566929865,
	"learning_rate": 5e-06,
	"loss": 0.5082,
	"step": 100
	},
	{
	"epoch": 0.9975786924939467,
	"eval_loss": 0.49066221714019775,
	"eval_runtime": 69.4631,
	"eval_samples_per_second": 40.021,
	"eval_steps_per_second": 0.633,
	"step": 103
	},
	{
	"epoch": 1.0653753026634383,
	"grad_norm": 0.8075871605198771,
	"learning_rate": 5e-06,
	"loss": 0.5159,
	"step": 110
	},
	{
	"epoch": 1.162227602905569,
	"grad_norm": 0.7654895903052866,
	"learning_rate": 5e-06,
	"loss": 0.4583,
	"step": 120
	},
	{
	"epoch": 1.2590799031476998,
	"grad_norm": 0.47351004510337863,
	"learning_rate": 5e-06,
	"loss": 0.4586,
	"step": 130
	},
	{
	"epoch": 1.3559322033898304,
	"grad_norm": 0.5062829494154636,
	"learning_rate": 5e-06,
	"loss": 0.4572,
	"step": 140
	},
	{
	"epoch": 1.4527845036319613,
	"grad_norm": 0.6119092771725125,
	"learning_rate": 5e-06,
	"loss": 0.4544,
	"step": 150
	},
	{
	"epoch": 1.549636803874092,
	"grad_norm": 0.6212058614890003,
	"learning_rate": 5e-06,
	"loss": 0.4561,
	"step": 160
	},
	{
	"epoch": 1.6464891041162226,
	"grad_norm": 0.5105359500584984,
	"learning_rate": 5e-06,
	"loss": 0.4518,
	"step": 170
	},
	{
	"epoch": 1.7433414043583535,
	"grad_norm": 0.5867880979483323,
	"learning_rate": 5e-06,
	"loss": 0.4551,
	"step": 180
	},
	{
	"epoch": 1.8401937046004844,
	"grad_norm": 0.4498960324504211,
	"learning_rate": 5e-06,
	"loss": 0.454,
	"step": 190
	},
	{
	"epoch": 1.937046004842615,
	"grad_norm": 0.5182866069406472,
	"learning_rate": 5e-06,
	"loss": 0.4499,
	"step": 200
	},
	{
	"epoch": 1.9951573849878934,
	"eval_loss": 0.47824251651763916,
	"eval_runtime": 71.5938,
	"eval_samples_per_second": 38.83,
	"eval_steps_per_second": 0.615,
	"step": 206
	},
	{
	"epoch": 2.0338983050847457,
	"grad_norm": 0.9414090883543634,
	"learning_rate": 5e-06,
	"loss": 0.4671,
	"step": 210
	},
	{
	"epoch": 2.1307506053268765,
	"grad_norm": 0.5171048417889069,
	"learning_rate": 5e-06,
	"loss": 0.4066,
	"step": 220
	},
	{
	"epoch": 2.2276029055690074,
	"grad_norm": 0.5123629438372025,
	"learning_rate": 5e-06,
	"loss": 0.4113,
	"step": 230
	},
	{
	"epoch": 2.324455205811138,
	"grad_norm": 0.5363285052863767,
	"learning_rate": 5e-06,
	"loss": 0.4081,
	"step": 240
	},
	{
	"epoch": 2.4213075060532687,
	"grad_norm": 0.4907788960865576,
	"learning_rate": 5e-06,
	"loss": 0.407,
	"step": 250
	},
	{
	"epoch": 2.5181598062953996,
	"grad_norm": 0.507228977380475,
	"learning_rate": 5e-06,
	"loss": 0.4051,
	"step": 260
	},
	{
	"epoch": 2.61501210653753,
	"grad_norm": 0.4923140802099653,
	"learning_rate": 5e-06,
	"loss": 0.4109,
	"step": 270
	},
	{
	"epoch": 2.711864406779661,
	"grad_norm": 0.5763086112386324,
	"learning_rate": 5e-06,
	"loss": 0.3986,
	"step": 280
	},
	{
	"epoch": 2.8087167070217918,
	"grad_norm": 0.4788239568139877,
	"learning_rate": 5e-06,
	"loss": 0.4115,
	"step": 290
	},
	{
	"epoch": 2.9055690072639226,
	"grad_norm": 0.5281993404834231,
	"learning_rate": 5e-06,
	"loss": 0.4124,
	"step": 300
	},
	{
	"epoch": 2.9927360774818403,
	"eval_loss": 0.47956007719039917,
	"eval_runtime": 68.3404,
	"eval_samples_per_second": 40.679,
	"eval_steps_per_second": 0.644,
	"step": 309
	},
	{
	"epoch": 2.9927360774818403,
	"step": 309,
	"total_flos": 517377129185280.0,
	"train_loss": 0.4712127952513957,
	"train_runtime": 10324.3717,
	"train_samples_per_second": 15.347,
	"train_steps_per_second": 0.03
	}
	],
	"logging_steps": 10,
	"max_steps": 309,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 3,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 517377129185280.0,
	"train_batch_size": 8,
	"trial_name": null,
	"trial_params": null
	}