sedrickkeh's picture
End of training
9228397 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9927360774818403,
"eval_steps": 500,
"global_step": 309,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09685230024213075,
"grad_norm": 1.6106698543432953,
"learning_rate": 5e-06,
"loss": 0.649,
"step": 10
},
{
"epoch": 0.1937046004842615,
"grad_norm": 0.9911008535692477,
"learning_rate": 5e-06,
"loss": 0.5864,
"step": 20
},
{
"epoch": 0.29055690072639223,
"grad_norm": 0.6386745349148736,
"learning_rate": 5e-06,
"loss": 0.5582,
"step": 30
},
{
"epoch": 0.387409200968523,
"grad_norm": 1.0205474281151061,
"learning_rate": 5e-06,
"loss": 0.5464,
"step": 40
},
{
"epoch": 0.48426150121065376,
"grad_norm": 0.741493469056709,
"learning_rate": 5e-06,
"loss": 0.5311,
"step": 50
},
{
"epoch": 0.5811138014527845,
"grad_norm": 0.7786501028026512,
"learning_rate": 5e-06,
"loss": 0.5181,
"step": 60
},
{
"epoch": 0.6779661016949152,
"grad_norm": 0.6711455522499474,
"learning_rate": 5e-06,
"loss": 0.5158,
"step": 70
},
{
"epoch": 0.774818401937046,
"grad_norm": 0.6218873174772125,
"learning_rate": 5e-06,
"loss": 0.5126,
"step": 80
},
{
"epoch": 0.8716707021791767,
"grad_norm": 0.6652769231147759,
"learning_rate": 5e-06,
"loss": 0.5105,
"step": 90
},
{
"epoch": 0.9685230024213075,
"grad_norm": 1.4354098566929865,
"learning_rate": 5e-06,
"loss": 0.5082,
"step": 100
},
{
"epoch": 0.9975786924939467,
"eval_loss": 0.49066221714019775,
"eval_runtime": 69.4631,
"eval_samples_per_second": 40.021,
"eval_steps_per_second": 0.633,
"step": 103
},
{
"epoch": 1.0653753026634383,
"grad_norm": 0.8075871605198771,
"learning_rate": 5e-06,
"loss": 0.5159,
"step": 110
},
{
"epoch": 1.162227602905569,
"grad_norm": 0.7654895903052866,
"learning_rate": 5e-06,
"loss": 0.4583,
"step": 120
},
{
"epoch": 1.2590799031476998,
"grad_norm": 0.47351004510337863,
"learning_rate": 5e-06,
"loss": 0.4586,
"step": 130
},
{
"epoch": 1.3559322033898304,
"grad_norm": 0.5062829494154636,
"learning_rate": 5e-06,
"loss": 0.4572,
"step": 140
},
{
"epoch": 1.4527845036319613,
"grad_norm": 0.6119092771725125,
"learning_rate": 5e-06,
"loss": 0.4544,
"step": 150
},
{
"epoch": 1.549636803874092,
"grad_norm": 0.6212058614890003,
"learning_rate": 5e-06,
"loss": 0.4561,
"step": 160
},
{
"epoch": 1.6464891041162226,
"grad_norm": 0.5105359500584984,
"learning_rate": 5e-06,
"loss": 0.4518,
"step": 170
},
{
"epoch": 1.7433414043583535,
"grad_norm": 0.5867880979483323,
"learning_rate": 5e-06,
"loss": 0.4551,
"step": 180
},
{
"epoch": 1.8401937046004844,
"grad_norm": 0.4498960324504211,
"learning_rate": 5e-06,
"loss": 0.454,
"step": 190
},
{
"epoch": 1.937046004842615,
"grad_norm": 0.5182866069406472,
"learning_rate": 5e-06,
"loss": 0.4499,
"step": 200
},
{
"epoch": 1.9951573849878934,
"eval_loss": 0.47824251651763916,
"eval_runtime": 71.5938,
"eval_samples_per_second": 38.83,
"eval_steps_per_second": 0.615,
"step": 206
},
{
"epoch": 2.0338983050847457,
"grad_norm": 0.9414090883543634,
"learning_rate": 5e-06,
"loss": 0.4671,
"step": 210
},
{
"epoch": 2.1307506053268765,
"grad_norm": 0.5171048417889069,
"learning_rate": 5e-06,
"loss": 0.4066,
"step": 220
},
{
"epoch": 2.2276029055690074,
"grad_norm": 0.5123629438372025,
"learning_rate": 5e-06,
"loss": 0.4113,
"step": 230
},
{
"epoch": 2.324455205811138,
"grad_norm": 0.5363285052863767,
"learning_rate": 5e-06,
"loss": 0.4081,
"step": 240
},
{
"epoch": 2.4213075060532687,
"grad_norm": 0.4907788960865576,
"learning_rate": 5e-06,
"loss": 0.407,
"step": 250
},
{
"epoch": 2.5181598062953996,
"grad_norm": 0.507228977380475,
"learning_rate": 5e-06,
"loss": 0.4051,
"step": 260
},
{
"epoch": 2.61501210653753,
"grad_norm": 0.4923140802099653,
"learning_rate": 5e-06,
"loss": 0.4109,
"step": 270
},
{
"epoch": 2.711864406779661,
"grad_norm": 0.5763086112386324,
"learning_rate": 5e-06,
"loss": 0.3986,
"step": 280
},
{
"epoch": 2.8087167070217918,
"grad_norm": 0.4788239568139877,
"learning_rate": 5e-06,
"loss": 0.4115,
"step": 290
},
{
"epoch": 2.9055690072639226,
"grad_norm": 0.5281993404834231,
"learning_rate": 5e-06,
"loss": 0.4124,
"step": 300
},
{
"epoch": 2.9927360774818403,
"eval_loss": 0.47956007719039917,
"eval_runtime": 68.3404,
"eval_samples_per_second": 40.679,
"eval_steps_per_second": 0.644,
"step": 309
},
{
"epoch": 2.9927360774818403,
"step": 309,
"total_flos": 517377129185280.0,
"train_loss": 0.4712127952513957,
"train_runtime": 10324.3717,
"train_samples_per_second": 15.347,
"train_steps_per_second": 0.03
}
],
"logging_steps": 10,
"max_steps": 309,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 517377129185280.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}