llama_2_gsm8k_final_answer / trainer_state.json
CharlesLi's picture
Model save
094f0ac verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.6923076923076925,
"eval_steps": 5,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.15384615384615385,
"grad_norm": 2.1649128036114615,
"learning_rate": 4e-05,
"loss": 3.1505,
"step": 1
},
{
"epoch": 0.7692307692307693,
"grad_norm": 1.4215155841281863,
"learning_rate": 0.0002,
"loss": 3.0231,
"step": 5
},
{
"epoch": 0.7692307692307693,
"eval_loss": 2.6061668395996094,
"eval_runtime": 2.6215,
"eval_samples_per_second": 7.629,
"eval_steps_per_second": 1.144,
"step": 5
},
{
"epoch": 1.5384615384615383,
"grad_norm": 0.651519916913547,
"learning_rate": 0.00019396926207859084,
"loss": 1.8692,
"step": 10
},
{
"epoch": 1.5384615384615383,
"eval_loss": 1.6896581649780273,
"eval_runtime": 1.3449,
"eval_samples_per_second": 14.871,
"eval_steps_per_second": 2.231,
"step": 10
},
{
"epoch": 2.3076923076923075,
"grad_norm": 1.9515671701626458,
"learning_rate": 0.0001766044443118978,
"loss": 1.3807,
"step": 15
},
{
"epoch": 2.3076923076923075,
"eval_loss": 1.45188570022583,
"eval_runtime": 1.3413,
"eval_samples_per_second": 14.911,
"eval_steps_per_second": 2.237,
"step": 15
},
{
"epoch": 3.076923076923077,
"grad_norm": 0.6039953656556071,
"learning_rate": 0.00015000000000000001,
"loss": 1.165,
"step": 20
},
{
"epoch": 3.076923076923077,
"eval_loss": 1.377772569656372,
"eval_runtime": 1.3458,
"eval_samples_per_second": 14.861,
"eval_steps_per_second": 2.229,
"step": 20
},
{
"epoch": 3.8461538461538463,
"grad_norm": 0.2836516448253057,
"learning_rate": 0.00011736481776669306,
"loss": 1.0943,
"step": 25
},
{
"epoch": 3.8461538461538463,
"eval_loss": 1.3182907104492188,
"eval_runtime": 1.3427,
"eval_samples_per_second": 14.895,
"eval_steps_per_second": 2.234,
"step": 25
},
{
"epoch": 4.615384615384615,
"grad_norm": 0.30847224546147556,
"learning_rate": 8.263518223330697e-05,
"loss": 1.0287,
"step": 30
},
{
"epoch": 4.615384615384615,
"eval_loss": 1.3018699884414673,
"eval_runtime": 1.3484,
"eval_samples_per_second": 14.832,
"eval_steps_per_second": 2.225,
"step": 30
},
{
"epoch": 5.384615384615385,
"grad_norm": 0.44903587891321267,
"learning_rate": 5.000000000000002e-05,
"loss": 0.9179,
"step": 35
},
{
"epoch": 5.384615384615385,
"eval_loss": 1.3123514652252197,
"eval_runtime": 1.3412,
"eval_samples_per_second": 14.912,
"eval_steps_per_second": 2.237,
"step": 35
},
{
"epoch": 6.153846153846154,
"grad_norm": 0.32136925605744115,
"learning_rate": 2.339555568810221e-05,
"loss": 0.9144,
"step": 40
},
{
"epoch": 6.153846153846154,
"eval_loss": 1.325203776359558,
"eval_runtime": 1.3435,
"eval_samples_per_second": 14.886,
"eval_steps_per_second": 2.233,
"step": 40
},
{
"epoch": 6.923076923076923,
"grad_norm": 0.4351422163226237,
"learning_rate": 6.030737921409169e-06,
"loss": 0.879,
"step": 45
},
{
"epoch": 6.923076923076923,
"eval_loss": 1.3339039087295532,
"eval_runtime": 1.3424,
"eval_samples_per_second": 14.898,
"eval_steps_per_second": 2.235,
"step": 45
},
{
"epoch": 7.6923076923076925,
"grad_norm": 0.3920332363997211,
"learning_rate": 0.0,
"loss": 0.8385,
"step": 50
},
{
"epoch": 7.6923076923076925,
"eval_loss": 1.3345052003860474,
"eval_runtime": 1.3405,
"eval_samples_per_second": 14.92,
"eval_steps_per_second": 2.238,
"step": 50
},
{
"epoch": 7.6923076923076925,
"step": 50,
"total_flos": 421095309312.0,
"train_loss": 1.3136224746704102,
"train_runtime": 178.8914,
"train_samples_per_second": 4.472,
"train_steps_per_second": 0.279
}
],
"logging_steps": 5,
"max_steps": 50,
"num_input_tokens_seen": 0,
"num_train_epochs": 9,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 421095309312.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}