summarization-base-0 / trainer_state.json
apwic's picture
End of training
3c77706 verified
{
"best_metric": 0.4457,
"best_model_checkpoint": "bin/summarization-base-0/checkpoint-3566",
"epoch": 5.0,
"eval_steps": 500,
"global_step": 17830,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 2.3418121337890625,
"learning_rate": 0.0008,
"loss": 1.5349,
"step": 3566
},
{
"epoch": 1.0,
"eval_gen_len": 1.0,
"eval_loss": 0.9113389849662781,
"eval_rouge1": 0.4457,
"eval_rouge2": 0.0,
"eval_rougeL": 0.4447,
"eval_rougeLsum": 0.4422,
"eval_runtime": 633.9025,
"eval_samples_per_second": 1.183,
"eval_steps_per_second": 0.148,
"step": 3566
},
{
"epoch": 2.0,
"grad_norm": 1.0075907707214355,
"learning_rate": 0.0006,
"loss": 0.8222,
"step": 7132
},
{
"epoch": 2.0,
"eval_gen_len": 1.0,
"eval_loss": 0.7657843828201294,
"eval_rouge1": 0.4293,
"eval_rouge2": 0.0,
"eval_rougeL": 0.4263,
"eval_rougeLsum": 0.4253,
"eval_runtime": 631.9317,
"eval_samples_per_second": 1.187,
"eval_steps_per_second": 0.149,
"step": 7132
},
{
"epoch": 3.0,
"grad_norm": 0.6747327446937561,
"learning_rate": 0.0004,
"loss": 0.6231,
"step": 10698
},
{
"epoch": 3.0,
"eval_gen_len": 1.0,
"eval_loss": 0.6905989050865173,
"eval_rouge1": 0.4208,
"eval_rouge2": 0.0,
"eval_rougeL": 0.417,
"eval_rougeLsum": 0.415,
"eval_runtime": 620.388,
"eval_samples_per_second": 1.209,
"eval_steps_per_second": 0.152,
"step": 10698
},
{
"epoch": 4.0,
"grad_norm": 0.5738873481750488,
"learning_rate": 0.0002,
"loss": 0.4701,
"step": 14264
},
{
"epoch": 4.0,
"eval_gen_len": 1.0,
"eval_loss": 0.6816205978393555,
"eval_rouge1": 0.4158,
"eval_rouge2": 0.0,
"eval_rougeL": 0.4113,
"eval_rougeLsum": 0.41,
"eval_runtime": 613.0796,
"eval_samples_per_second": 1.223,
"eval_steps_per_second": 0.153,
"step": 14264
},
{
"epoch": 5.0,
"grad_norm": 1.3324648141860962,
"learning_rate": 0.0,
"loss": 0.3217,
"step": 17830
},
{
"epoch": 5.0,
"eval_gen_len": 1.0,
"eval_loss": 0.7147426009178162,
"eval_rouge1": 0.4109,
"eval_rouge2": 0.0,
"eval_rougeL": 0.4081,
"eval_rougeLsum": 0.4066,
"eval_runtime": 610.9308,
"eval_samples_per_second": 1.228,
"eval_steps_per_second": 0.154,
"step": 17830
},
{
"epoch": 5.0,
"step": 17830,
"total_flos": 4.883004680306688e+16,
"train_loss": 0.7543908821486784,
"train_runtime": 12843.575,
"train_samples_per_second": 5.552,
"train_steps_per_second": 1.388
}
],
"logging_steps": 500,
"max_steps": 17830,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 4.883004680306688e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}