ai-forever's picture
Upload pipeline weights
6af51a0
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"global_step": 9365,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.07,
"learning_rate": 3.932194340630005e-05,
"loss": 1.2388,
"step": 2000
},
{
"epoch": 1.07,
"eval_gen_len": 48.53731343283582,
"eval_loss": 1.1303935050964355,
"eval_rouge1": 66.5164,
"eval_rouge2": 54.1964,
"eval_rougeL": 60.1255,
"eval_rougeLsum": 62.6329,
"eval_runtime": 248.0848,
"eval_samples_per_second": 5.401,
"eval_steps_per_second": 0.27,
"step": 2000
},
{
"epoch": 2.14,
"learning_rate": 2.8643886812600106e-05,
"loss": 0.9911,
"step": 4000
},
{
"epoch": 2.14,
"eval_gen_len": 48.21940298507463,
"eval_loss": 1.0996825695037842,
"eval_rouge1": 66.7923,
"eval_rouge2": 54.4921,
"eval_rougeL": 60.3864,
"eval_rougeLsum": 63.0007,
"eval_runtime": 245.1086,
"eval_samples_per_second": 5.467,
"eval_steps_per_second": 0.273,
"step": 4000
},
{
"epoch": 3.2,
"learning_rate": 1.7965830218900163e-05,
"loss": 0.8473,
"step": 6000
},
{
"epoch": 3.2,
"eval_gen_len": 51.417164179104475,
"eval_loss": 1.1019301414489746,
"eval_rouge1": 68.1867,
"eval_rouge2": 55.4038,
"eval_rougeL": 61.5061,
"eval_rougeLsum": 64.2051,
"eval_runtime": 255.7993,
"eval_samples_per_second": 5.238,
"eval_steps_per_second": 0.262,
"step": 6000
},
{
"epoch": 4.27,
"learning_rate": 7.287773625200214e-06,
"loss": 0.7315,
"step": 8000
},
{
"epoch": 4.27,
"eval_gen_len": 50.42686567164179,
"eval_loss": 1.111816167831421,
"eval_rouge1": 67.961,
"eval_rouge2": 55.3863,
"eval_rougeL": 61.4022,
"eval_rougeLsum": 64.1305,
"eval_runtime": 261.9465,
"eval_samples_per_second": 5.116,
"eval_steps_per_second": 0.256,
"step": 8000
},
{
"epoch": 5.0,
"step": 9365,
"total_flos": 1.1949135422717952e+17,
"train_loss": 0.9121758847458122,
"train_runtime": 3599.8478,
"train_samples_per_second": 52.03,
"train_steps_per_second": 2.601
}
],
"max_steps": 9365,
"num_train_epochs": 5,
"total_flos": 1.1949135422717952e+17,
"trial_name": null,
"trial_params": null
}