File size: 2,549 Bytes
6af51a0 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 | {
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"global_step": 9365,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.07,
"learning_rate": 3.932194340630005e-05,
"loss": 1.2388,
"step": 2000
},
{
"epoch": 1.07,
"eval_gen_len": 48.53731343283582,
"eval_loss": 1.1303935050964355,
"eval_rouge1": 66.5164,
"eval_rouge2": 54.1964,
"eval_rougeL": 60.1255,
"eval_rougeLsum": 62.6329,
"eval_runtime": 248.0848,
"eval_samples_per_second": 5.401,
"eval_steps_per_second": 0.27,
"step": 2000
},
{
"epoch": 2.14,
"learning_rate": 2.8643886812600106e-05,
"loss": 0.9911,
"step": 4000
},
{
"epoch": 2.14,
"eval_gen_len": 48.21940298507463,
"eval_loss": 1.0996825695037842,
"eval_rouge1": 66.7923,
"eval_rouge2": 54.4921,
"eval_rougeL": 60.3864,
"eval_rougeLsum": 63.0007,
"eval_runtime": 245.1086,
"eval_samples_per_second": 5.467,
"eval_steps_per_second": 0.273,
"step": 4000
},
{
"epoch": 3.2,
"learning_rate": 1.7965830218900163e-05,
"loss": 0.8473,
"step": 6000
},
{
"epoch": 3.2,
"eval_gen_len": 51.417164179104475,
"eval_loss": 1.1019301414489746,
"eval_rouge1": 68.1867,
"eval_rouge2": 55.4038,
"eval_rougeL": 61.5061,
"eval_rougeLsum": 64.2051,
"eval_runtime": 255.7993,
"eval_samples_per_second": 5.238,
"eval_steps_per_second": 0.262,
"step": 6000
},
{
"epoch": 4.27,
"learning_rate": 7.287773625200214e-06,
"loss": 0.7315,
"step": 8000
},
{
"epoch": 4.27,
"eval_gen_len": 50.42686567164179,
"eval_loss": 1.111816167831421,
"eval_rouge1": 67.961,
"eval_rouge2": 55.3863,
"eval_rougeL": 61.4022,
"eval_rougeLsum": 64.1305,
"eval_runtime": 261.9465,
"eval_samples_per_second": 5.116,
"eval_steps_per_second": 0.256,
"step": 8000
},
{
"epoch": 5.0,
"step": 9365,
"total_flos": 1.1949135422717952e+17,
"train_loss": 0.9121758847458122,
"train_runtime": 3599.8478,
"train_samples_per_second": 52.03,
"train_steps_per_second": 2.601
}
],
"max_steps": 9365,
"num_train_epochs": 5,
"total_flos": 1.1949135422717952e+17,
"trial_name": null,
"trial_params": null
}
|