bart-base-paraphrasing-review / trainer_state.json
NICFRU's picture
Upload 11 files (#1)
0f7969e
{
"best_metric": 54.2663,
"best_model_checkpoint": "drive/MyDrive/Models/bart-base-paraphrasing-Review/checkpoint-200",
"epoch": 20.0,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"learning_rate": 3.9680000000000006e-05,
"loss": 1.5002,
"step": 10
},
{
"epoch": 0.2,
"eval_gen_len": 19.33,
"eval_loss": 0.5407444834709167,
"eval_rouge1": 51.4943,
"eval_rouge2": 46.3962,
"eval_rougeL": 48.4646,
"eval_rougeLsum": 50.6928,
"eval_runtime": 6.8406,
"eval_samples_per_second": 14.619,
"eval_steps_per_second": 1.9,
"step": 10
},
{
"epoch": 0.4,
"learning_rate": 3.9280000000000003e-05,
"loss": 0.7721,
"step": 20
},
{
"epoch": 0.4,
"eval_gen_len": 19.82,
"eval_loss": 0.46091121435165405,
"eval_rouge1": 52.542,
"eval_rouge2": 47.6572,
"eval_rougeL": 49.0919,
"eval_rougeLsum": 51.8604,
"eval_runtime": 8.2487,
"eval_samples_per_second": 12.123,
"eval_steps_per_second": 1.576,
"step": 20
},
{
"epoch": 0.6,
"learning_rate": 3.888e-05,
"loss": 0.6535,
"step": 30
},
{
"epoch": 0.6,
"eval_gen_len": 19.83,
"eval_loss": 0.462110698223114,
"eval_rouge1": 51.4575,
"eval_rouge2": 45.802,
"eval_rougeL": 48.4395,
"eval_rougeLsum": 50.6411,
"eval_runtime": 8.2208,
"eval_samples_per_second": 12.164,
"eval_steps_per_second": 1.581,
"step": 30
},
{
"epoch": 0.8,
"learning_rate": 3.8480000000000004e-05,
"loss": 0.6574,
"step": 40
},
{
"epoch": 0.8,
"eval_gen_len": 19.92,
"eval_loss": 0.4107917547225952,
"eval_rouge1": 53.4288,
"eval_rouge2": 48.0337,
"eval_rougeL": 49.8305,
"eval_rougeLsum": 52.5636,
"eval_runtime": 7.434,
"eval_samples_per_second": 13.452,
"eval_steps_per_second": 1.749,
"step": 40
},
{
"epoch": 1.0,
"learning_rate": 3.808e-05,
"loss": 0.6373,
"step": 50
},
{
"epoch": 1.0,
"eval_gen_len": 19.6,
"eval_loss": 0.417746365070343,
"eval_rouge1": 52.4223,
"eval_rouge2": 47.4174,
"eval_rougeL": 49.3609,
"eval_rougeLsum": 51.6379,
"eval_runtime": 8.0886,
"eval_samples_per_second": 12.363,
"eval_steps_per_second": 1.607,
"step": 50
},
{
"epoch": 1.2,
"learning_rate": 3.768e-05,
"loss": 0.4648,
"step": 60
},
{
"epoch": 1.2,
"eval_gen_len": 19.78,
"eval_loss": 0.4121185541152954,
"eval_rouge1": 54.2574,
"eval_rouge2": 49.4654,
"eval_rougeL": 52.0245,
"eval_rougeLsum": 53.4168,
"eval_runtime": 6.7135,
"eval_samples_per_second": 14.895,
"eval_steps_per_second": 1.936,
"step": 60
},
{
"epoch": 1.4,
"learning_rate": 3.728e-05,
"loss": 0.4845,
"step": 70
},
{
"epoch": 1.4,
"eval_gen_len": 19.81,
"eval_loss": 0.3925768733024597,
"eval_rouge1": 53.6404,
"eval_rouge2": 48.6113,
"eval_rougeL": 50.7083,
"eval_rougeLsum": 52.7981,
"eval_runtime": 8.3261,
"eval_samples_per_second": 12.01,
"eval_steps_per_second": 1.561,
"step": 70
},
{
"epoch": 1.6,
"learning_rate": 3.6880000000000006e-05,
"loss": 0.384,
"step": 80
},
{
"epoch": 1.6,
"eval_gen_len": 19.86,
"eval_loss": 0.4059392511844635,
"eval_rouge1": 53.3219,
"eval_rouge2": 48.4794,
"eval_rougeL": 51.0572,
"eval_rougeLsum": 52.5149,
"eval_runtime": 6.9029,
"eval_samples_per_second": 14.487,
"eval_steps_per_second": 1.883,
"step": 80
},
{
"epoch": 1.8,
"learning_rate": 3.648e-05,
"loss": 0.424,
"step": 90
},
{
"epoch": 1.8,
"eval_gen_len": 19.86,
"eval_loss": 0.40950503945350647,
"eval_rouge1": 53.46,
"eval_rouge2": 48.5606,
"eval_rougeL": 50.3645,
"eval_rougeLsum": 52.6534,
"eval_runtime": 7.8719,
"eval_samples_per_second": 12.703,
"eval_steps_per_second": 1.651,
"step": 90
},
{
"epoch": 2.0,
"learning_rate": 3.608000000000001e-05,
"loss": 0.5296,
"step": 100
},
{
"epoch": 2.0,
"eval_gen_len": 19.82,
"eval_loss": 0.3766419291496277,
"eval_rouge1": 53.4984,
"eval_rouge2": 48.5905,
"eval_rougeL": 51.3471,
"eval_rougeLsum": 52.7547,
"eval_runtime": 9.149,
"eval_samples_per_second": 10.93,
"eval_steps_per_second": 1.421,
"step": 100
},
{
"epoch": 2.2,
"learning_rate": 3.5680000000000004e-05,
"loss": 0.3373,
"step": 110
},
{
"epoch": 2.2,
"eval_gen_len": 19.73,
"eval_loss": 0.38662123680114746,
"eval_rouge1": 54.0079,
"eval_rouge2": 49.2467,
"eval_rougeL": 51.8215,
"eval_rougeLsum": 53.1045,
"eval_runtime": 9.6895,
"eval_samples_per_second": 10.32,
"eval_steps_per_second": 1.342,
"step": 110
},
{
"epoch": 2.4,
"learning_rate": 3.528e-05,
"loss": 0.3547,
"step": 120
},
{
"epoch": 2.4,
"eval_gen_len": 19.92,
"eval_loss": 0.4074762761592865,
"eval_rouge1": 53.6412,
"eval_rouge2": 48.7051,
"eval_rougeL": 50.9648,
"eval_rougeLsum": 52.9864,
"eval_runtime": 10.9749,
"eval_samples_per_second": 9.112,
"eval_steps_per_second": 1.185,
"step": 120
},
{
"epoch": 2.6,
"learning_rate": 3.4880000000000005e-05,
"loss": 0.3487,
"step": 130
},
{
"epoch": 2.6,
"eval_gen_len": 19.8,
"eval_loss": 0.3859531283378601,
"eval_rouge1": 52.9323,
"eval_rouge2": 47.6675,
"eval_rougeL": 49.9928,
"eval_rougeLsum": 52.1444,
"eval_runtime": 7.7041,
"eval_samples_per_second": 12.98,
"eval_steps_per_second": 1.687,
"step": 130
},
{
"epoch": 2.8,
"learning_rate": 3.448e-05,
"loss": 0.3698,
"step": 140
},
{
"epoch": 2.8,
"eval_gen_len": 19.95,
"eval_loss": 0.38846832513809204,
"eval_rouge1": 53.839,
"eval_rouge2": 48.7036,
"eval_rougeL": 50.9286,
"eval_rougeLsum": 52.9169,
"eval_runtime": 7.8099,
"eval_samples_per_second": 12.804,
"eval_steps_per_second": 1.665,
"step": 140
},
{
"epoch": 3.0,
"learning_rate": 3.408e-05,
"loss": 0.3611,
"step": 150
},
{
"epoch": 3.0,
"eval_gen_len": 19.9,
"eval_loss": 0.3930496275424957,
"eval_rouge1": 53.5329,
"eval_rouge2": 48.8497,
"eval_rougeL": 50.7808,
"eval_rougeLsum": 52.8185,
"eval_runtime": 7.4198,
"eval_samples_per_second": 13.477,
"eval_steps_per_second": 1.752,
"step": 150
},
{
"epoch": 3.2,
"learning_rate": 3.368e-05,
"loss": 0.2646,
"step": 160
},
{
"epoch": 3.2,
"eval_gen_len": 19.72,
"eval_loss": 0.3976023197174072,
"eval_rouge1": 52.8705,
"eval_rouge2": 47.9654,
"eval_rougeL": 50.1419,
"eval_rougeLsum": 52.2498,
"eval_runtime": 7.0404,
"eval_samples_per_second": 14.204,
"eval_steps_per_second": 1.846,
"step": 160
},
{
"epoch": 3.4,
"learning_rate": 3.328e-05,
"loss": 0.247,
"step": 170
},
{
"epoch": 3.4,
"eval_gen_len": 19.83,
"eval_loss": 0.42816296219825745,
"eval_rouge1": 53.8543,
"eval_rouge2": 48.9547,
"eval_rougeL": 50.8772,
"eval_rougeLsum": 53.0736,
"eval_runtime": 8.0332,
"eval_samples_per_second": 12.448,
"eval_steps_per_second": 1.618,
"step": 170
},
{
"epoch": 3.6,
"learning_rate": 3.2880000000000004e-05,
"loss": 0.3138,
"step": 180
},
{
"epoch": 3.6,
"eval_gen_len": 19.93,
"eval_loss": 0.4122447669506073,
"eval_rouge1": 53.352,
"eval_rouge2": 48.3843,
"eval_rougeL": 50.8596,
"eval_rougeLsum": 52.4134,
"eval_runtime": 6.6776,
"eval_samples_per_second": 14.975,
"eval_steps_per_second": 1.947,
"step": 180
},
{
"epoch": 3.8,
"learning_rate": 3.248000000000001e-05,
"loss": 0.2651,
"step": 190
},
{
"epoch": 3.8,
"eval_gen_len": 19.92,
"eval_loss": 0.4259437322616577,
"eval_rouge1": 54.1516,
"eval_rouge2": 49.344,
"eval_rougeL": 51.9955,
"eval_rougeLsum": 53.4336,
"eval_runtime": 8.5086,
"eval_samples_per_second": 11.753,
"eval_steps_per_second": 1.528,
"step": 190
},
{
"epoch": 4.0,
"learning_rate": 3.2080000000000005e-05,
"loss": 0.2809,
"step": 200
},
{
"epoch": 4.0,
"eval_gen_len": 19.92,
"eval_loss": 0.4072933495044708,
"eval_rouge1": 54.2663,
"eval_rouge2": 49.3906,
"eval_rougeL": 51.9168,
"eval_rougeLsum": 53.408,
"eval_runtime": 6.638,
"eval_samples_per_second": 15.065,
"eval_steps_per_second": 1.958,
"step": 200
},
{
"epoch": 4.2,
"learning_rate": 3.168e-05,
"loss": 0.1995,
"step": 210
},
{
"epoch": 4.2,
"eval_gen_len": 19.93,
"eval_loss": 0.43279576301574707,
"eval_rouge1": 52.9244,
"eval_rouge2": 47.8818,
"eval_rougeL": 50.3287,
"eval_rougeLsum": 52.0486,
"eval_runtime": 11.2433,
"eval_samples_per_second": 8.894,
"eval_steps_per_second": 1.156,
"step": 210
},
{
"epoch": 4.4,
"learning_rate": 3.1280000000000005e-05,
"loss": 0.2221,
"step": 220
},
{
"epoch": 4.4,
"eval_gen_len": 19.92,
"eval_loss": 0.4510229527950287,
"eval_rouge1": 53.2115,
"eval_rouge2": 47.7244,
"eval_rougeL": 50.3561,
"eval_rougeLsum": 52.3876,
"eval_runtime": 9.8917,
"eval_samples_per_second": 10.11,
"eval_steps_per_second": 1.314,
"step": 220
},
{
"epoch": 4.6,
"learning_rate": 3.088e-05,
"loss": 0.198,
"step": 230
},
{
"epoch": 4.6,
"eval_gen_len": 19.92,
"eval_loss": 0.46202754974365234,
"eval_rouge1": 53.6401,
"eval_rouge2": 48.3161,
"eval_rougeL": 50.5124,
"eval_rougeLsum": 52.809,
"eval_runtime": 6.6888,
"eval_samples_per_second": 14.95,
"eval_steps_per_second": 1.944,
"step": 230
},
{
"epoch": 4.8,
"learning_rate": 3.0520000000000006e-05,
"loss": 0.2395,
"step": 240
},
{
"epoch": 4.8,
"eval_gen_len": 19.9,
"eval_loss": 0.42543941736221313,
"eval_rouge1": 52.8133,
"eval_rouge2": 47.1914,
"eval_rougeL": 49.4615,
"eval_rougeLsum": 52.0112,
"eval_runtime": 8.3162,
"eval_samples_per_second": 12.025,
"eval_steps_per_second": 1.563,
"step": 240
},
{
"epoch": 5.0,
"learning_rate": 3.0120000000000003e-05,
"loss": 0.1948,
"step": 250
},
{
"epoch": 5.0,
"eval_gen_len": 19.81,
"eval_loss": 0.4100102186203003,
"eval_rouge1": 54.5088,
"eval_rouge2": 49.4265,
"eval_rougeL": 51.1343,
"eval_rougeLsum": 53.6923,
"eval_runtime": 6.6101,
"eval_samples_per_second": 15.128,
"eval_steps_per_second": 1.967,
"step": 250
},
{
"epoch": 5.2,
"learning_rate": 2.9720000000000003e-05,
"loss": 0.1519,
"step": 260
},
{
"epoch": 5.2,
"eval_gen_len": 19.77,
"eval_loss": 0.4444543421268463,
"eval_rouge1": 52.762,
"eval_rouge2": 47.5185,
"eval_rougeL": 49.8893,
"eval_rougeLsum": 51.9623,
"eval_runtime": 8.1595,
"eval_samples_per_second": 12.256,
"eval_steps_per_second": 1.593,
"step": 260
},
{
"epoch": 5.4,
"learning_rate": 2.932e-05,
"loss": 0.1982,
"step": 270
},
{
"epoch": 5.4,
"eval_gen_len": 19.93,
"eval_loss": 0.4569690227508545,
"eval_rouge1": 53.5689,
"eval_rouge2": 48.2123,
"eval_rougeL": 50.6624,
"eval_rougeLsum": 52.7258,
"eval_runtime": 7.021,
"eval_samples_per_second": 14.243,
"eval_steps_per_second": 1.852,
"step": 270
},
{
"epoch": 5.6,
"learning_rate": 2.892e-05,
"loss": 0.1715,
"step": 280
},
{
"epoch": 5.6,
"eval_gen_len": 19.88,
"eval_loss": 0.4624459445476532,
"eval_rouge1": 52.8072,
"eval_rouge2": 47.7219,
"eval_rougeL": 50.2858,
"eval_rougeLsum": 52.1116,
"eval_runtime": 7.4965,
"eval_samples_per_second": 13.34,
"eval_steps_per_second": 1.734,
"step": 280
},
{
"epoch": 5.8,
"learning_rate": 2.852e-05,
"loss": 0.2086,
"step": 290
},
{
"epoch": 5.8,
"eval_gen_len": 19.89,
"eval_loss": 0.42807042598724365,
"eval_rouge1": 53.7821,
"eval_rouge2": 48.5423,
"eval_rougeL": 51.0908,
"eval_rougeLsum": 53.0218,
"eval_runtime": 7.7637,
"eval_samples_per_second": 12.88,
"eval_steps_per_second": 1.674,
"step": 290
},
{
"epoch": 6.0,
"learning_rate": 2.8120000000000002e-05,
"loss": 0.1586,
"step": 300
},
{
"epoch": 6.0,
"eval_gen_len": 19.9,
"eval_loss": 0.46185803413391113,
"eval_rouge1": 54.0695,
"eval_rouge2": 49.2271,
"eval_rougeL": 51.3253,
"eval_rougeLsum": 53.3106,
"eval_runtime": 6.9257,
"eval_samples_per_second": 14.439,
"eval_steps_per_second": 1.877,
"step": 300
},
{
"epoch": 6.2,
"learning_rate": 2.772e-05,
"loss": 0.1397,
"step": 310
},
{
"epoch": 6.2,
"eval_gen_len": 19.7,
"eval_loss": 0.47624072432518005,
"eval_rouge1": 52.9886,
"eval_rouge2": 48.0092,
"eval_rougeL": 50.6223,
"eval_rougeLsum": 52.1953,
"eval_runtime": 7.6624,
"eval_samples_per_second": 13.051,
"eval_steps_per_second": 1.697,
"step": 310
},
{
"epoch": 6.4,
"learning_rate": 2.7320000000000003e-05,
"loss": 0.1359,
"step": 320
},
{
"epoch": 6.4,
"eval_gen_len": 19.92,
"eval_loss": 0.5075673460960388,
"eval_rouge1": 55.1074,
"eval_rouge2": 50.6647,
"eval_rougeL": 53.0524,
"eval_rougeLsum": 54.4127,
"eval_runtime": 10.6561,
"eval_samples_per_second": 9.384,
"eval_steps_per_second": 1.22,
"step": 320
},
{
"epoch": 6.6,
"learning_rate": 2.6920000000000003e-05,
"loss": 0.1533,
"step": 330
},
{
"epoch": 6.6,
"eval_gen_len": 19.92,
"eval_loss": 0.4753476679325104,
"eval_rouge1": 53.9777,
"eval_rouge2": 49.1125,
"eval_rougeL": 51.7324,
"eval_rougeLsum": 53.2685,
"eval_runtime": 8.3237,
"eval_samples_per_second": 12.014,
"eval_steps_per_second": 1.562,
"step": 330
},
{
"epoch": 6.8,
"learning_rate": 2.6520000000000004e-05,
"loss": 0.1231,
"step": 340
},
{
"epoch": 6.8,
"eval_gen_len": 19.75,
"eval_loss": 0.4629780650138855,
"eval_rouge1": 52.8367,
"eval_rouge2": 47.6698,
"eval_rougeL": 50.0559,
"eval_rougeLsum": 51.986,
"eval_runtime": 6.6174,
"eval_samples_per_second": 15.112,
"eval_steps_per_second": 1.965,
"step": 340
},
{
"epoch": 7.0,
"learning_rate": 2.6120000000000004e-05,
"loss": 0.166,
"step": 350
},
{
"epoch": 7.0,
"eval_gen_len": 19.67,
"eval_loss": 0.4622231423854828,
"eval_rouge1": 53.2509,
"eval_rouge2": 48.2235,
"eval_rougeL": 50.8908,
"eval_rougeLsum": 52.3717,
"eval_runtime": 8.1116,
"eval_samples_per_second": 12.328,
"eval_steps_per_second": 1.603,
"step": 350
},
{
"epoch": 7.2,
"learning_rate": 2.572e-05,
"loss": 0.1042,
"step": 360
},
{
"epoch": 7.2,
"eval_gen_len": 19.86,
"eval_loss": 0.4717084467411041,
"eval_rouge1": 53.4468,
"eval_rouge2": 48.3219,
"eval_rougeL": 51.074,
"eval_rougeLsum": 52.5822,
"eval_runtime": 7.0111,
"eval_samples_per_second": 14.263,
"eval_steps_per_second": 1.854,
"step": 360
},
{
"epoch": 7.4,
"learning_rate": 2.532e-05,
"loss": 0.0942,
"step": 370
},
{
"epoch": 7.4,
"eval_gen_len": 19.92,
"eval_loss": 0.5130577683448792,
"eval_rouge1": 53.5099,
"eval_rouge2": 48.6401,
"eval_rougeL": 51.348,
"eval_rougeLsum": 52.7033,
"eval_runtime": 7.5081,
"eval_samples_per_second": 13.319,
"eval_steps_per_second": 1.731,
"step": 370
},
{
"epoch": 7.6,
"learning_rate": 2.4920000000000002e-05,
"loss": 0.108,
"step": 380
},
{
"epoch": 7.6,
"eval_gen_len": 19.92,
"eval_loss": 0.4888118803501129,
"eval_rouge1": 53.9399,
"eval_rouge2": 48.9895,
"eval_rougeL": 51.4691,
"eval_rougeLsum": 53.0923,
"eval_runtime": 7.6096,
"eval_samples_per_second": 13.141,
"eval_steps_per_second": 1.708,
"step": 380
},
{
"epoch": 7.8,
"learning_rate": 2.4520000000000002e-05,
"loss": 0.396,
"step": 390
},
{
"epoch": 7.8,
"eval_gen_len": 19.79,
"eval_loss": 0.48315200209617615,
"eval_rouge1": 52.1465,
"eval_rouge2": 46.8191,
"eval_rougeL": 49.6385,
"eval_rougeLsum": 51.3173,
"eval_runtime": 6.5648,
"eval_samples_per_second": 15.233,
"eval_steps_per_second": 1.98,
"step": 390
},
{
"epoch": 8.0,
"learning_rate": 2.4120000000000003e-05,
"loss": 0.3167,
"step": 400
},
{
"epoch": 8.0,
"eval_gen_len": 19.93,
"eval_loss": 0.505258321762085,
"eval_rouge1": 52.4445,
"eval_rouge2": 47.0657,
"eval_rougeL": 49.9123,
"eval_rougeLsum": 51.5409,
"eval_runtime": 8.223,
"eval_samples_per_second": 12.161,
"eval_steps_per_second": 1.581,
"step": 400
},
{
"epoch": 8.2,
"learning_rate": 2.372e-05,
"loss": 0.0926,
"step": 410
},
{
"epoch": 8.2,
"eval_gen_len": 19.91,
"eval_loss": 0.491664320230484,
"eval_rouge1": 53.9381,
"eval_rouge2": 49.1002,
"eval_rougeL": 51.6393,
"eval_rougeLsum": 53.07,
"eval_runtime": 7.2939,
"eval_samples_per_second": 13.71,
"eval_steps_per_second": 1.782,
"step": 410
},
{
"epoch": 8.4,
"learning_rate": 2.332e-05,
"loss": 0.1143,
"step": 420
},
{
"epoch": 8.4,
"eval_gen_len": 19.86,
"eval_loss": 0.4843844175338745,
"eval_rouge1": 51.9319,
"eval_rouge2": 46.4005,
"eval_rougeL": 49.5382,
"eval_rougeLsum": 51.0042,
"eval_runtime": 12.7186,
"eval_samples_per_second": 7.863,
"eval_steps_per_second": 1.022,
"step": 420
},
{
"epoch": 8.6,
"learning_rate": 2.292e-05,
"loss": 0.0852,
"step": 430
},
{
"epoch": 8.6,
"eval_gen_len": 19.89,
"eval_loss": 0.5236030220985413,
"eval_rouge1": 53.0753,
"eval_rouge2": 47.999,
"eval_rougeL": 50.8066,
"eval_rougeLsum": 52.1535,
"eval_runtime": 6.9023,
"eval_samples_per_second": 14.488,
"eval_steps_per_second": 1.883,
"step": 430
},
{
"epoch": 8.8,
"learning_rate": 2.252e-05,
"loss": 0.0894,
"step": 440
},
{
"epoch": 8.8,
"eval_gen_len": 19.83,
"eval_loss": 0.5296807289123535,
"eval_rouge1": 52.3528,
"eval_rouge2": 47.0573,
"eval_rougeL": 49.9171,
"eval_rougeLsum": 51.4175,
"eval_runtime": 7.6163,
"eval_samples_per_second": 13.13,
"eval_steps_per_second": 1.707,
"step": 440
},
{
"epoch": 9.0,
"learning_rate": 2.2120000000000005e-05,
"loss": 0.092,
"step": 450
},
{
"epoch": 9.0,
"eval_gen_len": 19.79,
"eval_loss": 0.545309841632843,
"eval_rouge1": 52.9402,
"eval_rouge2": 47.6937,
"eval_rougeL": 50.4167,
"eval_rougeLsum": 52.0807,
"eval_runtime": 7.5068,
"eval_samples_per_second": 13.321,
"eval_steps_per_second": 1.732,
"step": 450
},
{
"epoch": 9.2,
"learning_rate": 2.1720000000000002e-05,
"loss": 0.0837,
"step": 460
},
{
"epoch": 9.2,
"eval_gen_len": 19.92,
"eval_loss": 0.5535491108894348,
"eval_rouge1": 53.2688,
"eval_rouge2": 48.3391,
"eval_rougeL": 50.8943,
"eval_rougeLsum": 52.5012,
"eval_runtime": 6.8049,
"eval_samples_per_second": 14.695,
"eval_steps_per_second": 1.91,
"step": 460
},
{
"epoch": 9.4,
"learning_rate": 2.1320000000000003e-05,
"loss": 0.0753,
"step": 470
},
{
"epoch": 9.4,
"eval_gen_len": 19.88,
"eval_loss": 0.5247593522071838,
"eval_rouge1": 53.1878,
"eval_rouge2": 47.9215,
"eval_rougeL": 50.2844,
"eval_rougeLsum": 52.072,
"eval_runtime": 8.2937,
"eval_samples_per_second": 12.057,
"eval_steps_per_second": 1.567,
"step": 470
},
{
"epoch": 9.6,
"learning_rate": 2.0920000000000003e-05,
"loss": 0.0861,
"step": 480
},
{
"epoch": 9.6,
"eval_gen_len": 19.87,
"eval_loss": 0.5427589416503906,
"eval_rouge1": 53.4103,
"eval_rouge2": 48.1705,
"eval_rougeL": 49.9195,
"eval_rougeLsum": 52.3346,
"eval_runtime": 6.5323,
"eval_samples_per_second": 15.309,
"eval_steps_per_second": 1.99,
"step": 480
},
{
"epoch": 9.8,
"learning_rate": 2.0520000000000003e-05,
"loss": 0.0868,
"step": 490
},
{
"epoch": 9.8,
"eval_gen_len": 19.88,
"eval_loss": 0.5506805181503296,
"eval_rouge1": 52.9177,
"eval_rouge2": 47.3222,
"eval_rougeL": 49.5766,
"eval_rougeLsum": 51.9415,
"eval_runtime": 8.204,
"eval_samples_per_second": 12.189,
"eval_steps_per_second": 1.585,
"step": 490
},
{
"epoch": 10.0,
"learning_rate": 2.012e-05,
"loss": 0.0956,
"step": 500
},
{
"epoch": 10.0,
"eval_gen_len": 19.84,
"eval_loss": 0.5506177544593811,
"eval_rouge1": 53.8554,
"eval_rouge2": 48.6792,
"eval_rougeL": 51.0507,
"eval_rougeLsum": 53.0633,
"eval_runtime": 6.7592,
"eval_samples_per_second": 14.795,
"eval_steps_per_second": 1.923,
"step": 500
},
{
"epoch": 10.2,
"learning_rate": 1.972e-05,
"loss": 0.063,
"step": 510
},
{
"epoch": 10.2,
"eval_gen_len": 19.92,
"eval_loss": 0.5823889374732971,
"eval_rouge1": 53.384,
"eval_rouge2": 48.0079,
"eval_rougeL": 50.8212,
"eval_rougeLsum": 52.4155,
"eval_runtime": 7.9793,
"eval_samples_per_second": 12.532,
"eval_steps_per_second": 1.629,
"step": 510
},
{
"epoch": 10.4,
"learning_rate": 1.932e-05,
"loss": 0.072,
"step": 520
},
{
"epoch": 10.4,
"eval_gen_len": 19.81,
"eval_loss": 0.5638877749443054,
"eval_rouge1": 53.1868,
"eval_rouge2": 48.0509,
"eval_rougeL": 50.6999,
"eval_rougeLsum": 52.3177,
"eval_runtime": 6.4643,
"eval_samples_per_second": 15.47,
"eval_steps_per_second": 2.011,
"step": 520
},
{
"epoch": 10.6,
"learning_rate": 1.8920000000000002e-05,
"loss": 0.0753,
"step": 530
},
{
"epoch": 10.6,
"eval_gen_len": 19.9,
"eval_loss": 0.5847346782684326,
"eval_rouge1": 53.9754,
"eval_rouge2": 48.7875,
"eval_rougeL": 50.5908,
"eval_rougeLsum": 53.0499,
"eval_runtime": 8.2095,
"eval_samples_per_second": 12.181,
"eval_steps_per_second": 1.584,
"step": 530
},
{
"epoch": 10.8,
"learning_rate": 1.8520000000000002e-05,
"loss": 0.0832,
"step": 540
},
{
"epoch": 10.8,
"eval_gen_len": 19.91,
"eval_loss": 0.5566834807395935,
"eval_rouge1": 52.2617,
"eval_rouge2": 46.765,
"eval_rougeL": 48.9661,
"eval_rougeLsum": 51.2406,
"eval_runtime": 6.5735,
"eval_samples_per_second": 15.213,
"eval_steps_per_second": 1.978,
"step": 540
},
{
"epoch": 11.0,
"learning_rate": 1.8120000000000003e-05,
"loss": 0.0848,
"step": 550
},
{
"epoch": 11.0,
"eval_gen_len": 19.89,
"eval_loss": 0.5314372181892395,
"eval_rouge1": 53.1077,
"eval_rouge2": 47.9591,
"eval_rougeL": 50.2941,
"eval_rougeLsum": 52.1444,
"eval_runtime": 8.0665,
"eval_samples_per_second": 12.397,
"eval_steps_per_second": 1.612,
"step": 550
},
{
"epoch": 11.2,
"learning_rate": 1.7720000000000003e-05,
"loss": 0.059,
"step": 560
},
{
"epoch": 11.2,
"eval_gen_len": 19.88,
"eval_loss": 0.5482513904571533,
"eval_rouge1": 54.0563,
"eval_rouge2": 49.0141,
"eval_rougeL": 50.7017,
"eval_rougeLsum": 53.0877,
"eval_runtime": 7.1824,
"eval_samples_per_second": 13.923,
"eval_steps_per_second": 1.81,
"step": 560
},
{
"epoch": 11.4,
"learning_rate": 1.732e-05,
"loss": 0.072,
"step": 570
},
{
"epoch": 11.4,
"eval_gen_len": 19.9,
"eval_loss": 0.5527663826942444,
"eval_rouge1": 53.7555,
"eval_rouge2": 48.6013,
"eval_rougeL": 50.0226,
"eval_rougeLsum": 52.6843,
"eval_runtime": 7.3833,
"eval_samples_per_second": 13.544,
"eval_steps_per_second": 1.761,
"step": 570
},
{
"epoch": 11.6,
"learning_rate": 1.692e-05,
"loss": 0.0537,
"step": 580
},
{
"epoch": 11.6,
"eval_gen_len": 19.93,
"eval_loss": 0.5718952417373657,
"eval_rouge1": 53.3446,
"eval_rouge2": 48.156,
"eval_rougeL": 50.5391,
"eval_rougeLsum": 52.473,
"eval_runtime": 7.8512,
"eval_samples_per_second": 12.737,
"eval_steps_per_second": 1.656,
"step": 580
},
{
"epoch": 11.8,
"learning_rate": 1.652e-05,
"loss": 0.064,
"step": 590
},
{
"epoch": 11.8,
"eval_gen_len": 19.9,
"eval_loss": 0.5569304823875427,
"eval_rouge1": 53.7903,
"eval_rouge2": 48.7776,
"eval_rougeL": 50.6488,
"eval_rougeLsum": 52.8657,
"eval_runtime": 6.4395,
"eval_samples_per_second": 15.529,
"eval_steps_per_second": 2.019,
"step": 590
},
{
"epoch": 12.0,
"learning_rate": 1.612e-05,
"loss": 0.0511,
"step": 600
},
{
"epoch": 12.0,
"eval_gen_len": 19.93,
"eval_loss": 0.5667613744735718,
"eval_rouge1": 53.2515,
"eval_rouge2": 48.205,
"eval_rougeL": 49.9425,
"eval_rougeLsum": 52.302,
"eval_runtime": 8.1618,
"eval_samples_per_second": 12.252,
"eval_steps_per_second": 1.593,
"step": 600
},
{
"epoch": 12.2,
"learning_rate": 1.5720000000000002e-05,
"loss": 0.0327,
"step": 610
},
{
"epoch": 12.2,
"eval_gen_len": 19.96,
"eval_loss": 0.6061870455741882,
"eval_rouge1": 52.9566,
"eval_rouge2": 47.573,
"eval_rougeL": 49.5866,
"eval_rougeLsum": 51.8936,
"eval_runtime": 7.6757,
"eval_samples_per_second": 13.028,
"eval_steps_per_second": 1.694,
"step": 610
},
{
"epoch": 12.4,
"learning_rate": 1.5320000000000002e-05,
"loss": 0.066,
"step": 620
},
{
"epoch": 12.4,
"eval_gen_len": 19.96,
"eval_loss": 0.5923030376434326,
"eval_rouge1": 52.8622,
"eval_rouge2": 47.4706,
"eval_rougeL": 49.6314,
"eval_rougeLsum": 51.8334,
"eval_runtime": 6.8801,
"eval_samples_per_second": 14.535,
"eval_steps_per_second": 1.89,
"step": 620
},
{
"epoch": 12.6,
"learning_rate": 1.4920000000000001e-05,
"loss": 0.0508,
"step": 630
},
{
"epoch": 12.6,
"eval_gen_len": 19.93,
"eval_loss": 0.5727255344390869,
"eval_rouge1": 53.0607,
"eval_rouge2": 47.7413,
"eval_rougeL": 50.1959,
"eval_rougeLsum": 52.1585,
"eval_runtime": 8.4487,
"eval_samples_per_second": 11.836,
"eval_steps_per_second": 1.539,
"step": 630
},
{
"epoch": 12.8,
"learning_rate": 1.4520000000000002e-05,
"loss": 0.0551,
"step": 640
},
{
"epoch": 12.8,
"eval_gen_len": 19.93,
"eval_loss": 0.5799768567085266,
"eval_rouge1": 52.551,
"eval_rouge2": 47.2043,
"eval_rougeL": 49.6789,
"eval_rougeLsum": 51.5886,
"eval_runtime": 6.6599,
"eval_samples_per_second": 15.015,
"eval_steps_per_second": 1.952,
"step": 640
},
{
"epoch": 13.0,
"learning_rate": 1.412e-05,
"loss": 0.0393,
"step": 650
},
{
"epoch": 13.0,
"eval_gen_len": 19.89,
"eval_loss": 0.5942478775978088,
"eval_rouge1": 52.5056,
"eval_rouge2": 47.2568,
"eval_rougeL": 49.8142,
"eval_rougeLsum": 51.6736,
"eval_runtime": 8.553,
"eval_samples_per_second": 11.692,
"eval_steps_per_second": 1.52,
"step": 650
},
{
"epoch": 13.2,
"learning_rate": 1.3720000000000002e-05,
"loss": 0.0472,
"step": 660
},
{
"epoch": 13.2,
"eval_gen_len": 19.84,
"eval_loss": 0.5964275598526001,
"eval_rouge1": 53.9299,
"eval_rouge2": 49.2649,
"eval_rougeL": 51.1705,
"eval_rougeLsum": 53.3046,
"eval_runtime": 7.2167,
"eval_samples_per_second": 13.857,
"eval_steps_per_second": 1.801,
"step": 660
},
{
"epoch": 13.4,
"learning_rate": 1.3320000000000001e-05,
"loss": 0.0484,
"step": 670
},
{
"epoch": 13.4,
"eval_gen_len": 19.93,
"eval_loss": 0.6021795868873596,
"eval_rouge1": 52.7107,
"eval_rouge2": 47.5152,
"eval_rougeL": 50.4474,
"eval_rougeLsum": 51.8891,
"eval_runtime": 7.6455,
"eval_samples_per_second": 13.08,
"eval_steps_per_second": 1.7,
"step": 670
},
{
"epoch": 13.6,
"learning_rate": 1.2920000000000002e-05,
"loss": 0.0493,
"step": 680
},
{
"epoch": 13.6,
"eval_gen_len": 19.93,
"eval_loss": 0.5896037220954895,
"eval_rouge1": 52.581,
"eval_rouge2": 47.2503,
"eval_rougeL": 49.8352,
"eval_rougeLsum": 51.7529,
"eval_runtime": 7.7221,
"eval_samples_per_second": 12.95,
"eval_steps_per_second": 1.683,
"step": 680
},
{
"epoch": 13.8,
"learning_rate": 1.252e-05,
"loss": 0.0373,
"step": 690
},
{
"epoch": 13.8,
"eval_gen_len": 19.9,
"eval_loss": 0.5958464741706848,
"eval_rouge1": 53.7481,
"eval_rouge2": 48.8915,
"eval_rougeL": 50.9133,
"eval_rougeLsum": 52.8805,
"eval_runtime": 7.027,
"eval_samples_per_second": 14.231,
"eval_steps_per_second": 1.85,
"step": 690
},
{
"epoch": 14.0,
"learning_rate": 1.2120000000000001e-05,
"loss": 0.0439,
"step": 700
},
{
"epoch": 14.0,
"eval_gen_len": 19.95,
"eval_loss": 0.5903494954109192,
"eval_rouge1": 52.5592,
"eval_rouge2": 47.4923,
"eval_rougeL": 50.2025,
"eval_rougeLsum": 51.7545,
"eval_runtime": 8.4747,
"eval_samples_per_second": 11.8,
"eval_steps_per_second": 1.534,
"step": 700
},
{
"epoch": 14.2,
"learning_rate": 1.172e-05,
"loss": 0.0495,
"step": 710
},
{
"epoch": 14.2,
"eval_gen_len": 19.96,
"eval_loss": 0.6074602603912354,
"eval_rouge1": 53.1459,
"eval_rouge2": 48.2296,
"eval_rougeL": 50.8331,
"eval_rougeLsum": 52.3047,
"eval_runtime": 6.6639,
"eval_samples_per_second": 15.006,
"eval_steps_per_second": 1.951,
"step": 710
},
{
"epoch": 14.4,
"learning_rate": 1.132e-05,
"loss": 0.0488,
"step": 720
},
{
"epoch": 14.4,
"eval_gen_len": 19.96,
"eval_loss": 0.6027532815933228,
"eval_rouge1": 53.1743,
"eval_rouge2": 48.1621,
"eval_rougeL": 50.5616,
"eval_rougeLsum": 52.294,
"eval_runtime": 8.39,
"eval_samples_per_second": 11.919,
"eval_steps_per_second": 1.549,
"step": 720
},
{
"epoch": 14.6,
"learning_rate": 1.0920000000000002e-05,
"loss": 0.0428,
"step": 730
},
{
"epoch": 14.6,
"eval_gen_len": 19.92,
"eval_loss": 0.6251101493835449,
"eval_rouge1": 53.6698,
"eval_rouge2": 48.9146,
"eval_rougeL": 51.3211,
"eval_rougeLsum": 52.8219,
"eval_runtime": 6.6435,
"eval_samples_per_second": 15.052,
"eval_steps_per_second": 1.957,
"step": 730
},
{
"epoch": 14.8,
"learning_rate": 1.0520000000000001e-05,
"loss": 0.0332,
"step": 740
},
{
"epoch": 14.8,
"eval_gen_len": 19.88,
"eval_loss": 0.5891709327697754,
"eval_rouge1": 53.3632,
"eval_rouge2": 48.281,
"eval_rougeL": 50.8155,
"eval_rougeLsum": 52.441,
"eval_runtime": 7.9324,
"eval_samples_per_second": 12.606,
"eval_steps_per_second": 1.639,
"step": 740
},
{
"epoch": 15.0,
"learning_rate": 1.0120000000000001e-05,
"loss": 0.0443,
"step": 750
},
{
"epoch": 15.0,
"eval_gen_len": 19.92,
"eval_loss": 0.6117092370986938,
"eval_rouge1": 53.3405,
"eval_rouge2": 48.3071,
"eval_rougeL": 50.4905,
"eval_rougeLsum": 52.5336,
"eval_runtime": 7.4624,
"eval_samples_per_second": 13.401,
"eval_steps_per_second": 1.742,
"step": 750
},
{
"epoch": 15.2,
"learning_rate": 9.72e-06,
"loss": 0.0533,
"step": 760
},
{
"epoch": 15.2,
"eval_gen_len": 19.88,
"eval_loss": 0.596733033657074,
"eval_rouge1": 53.2498,
"eval_rouge2": 48.2583,
"eval_rougeL": 50.5271,
"eval_rougeLsum": 52.4317,
"eval_runtime": 7.2087,
"eval_samples_per_second": 13.872,
"eval_steps_per_second": 1.803,
"step": 760
},
{
"epoch": 15.4,
"learning_rate": 9.32e-06,
"loss": 0.0312,
"step": 770
},
{
"epoch": 15.4,
"eval_gen_len": 19.93,
"eval_loss": 0.611742377281189,
"eval_rouge1": 52.9445,
"eval_rouge2": 47.6968,
"eval_rougeL": 50.0402,
"eval_rougeLsum": 52.053,
"eval_runtime": 8.3029,
"eval_samples_per_second": 12.044,
"eval_steps_per_second": 1.566,
"step": 770
},
{
"epoch": 15.6,
"learning_rate": 8.920000000000001e-06,
"loss": 0.0363,
"step": 780
},
{
"epoch": 15.6,
"eval_gen_len": 19.93,
"eval_loss": 0.6265898942947388,
"eval_rouge1": 52.6826,
"eval_rouge2": 47.3318,
"eval_rougeL": 50.0565,
"eval_rougeLsum": 51.7117,
"eval_runtime": 6.4527,
"eval_samples_per_second": 15.497,
"eval_steps_per_second": 2.015,
"step": 780
},
{
"epoch": 15.8,
"learning_rate": 8.52e-06,
"loss": 0.035,
"step": 790
},
{
"epoch": 15.8,
"eval_gen_len": 19.93,
"eval_loss": 0.6089562773704529,
"eval_rouge1": 52.7035,
"eval_rouge2": 47.3733,
"eval_rougeL": 49.8679,
"eval_rougeLsum": 51.8499,
"eval_runtime": 8.2045,
"eval_samples_per_second": 12.188,
"eval_steps_per_second": 1.584,
"step": 790
},
{
"epoch": 16.0,
"learning_rate": 8.120000000000002e-06,
"loss": 0.0405,
"step": 800
},
{
"epoch": 16.0,
"eval_gen_len": 19.93,
"eval_loss": 0.6099843382835388,
"eval_rouge1": 52.3781,
"eval_rouge2": 46.9794,
"eval_rougeL": 49.4781,
"eval_rougeLsum": 51.4939,
"eval_runtime": 6.4126,
"eval_samples_per_second": 15.594,
"eval_steps_per_second": 2.027,
"step": 800
},
{
"epoch": 16.2,
"learning_rate": 7.72e-06,
"loss": 0.0378,
"step": 810
},
{
"epoch": 16.2,
"eval_gen_len": 19.95,
"eval_loss": 0.610033392906189,
"eval_rouge1": 53.2325,
"eval_rouge2": 48.2416,
"eval_rougeL": 49.8123,
"eval_rougeLsum": 52.4172,
"eval_runtime": 7.6476,
"eval_samples_per_second": 13.076,
"eval_steps_per_second": 1.7,
"step": 810
},
{
"epoch": 16.4,
"learning_rate": 7.32e-06,
"loss": 0.0322,
"step": 820
},
{
"epoch": 16.4,
"eval_gen_len": 19.95,
"eval_loss": 0.6167323589324951,
"eval_rouge1": 53.1487,
"eval_rouge2": 48.2712,
"eval_rougeL": 49.9134,
"eval_rougeLsum": 52.4232,
"eval_runtime": 6.8379,
"eval_samples_per_second": 14.624,
"eval_steps_per_second": 1.901,
"step": 820
},
{
"epoch": 16.6,
"learning_rate": 6.92e-06,
"loss": 0.0274,
"step": 830
},
{
"epoch": 16.6,
"eval_gen_len": 19.93,
"eval_loss": 0.6255094408988953,
"eval_rouge1": 53.1403,
"eval_rouge2": 48.0661,
"eval_rougeL": 50.3561,
"eval_rougeLsum": 52.2934,
"eval_runtime": 8.4294,
"eval_samples_per_second": 11.863,
"eval_steps_per_second": 1.542,
"step": 830
},
{
"epoch": 16.8,
"learning_rate": 6.520000000000001e-06,
"loss": 0.0277,
"step": 840
},
{
"epoch": 16.8,
"eval_gen_len": 19.93,
"eval_loss": 0.6320467591285706,
"eval_rouge1": 53.1403,
"eval_rouge2": 48.0661,
"eval_rougeL": 50.3561,
"eval_rougeLsum": 52.2934,
"eval_runtime": 6.6968,
"eval_samples_per_second": 14.933,
"eval_steps_per_second": 1.941,
"step": 840
},
{
"epoch": 17.0,
"learning_rate": 6.120000000000001e-06,
"loss": 0.0302,
"step": 850
},
{
"epoch": 17.0,
"eval_gen_len": 19.93,
"eval_loss": 0.6346279978752136,
"eval_rouge1": 53.0019,
"eval_rouge2": 47.8758,
"eval_rougeL": 50.1767,
"eval_rougeLsum": 52.0888,
"eval_runtime": 8.3668,
"eval_samples_per_second": 11.952,
"eval_steps_per_second": 1.554,
"step": 850
},
{
"epoch": 17.2,
"learning_rate": 5.72e-06,
"loss": 0.0344,
"step": 860
},
{
"epoch": 17.2,
"eval_gen_len": 19.91,
"eval_loss": 0.6346395015716553,
"eval_rouge1": 53.0135,
"eval_rouge2": 48.0249,
"eval_rougeL": 49.977,
"eval_rougeLsum": 52.1297,
"eval_runtime": 6.9398,
"eval_samples_per_second": 14.41,
"eval_steps_per_second": 1.873,
"step": 860
},
{
"epoch": 17.4,
"learning_rate": 5.320000000000001e-06,
"loss": 0.0331,
"step": 870
},
{
"epoch": 17.4,
"eval_gen_len": 19.93,
"eval_loss": 0.6337741613388062,
"eval_rouge1": 53.2181,
"eval_rouge2": 48.1723,
"eval_rougeL": 50.4487,
"eval_rougeLsum": 52.4175,
"eval_runtime": 7.6152,
"eval_samples_per_second": 13.132,
"eval_steps_per_second": 1.707,
"step": 870
},
{
"epoch": 17.6,
"learning_rate": 4.92e-06,
"loss": 0.0344,
"step": 880
},
{
"epoch": 17.6,
"eval_gen_len": 19.93,
"eval_loss": 0.6169251799583435,
"eval_rouge1": 53.2726,
"eval_rouge2": 48.2238,
"eval_rougeL": 50.5094,
"eval_rougeLsum": 52.4617,
"eval_runtime": 7.681,
"eval_samples_per_second": 13.019,
"eval_steps_per_second": 1.692,
"step": 880
},
{
"epoch": 17.8,
"learning_rate": 4.520000000000001e-06,
"loss": 0.04,
"step": 890
},
{
"epoch": 17.8,
"eval_gen_len": 19.89,
"eval_loss": 0.6145161986351013,
"eval_rouge1": 52.9438,
"eval_rouge2": 47.9296,
"eval_rougeL": 50.2202,
"eval_rougeLsum": 52.1485,
"eval_runtime": 6.7635,
"eval_samples_per_second": 14.785,
"eval_steps_per_second": 1.922,
"step": 890
},
{
"epoch": 18.0,
"learning_rate": 4.12e-06,
"loss": 0.0291,
"step": 900
},
{
"epoch": 18.0,
"eval_gen_len": 19.91,
"eval_loss": 0.6197648048400879,
"eval_rouge1": 52.9654,
"eval_rouge2": 47.9572,
"eval_rougeL": 50.2296,
"eval_rougeLsum": 52.2057,
"eval_runtime": 8.1949,
"eval_samples_per_second": 12.203,
"eval_steps_per_second": 1.586,
"step": 900
},
{
"epoch": 18.2,
"learning_rate": 3.7200000000000004e-06,
"loss": 0.0258,
"step": 910
},
{
"epoch": 18.2,
"eval_gen_len": 19.95,
"eval_loss": 0.626217782497406,
"eval_rouge1": 52.9077,
"eval_rouge2": 47.7606,
"eval_rougeL": 50.1641,
"eval_rougeLsum": 52.126,
"eval_runtime": 6.3753,
"eval_samples_per_second": 15.686,
"eval_steps_per_second": 2.039,
"step": 910
},
{
"epoch": 18.4,
"learning_rate": 3.3200000000000004e-06,
"loss": 0.0374,
"step": 920
},
{
"epoch": 18.4,
"eval_gen_len": 19.93,
"eval_loss": 0.6214368343353271,
"eval_rouge1": 52.9374,
"eval_rouge2": 47.9047,
"eval_rougeL": 50.5323,
"eval_rougeLsum": 52.2003,
"eval_runtime": 7.6867,
"eval_samples_per_second": 13.01,
"eval_steps_per_second": 1.691,
"step": 920
},
{
"epoch": 18.6,
"learning_rate": 2.92e-06,
"loss": 0.0236,
"step": 930
},
{
"epoch": 18.6,
"eval_gen_len": 19.93,
"eval_loss": 0.622434139251709,
"eval_rouge1": 52.9374,
"eval_rouge2": 47.9308,
"eval_rougeL": 50.2522,
"eval_rougeLsum": 52.2003,
"eval_runtime": 7.0784,
"eval_samples_per_second": 14.128,
"eval_steps_per_second": 1.837,
"step": 930
},
{
"epoch": 18.8,
"learning_rate": 2.52e-06,
"loss": 0.0161,
"step": 940
},
{
"epoch": 18.8,
"eval_gen_len": 19.93,
"eval_loss": 0.6140244603157043,
"eval_rouge1": 52.9019,
"eval_rouge2": 47.9184,
"eval_rougeL": 50.4912,
"eval_rougeLsum": 52.1339,
"eval_runtime": 7.1373,
"eval_samples_per_second": 14.011,
"eval_steps_per_second": 1.821,
"step": 940
},
{
"epoch": 19.0,
"learning_rate": 2.12e-06,
"loss": 0.0411,
"step": 950
},
{
"epoch": 19.0,
"eval_gen_len": 19.95,
"eval_loss": 0.6175794005393982,
"eval_rouge1": 53.0345,
"eval_rouge2": 48.0268,
"eval_rougeL": 50.3072,
"eval_rougeLsum": 52.2337,
"eval_runtime": 7.454,
"eval_samples_per_second": 13.416,
"eval_steps_per_second": 1.744,
"step": 950
},
{
"epoch": 19.2,
"learning_rate": 1.72e-06,
"loss": 0.0255,
"step": 960
},
{
"epoch": 19.2,
"eval_gen_len": 19.95,
"eval_loss": 0.6201534271240234,
"eval_rouge1": 53.0189,
"eval_rouge2": 47.8858,
"eval_rougeL": 50.2223,
"eval_rougeLsum": 52.1154,
"eval_runtime": 6.4527,
"eval_samples_per_second": 15.497,
"eval_steps_per_second": 2.015,
"step": 960
},
{
"epoch": 19.4,
"learning_rate": 1.32e-06,
"loss": 0.024,
"step": 970
},
{
"epoch": 19.4,
"eval_gen_len": 19.95,
"eval_loss": 0.6199787855148315,
"eval_rouge1": 53.0189,
"eval_rouge2": 47.8858,
"eval_rougeL": 50.2223,
"eval_rougeLsum": 52.1154,
"eval_runtime": 8.1992,
"eval_samples_per_second": 12.196,
"eval_steps_per_second": 1.586,
"step": 970
},
{
"epoch": 19.6,
"learning_rate": 9.200000000000001e-07,
"loss": 0.0295,
"step": 980
},
{
"epoch": 19.6,
"eval_gen_len": 19.95,
"eval_loss": 0.618171215057373,
"eval_rouge1": 53.0189,
"eval_rouge2": 47.8858,
"eval_rougeL": 50.2223,
"eval_rougeLsum": 52.1154,
"eval_runtime": 6.3503,
"eval_samples_per_second": 15.747,
"eval_steps_per_second": 2.047,
"step": 980
},
{
"epoch": 19.8,
"learning_rate": 5.2e-07,
"loss": 0.0129,
"step": 990
},
{
"epoch": 19.8,
"eval_gen_len": 19.95,
"eval_loss": 0.6194862723350525,
"eval_rouge1": 53.0273,
"eval_rouge2": 47.8919,
"eval_rougeL": 50.2299,
"eval_rougeLsum": 52.1227,
"eval_runtime": 8.0127,
"eval_samples_per_second": 12.48,
"eval_steps_per_second": 1.622,
"step": 990
},
{
"epoch": 20.0,
"learning_rate": 1.2000000000000002e-07,
"loss": 0.0332,
"step": 1000
},
{
"epoch": 20.0,
"eval_gen_len": 19.95,
"eval_loss": 0.6200586557388306,
"eval_rouge1": 52.8857,
"eval_rouge2": 47.6744,
"eval_rougeL": 50.037,
"eval_rougeLsum": 52.0023,
"eval_runtime": 6.4657,
"eval_samples_per_second": 15.466,
"eval_steps_per_second": 2.011,
"step": 1000
}
],
"max_steps": 1000,
"num_train_epochs": 20,
"total_flos": 644853467381760.0,
"trial_name": null,
"trial_params": null
}