mllm-dev's picture
Upload folder using huggingface_hub
4148993 verified
{
"best_metric": 6.074151039123535,
"best_model_checkpoint": "bill_sum_finetune_test_gpt2/checkpoint-528",
"epoch": 35.0,
"eval_steps": 500,
"global_step": 560,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_gen_len": 600.0,
"eval_loss": 6.843741416931152,
"eval_rouge1": 0.4053,
"eval_rouge2": 0.1708,
"eval_rougeL": 0.2228,
"eval_rougeLsum": 0.35,
"eval_runtime": 21.0375,
"eval_samples_per_second": 11.788,
"eval_steps_per_second": 0.19,
"step": 16
},
{
"epoch": 2.0,
"eval_gen_len": 600.0,
"eval_loss": 6.511104583740234,
"eval_rouge1": 0.3978,
"eval_rouge2": 0.1673,
"eval_rougeL": 0.2181,
"eval_rougeLsum": 0.3434,
"eval_runtime": 21.5368,
"eval_samples_per_second": 11.515,
"eval_steps_per_second": 0.186,
"step": 32
},
{
"epoch": 3.0,
"eval_gen_len": 600.0,
"eval_loss": 6.349050998687744,
"eval_rouge1": 0.3988,
"eval_rouge2": 0.1679,
"eval_rougeL": 0.2188,
"eval_rougeLsum": 0.3443,
"eval_runtime": 21.2101,
"eval_samples_per_second": 11.693,
"eval_steps_per_second": 0.189,
"step": 48
},
{
"epoch": 4.0,
"eval_gen_len": 600.0,
"eval_loss": 6.258257865905762,
"eval_rouge1": 0.3996,
"eval_rouge2": 0.1681,
"eval_rougeL": 0.2189,
"eval_rougeLsum": 0.345,
"eval_runtime": 21.3129,
"eval_samples_per_second": 11.636,
"eval_steps_per_second": 0.188,
"step": 64
},
{
"epoch": 5.0,
"eval_gen_len": 600.0,
"eval_loss": 6.200411796569824,
"eval_rouge1": 0.3986,
"eval_rouge2": 0.1677,
"eval_rougeL": 0.2184,
"eval_rougeLsum": 0.3439,
"eval_runtime": 21.6445,
"eval_samples_per_second": 11.458,
"eval_steps_per_second": 0.185,
"step": 80
},
{
"epoch": 6.0,
"eval_gen_len": 600.0,
"eval_loss": 6.170421600341797,
"eval_rouge1": 0.3981,
"eval_rouge2": 0.1674,
"eval_rougeL": 0.2178,
"eval_rougeLsum": 0.3432,
"eval_runtime": 21.242,
"eval_samples_per_second": 11.675,
"eval_steps_per_second": 0.188,
"step": 96
},
{
"epoch": 7.0,
"eval_gen_len": 600.0,
"eval_loss": 6.150304317474365,
"eval_rouge1": 0.3976,
"eval_rouge2": 0.1672,
"eval_rougeL": 0.2176,
"eval_rougeLsum": 0.3428,
"eval_runtime": 21.3562,
"eval_samples_per_second": 11.613,
"eval_steps_per_second": 0.187,
"step": 112
},
{
"epoch": 8.0,
"eval_gen_len": 600.0,
"eval_loss": 6.135751724243164,
"eval_rouge1": 0.3977,
"eval_rouge2": 0.1672,
"eval_rougeL": 0.2175,
"eval_rougeLsum": 0.3427,
"eval_runtime": 21.5836,
"eval_samples_per_second": 11.49,
"eval_steps_per_second": 0.185,
"step": 128
},
{
"epoch": 9.0,
"eval_gen_len": 600.0,
"eval_loss": 6.122563362121582,
"eval_rouge1": 0.3977,
"eval_rouge2": 0.1671,
"eval_rougeL": 0.2171,
"eval_rougeLsum": 0.3425,
"eval_runtime": 21.5829,
"eval_samples_per_second": 11.491,
"eval_steps_per_second": 0.185,
"step": 144
},
{
"epoch": 10.0,
"eval_gen_len": 600.0,
"eval_loss": 6.114274501800537,
"eval_rouge1": 0.397,
"eval_rouge2": 0.1669,
"eval_rougeL": 0.2174,
"eval_rougeLsum": 0.3427,
"eval_runtime": 21.508,
"eval_samples_per_second": 11.531,
"eval_steps_per_second": 0.186,
"step": 160
},
{
"epoch": 11.0,
"eval_gen_len": 600.0,
"eval_loss": 6.108905792236328,
"eval_rouge1": 0.3973,
"eval_rouge2": 0.167,
"eval_rougeL": 0.2173,
"eval_rougeLsum": 0.3427,
"eval_runtime": 21.2386,
"eval_samples_per_second": 11.677,
"eval_steps_per_second": 0.188,
"step": 176
},
{
"epoch": 12.0,
"eval_gen_len": 600.0,
"eval_loss": 6.107725620269775,
"eval_rouge1": 0.3974,
"eval_rouge2": 0.167,
"eval_rougeL": 0.2173,
"eval_rougeLsum": 0.3426,
"eval_runtime": 21.6952,
"eval_samples_per_second": 11.431,
"eval_steps_per_second": 0.184,
"step": 192
},
{
"epoch": 13.0,
"eval_gen_len": 600.0,
"eval_loss": 6.099628448486328,
"eval_rouge1": 0.3976,
"eval_rouge2": 0.167,
"eval_rougeL": 0.2172,
"eval_rougeLsum": 0.3428,
"eval_runtime": 21.1438,
"eval_samples_per_second": 11.729,
"eval_steps_per_second": 0.189,
"step": 208
},
{
"epoch": 14.0,
"eval_gen_len": 600.0,
"eval_loss": 6.096395492553711,
"eval_rouge1": 0.3975,
"eval_rouge2": 0.167,
"eval_rougeL": 0.2171,
"eval_rougeLsum": 0.3426,
"eval_runtime": 21.6504,
"eval_samples_per_second": 11.455,
"eval_steps_per_second": 0.185,
"step": 224
},
{
"epoch": 15.0,
"eval_gen_len": 600.0,
"eval_loss": 6.0916852951049805,
"eval_rouge1": 0.3979,
"eval_rouge2": 0.167,
"eval_rougeL": 0.2168,
"eval_rougeLsum": 0.3427,
"eval_runtime": 21.4782,
"eval_samples_per_second": 11.547,
"eval_steps_per_second": 0.186,
"step": 240
},
{
"epoch": 16.0,
"eval_gen_len": 600.0,
"eval_loss": 6.090492248535156,
"eval_rouge1": 0.3977,
"eval_rouge2": 0.1672,
"eval_rougeL": 0.2173,
"eval_rougeLsum": 0.3428,
"eval_runtime": 21.7128,
"eval_samples_per_second": 11.422,
"eval_steps_per_second": 0.184,
"step": 256
},
{
"epoch": 17.0,
"eval_gen_len": 600.0,
"eval_loss": 6.091054916381836,
"eval_rouge1": 0.399,
"eval_rouge2": 0.168,
"eval_rougeL": 0.2176,
"eval_rougeLsum": 0.3436,
"eval_runtime": 21.2583,
"eval_samples_per_second": 11.666,
"eval_steps_per_second": 0.188,
"step": 272
},
{
"epoch": 18.0,
"eval_gen_len": 600.0,
"eval_loss": 6.0864386558532715,
"eval_rouge1": 0.3985,
"eval_rouge2": 0.1675,
"eval_rougeL": 0.2172,
"eval_rougeLsum": 0.3431,
"eval_runtime": 21.4489,
"eval_samples_per_second": 11.562,
"eval_steps_per_second": 0.186,
"step": 288
},
{
"epoch": 19.0,
"eval_gen_len": 600.0,
"eval_loss": 6.082566261291504,
"eval_rouge1": 0.4004,
"eval_rouge2": 0.1686,
"eval_rougeL": 0.2186,
"eval_rougeLsum": 0.3451,
"eval_runtime": 21.4779,
"eval_samples_per_second": 11.547,
"eval_steps_per_second": 0.186,
"step": 304
},
{
"epoch": 20.0,
"eval_gen_len": 600.0,
"eval_loss": 6.0813798904418945,
"eval_rouge1": 0.4009,
"eval_rouge2": 0.1689,
"eval_rougeL": 0.2189,
"eval_rougeLsum": 0.3454,
"eval_runtime": 21.5568,
"eval_samples_per_second": 11.504,
"eval_steps_per_second": 0.186,
"step": 320
},
{
"epoch": 21.0,
"eval_gen_len": 600.0,
"eval_loss": 6.082016944885254,
"eval_rouge1": 0.3999,
"eval_rouge2": 0.1682,
"eval_rougeL": 0.218,
"eval_rougeLsum": 0.3444,
"eval_runtime": 21.5727,
"eval_samples_per_second": 11.496,
"eval_steps_per_second": 0.185,
"step": 336
},
{
"epoch": 22.0,
"eval_gen_len": 600.0,
"eval_loss": 6.082878589630127,
"eval_rouge1": 0.4076,
"eval_rouge2": 0.1718,
"eval_rougeL": 0.2222,
"eval_rougeLsum": 0.3508,
"eval_runtime": 20.8434,
"eval_samples_per_second": 11.898,
"eval_steps_per_second": 0.192,
"step": 352
},
{
"epoch": 23.0,
"eval_gen_len": 600.0,
"eval_loss": 6.080228805541992,
"eval_rouge1": 0.405,
"eval_rouge2": 0.1705,
"eval_rougeL": 0.221,
"eval_rougeLsum": 0.3488,
"eval_runtime": 21.1916,
"eval_samples_per_second": 11.703,
"eval_steps_per_second": 0.189,
"step": 368
},
{
"epoch": 24.0,
"eval_gen_len": 600.0,
"eval_loss": 6.07808780670166,
"eval_rouge1": 0.4052,
"eval_rouge2": 0.1709,
"eval_rougeL": 0.2212,
"eval_rougeLsum": 0.3491,
"eval_runtime": 21.3026,
"eval_samples_per_second": 11.642,
"eval_steps_per_second": 0.188,
"step": 384
},
{
"epoch": 25.0,
"eval_gen_len": 600.0,
"eval_loss": 6.077059268951416,
"eval_rouge1": 0.4064,
"eval_rouge2": 0.1711,
"eval_rougeL": 0.2216,
"eval_rougeLsum": 0.3498,
"eval_runtime": 20.9702,
"eval_samples_per_second": 11.826,
"eval_steps_per_second": 0.191,
"step": 400
},
{
"epoch": 26.0,
"eval_gen_len": 600.0,
"eval_loss": 6.075596809387207,
"eval_rouge1": 0.4086,
"eval_rouge2": 0.1723,
"eval_rougeL": 0.223,
"eval_rougeLsum": 0.3517,
"eval_runtime": 21.1984,
"eval_samples_per_second": 11.699,
"eval_steps_per_second": 0.189,
"step": 416
},
{
"epoch": 27.0,
"eval_gen_len": 600.0,
"eval_loss": 6.075705528259277,
"eval_rouge1": 0.4075,
"eval_rouge2": 0.1719,
"eval_rougeL": 0.2224,
"eval_rougeLsum": 0.3509,
"eval_runtime": 20.7964,
"eval_samples_per_second": 11.925,
"eval_steps_per_second": 0.192,
"step": 432
},
{
"epoch": 28.0,
"eval_gen_len": 600.0,
"eval_loss": 6.075275421142578,
"eval_rouge1": 0.4081,
"eval_rouge2": 0.1722,
"eval_rougeL": 0.2224,
"eval_rougeLsum": 0.3509,
"eval_runtime": 20.9972,
"eval_samples_per_second": 11.811,
"eval_steps_per_second": 0.191,
"step": 448
},
{
"epoch": 29.0,
"eval_gen_len": 600.0,
"eval_loss": 6.076692581176758,
"eval_rouge1": 0.4132,
"eval_rouge2": 0.1751,
"eval_rougeL": 0.2258,
"eval_rougeLsum": 0.3553,
"eval_runtime": 21.0313,
"eval_samples_per_second": 11.792,
"eval_steps_per_second": 0.19,
"step": 464
},
{
"epoch": 30.0,
"eval_gen_len": 600.0,
"eval_loss": 6.075990676879883,
"eval_rouge1": 0.4108,
"eval_rouge2": 0.1737,
"eval_rougeL": 0.2242,
"eval_rougeLsum": 0.3533,
"eval_runtime": 20.714,
"eval_samples_per_second": 11.973,
"eval_steps_per_second": 0.193,
"step": 480
},
{
"epoch": 31.0,
"eval_gen_len": 600.0,
"eval_loss": 6.074672222137451,
"eval_rouge1": 0.4126,
"eval_rouge2": 0.1747,
"eval_rougeL": 0.2253,
"eval_rougeLsum": 0.3546,
"eval_runtime": 21.1511,
"eval_samples_per_second": 11.725,
"eval_steps_per_second": 0.189,
"step": 496
},
{
"epoch": 31.25,
"grad_norm": 237350.25,
"learning_rate": 2.1428571428571427e-06,
"loss": 6.1153,
"step": 500
},
{
"epoch": 32.0,
"eval_gen_len": 600.0,
"eval_loss": 6.076193809509277,
"eval_rouge1": 0.4119,
"eval_rouge2": 0.1744,
"eval_rougeL": 0.2248,
"eval_rougeLsum": 0.3541,
"eval_runtime": 20.5412,
"eval_samples_per_second": 12.073,
"eval_steps_per_second": 0.195,
"step": 512
},
{
"epoch": 33.0,
"eval_gen_len": 600.0,
"eval_loss": 6.074151039123535,
"eval_rouge1": 0.4123,
"eval_rouge2": 0.1746,
"eval_rougeL": 0.2251,
"eval_rougeLsum": 0.3545,
"eval_runtime": 21.0056,
"eval_samples_per_second": 11.806,
"eval_steps_per_second": 0.19,
"step": 528
},
{
"epoch": 34.0,
"eval_gen_len": 600.0,
"eval_loss": 6.076315879821777,
"eval_rouge1": 0.4114,
"eval_rouge2": 0.1741,
"eval_rougeL": 0.2246,
"eval_rougeLsum": 0.3537,
"eval_runtime": 20.6937,
"eval_samples_per_second": 11.984,
"eval_steps_per_second": 0.193,
"step": 544
},
{
"epoch": 35.0,
"eval_gen_len": 600.0,
"eval_loss": 6.0760498046875,
"eval_rouge1": 0.4119,
"eval_rouge2": 0.1744,
"eval_rougeL": 0.2249,
"eval_rougeLsum": 0.3541,
"eval_runtime": 21.0084,
"eval_samples_per_second": 11.805,
"eval_steps_per_second": 0.19,
"step": 560
}
],
"logging_steps": 500,
"max_steps": 560,
"num_input_tokens_seen": 0,
"num_train_epochs": 35,
"save_steps": 500,
"total_flos": 9044623687680000.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}