| { | |
| "best_metric": 6.074151039123535, | |
| "best_model_checkpoint": "bill_sum_finetune_test_gpt2/checkpoint-528", | |
| "epoch": 33.0, | |
| "eval_steps": 500, | |
| "global_step": 528, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.843741416931152, | |
| "eval_rouge1": 0.4053, | |
| "eval_rouge2": 0.1708, | |
| "eval_rougeL": 0.2228, | |
| "eval_rougeLsum": 0.35, | |
| "eval_runtime": 21.0375, | |
| "eval_samples_per_second": 11.788, | |
| "eval_steps_per_second": 0.19, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.511104583740234, | |
| "eval_rouge1": 0.3978, | |
| "eval_rouge2": 0.1673, | |
| "eval_rougeL": 0.2181, | |
| "eval_rougeLsum": 0.3434, | |
| "eval_runtime": 21.5368, | |
| "eval_samples_per_second": 11.515, | |
| "eval_steps_per_second": 0.186, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.349050998687744, | |
| "eval_rouge1": 0.3988, | |
| "eval_rouge2": 0.1679, | |
| "eval_rougeL": 0.2188, | |
| "eval_rougeLsum": 0.3443, | |
| "eval_runtime": 21.2101, | |
| "eval_samples_per_second": 11.693, | |
| "eval_steps_per_second": 0.189, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.258257865905762, | |
| "eval_rouge1": 0.3996, | |
| "eval_rouge2": 0.1681, | |
| "eval_rougeL": 0.2189, | |
| "eval_rougeLsum": 0.345, | |
| "eval_runtime": 21.3129, | |
| "eval_samples_per_second": 11.636, | |
| "eval_steps_per_second": 0.188, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.200411796569824, | |
| "eval_rouge1": 0.3986, | |
| "eval_rouge2": 0.1677, | |
| "eval_rougeL": 0.2184, | |
| "eval_rougeLsum": 0.3439, | |
| "eval_runtime": 21.6445, | |
| "eval_samples_per_second": 11.458, | |
| "eval_steps_per_second": 0.185, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.170421600341797, | |
| "eval_rouge1": 0.3981, | |
| "eval_rouge2": 0.1674, | |
| "eval_rougeL": 0.2178, | |
| "eval_rougeLsum": 0.3432, | |
| "eval_runtime": 21.242, | |
| "eval_samples_per_second": 11.675, | |
| "eval_steps_per_second": 0.188, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.150304317474365, | |
| "eval_rouge1": 0.3976, | |
| "eval_rouge2": 0.1672, | |
| "eval_rougeL": 0.2176, | |
| "eval_rougeLsum": 0.3428, | |
| "eval_runtime": 21.3562, | |
| "eval_samples_per_second": 11.613, | |
| "eval_steps_per_second": 0.187, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.135751724243164, | |
| "eval_rouge1": 0.3977, | |
| "eval_rouge2": 0.1672, | |
| "eval_rougeL": 0.2175, | |
| "eval_rougeLsum": 0.3427, | |
| "eval_runtime": 21.5836, | |
| "eval_samples_per_second": 11.49, | |
| "eval_steps_per_second": 0.185, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.122563362121582, | |
| "eval_rouge1": 0.3977, | |
| "eval_rouge2": 0.1671, | |
| "eval_rougeL": 0.2171, | |
| "eval_rougeLsum": 0.3425, | |
| "eval_runtime": 21.5829, | |
| "eval_samples_per_second": 11.491, | |
| "eval_steps_per_second": 0.185, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.114274501800537, | |
| "eval_rouge1": 0.397, | |
| "eval_rouge2": 0.1669, | |
| "eval_rougeL": 0.2174, | |
| "eval_rougeLsum": 0.3427, | |
| "eval_runtime": 21.508, | |
| "eval_samples_per_second": 11.531, | |
| "eval_steps_per_second": 0.186, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.108905792236328, | |
| "eval_rouge1": 0.3973, | |
| "eval_rouge2": 0.167, | |
| "eval_rougeL": 0.2173, | |
| "eval_rougeLsum": 0.3427, | |
| "eval_runtime": 21.2386, | |
| "eval_samples_per_second": 11.677, | |
| "eval_steps_per_second": 0.188, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.107725620269775, | |
| "eval_rouge1": 0.3974, | |
| "eval_rouge2": 0.167, | |
| "eval_rougeL": 0.2173, | |
| "eval_rougeLsum": 0.3426, | |
| "eval_runtime": 21.6952, | |
| "eval_samples_per_second": 11.431, | |
| "eval_steps_per_second": 0.184, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.099628448486328, | |
| "eval_rouge1": 0.3976, | |
| "eval_rouge2": 0.167, | |
| "eval_rougeL": 0.2172, | |
| "eval_rougeLsum": 0.3428, | |
| "eval_runtime": 21.1438, | |
| "eval_samples_per_second": 11.729, | |
| "eval_steps_per_second": 0.189, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.096395492553711, | |
| "eval_rouge1": 0.3975, | |
| "eval_rouge2": 0.167, | |
| "eval_rougeL": 0.2171, | |
| "eval_rougeLsum": 0.3426, | |
| "eval_runtime": 21.6504, | |
| "eval_samples_per_second": 11.455, | |
| "eval_steps_per_second": 0.185, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.0916852951049805, | |
| "eval_rouge1": 0.3979, | |
| "eval_rouge2": 0.167, | |
| "eval_rougeL": 0.2168, | |
| "eval_rougeLsum": 0.3427, | |
| "eval_runtime": 21.4782, | |
| "eval_samples_per_second": 11.547, | |
| "eval_steps_per_second": 0.186, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.090492248535156, | |
| "eval_rouge1": 0.3977, | |
| "eval_rouge2": 0.1672, | |
| "eval_rougeL": 0.2173, | |
| "eval_rougeLsum": 0.3428, | |
| "eval_runtime": 21.7128, | |
| "eval_samples_per_second": 11.422, | |
| "eval_steps_per_second": 0.184, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.091054916381836, | |
| "eval_rouge1": 0.399, | |
| "eval_rouge2": 0.168, | |
| "eval_rougeL": 0.2176, | |
| "eval_rougeLsum": 0.3436, | |
| "eval_runtime": 21.2583, | |
| "eval_samples_per_second": 11.666, | |
| "eval_steps_per_second": 0.188, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.0864386558532715, | |
| "eval_rouge1": 0.3985, | |
| "eval_rouge2": 0.1675, | |
| "eval_rougeL": 0.2172, | |
| "eval_rougeLsum": 0.3431, | |
| "eval_runtime": 21.4489, | |
| "eval_samples_per_second": 11.562, | |
| "eval_steps_per_second": 0.186, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.082566261291504, | |
| "eval_rouge1": 0.4004, | |
| "eval_rouge2": 0.1686, | |
| "eval_rougeL": 0.2186, | |
| "eval_rougeLsum": 0.3451, | |
| "eval_runtime": 21.4779, | |
| "eval_samples_per_second": 11.547, | |
| "eval_steps_per_second": 0.186, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.0813798904418945, | |
| "eval_rouge1": 0.4009, | |
| "eval_rouge2": 0.1689, | |
| "eval_rougeL": 0.2189, | |
| "eval_rougeLsum": 0.3454, | |
| "eval_runtime": 21.5568, | |
| "eval_samples_per_second": 11.504, | |
| "eval_steps_per_second": 0.186, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.082016944885254, | |
| "eval_rouge1": 0.3999, | |
| "eval_rouge2": 0.1682, | |
| "eval_rougeL": 0.218, | |
| "eval_rougeLsum": 0.3444, | |
| "eval_runtime": 21.5727, | |
| "eval_samples_per_second": 11.496, | |
| "eval_steps_per_second": 0.185, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.082878589630127, | |
| "eval_rouge1": 0.4076, | |
| "eval_rouge2": 0.1718, | |
| "eval_rougeL": 0.2222, | |
| "eval_rougeLsum": 0.3508, | |
| "eval_runtime": 20.8434, | |
| "eval_samples_per_second": 11.898, | |
| "eval_steps_per_second": 0.192, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.080228805541992, | |
| "eval_rouge1": 0.405, | |
| "eval_rouge2": 0.1705, | |
| "eval_rougeL": 0.221, | |
| "eval_rougeLsum": 0.3488, | |
| "eval_runtime": 21.1916, | |
| "eval_samples_per_second": 11.703, | |
| "eval_steps_per_second": 0.189, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.07808780670166, | |
| "eval_rouge1": 0.4052, | |
| "eval_rouge2": 0.1709, | |
| "eval_rougeL": 0.2212, | |
| "eval_rougeLsum": 0.3491, | |
| "eval_runtime": 21.3026, | |
| "eval_samples_per_second": 11.642, | |
| "eval_steps_per_second": 0.188, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.077059268951416, | |
| "eval_rouge1": 0.4064, | |
| "eval_rouge2": 0.1711, | |
| "eval_rougeL": 0.2216, | |
| "eval_rougeLsum": 0.3498, | |
| "eval_runtime": 20.9702, | |
| "eval_samples_per_second": 11.826, | |
| "eval_steps_per_second": 0.191, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.075596809387207, | |
| "eval_rouge1": 0.4086, | |
| "eval_rouge2": 0.1723, | |
| "eval_rougeL": 0.223, | |
| "eval_rougeLsum": 0.3517, | |
| "eval_runtime": 21.1984, | |
| "eval_samples_per_second": 11.699, | |
| "eval_steps_per_second": 0.189, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.075705528259277, | |
| "eval_rouge1": 0.4075, | |
| "eval_rouge2": 0.1719, | |
| "eval_rougeL": 0.2224, | |
| "eval_rougeLsum": 0.3509, | |
| "eval_runtime": 20.7964, | |
| "eval_samples_per_second": 11.925, | |
| "eval_steps_per_second": 0.192, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.075275421142578, | |
| "eval_rouge1": 0.4081, | |
| "eval_rouge2": 0.1722, | |
| "eval_rougeL": 0.2224, | |
| "eval_rougeLsum": 0.3509, | |
| "eval_runtime": 20.9972, | |
| "eval_samples_per_second": 11.811, | |
| "eval_steps_per_second": 0.191, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.076692581176758, | |
| "eval_rouge1": 0.4132, | |
| "eval_rouge2": 0.1751, | |
| "eval_rougeL": 0.2258, | |
| "eval_rougeLsum": 0.3553, | |
| "eval_runtime": 21.0313, | |
| "eval_samples_per_second": 11.792, | |
| "eval_steps_per_second": 0.19, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.075990676879883, | |
| "eval_rouge1": 0.4108, | |
| "eval_rouge2": 0.1737, | |
| "eval_rougeL": 0.2242, | |
| "eval_rougeLsum": 0.3533, | |
| "eval_runtime": 20.714, | |
| "eval_samples_per_second": 11.973, | |
| "eval_steps_per_second": 0.193, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.074672222137451, | |
| "eval_rouge1": 0.4126, | |
| "eval_rouge2": 0.1747, | |
| "eval_rougeL": 0.2253, | |
| "eval_rougeLsum": 0.3546, | |
| "eval_runtime": 21.1511, | |
| "eval_samples_per_second": 11.725, | |
| "eval_steps_per_second": 0.189, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 31.25, | |
| "grad_norm": 237350.25, | |
| "learning_rate": 2.1428571428571427e-06, | |
| "loss": 6.1153, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.076193809509277, | |
| "eval_rouge1": 0.4119, | |
| "eval_rouge2": 0.1744, | |
| "eval_rougeL": 0.2248, | |
| "eval_rougeLsum": 0.3541, | |
| "eval_runtime": 20.5412, | |
| "eval_samples_per_second": 12.073, | |
| "eval_steps_per_second": 0.195, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_gen_len": 600.0, | |
| "eval_loss": 6.074151039123535, | |
| "eval_rouge1": 0.4123, | |
| "eval_rouge2": 0.1746, | |
| "eval_rougeL": 0.2251, | |
| "eval_rougeLsum": 0.3545, | |
| "eval_runtime": 21.0056, | |
| "eval_samples_per_second": 11.806, | |
| "eval_steps_per_second": 0.19, | |
| "step": 528 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 560, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 35, | |
| "save_steps": 500, | |
| "total_flos": 8527788048384000.0, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |