{ "best_metric": 6.074151039123535, "best_model_checkpoint": "bill_sum_finetune_test_gpt2/checkpoint-528", "epoch": 33.0, "eval_steps": 500, "global_step": 528, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_gen_len": 600.0, "eval_loss": 6.843741416931152, "eval_rouge1": 0.4053, "eval_rouge2": 0.1708, "eval_rougeL": 0.2228, "eval_rougeLsum": 0.35, "eval_runtime": 21.0375, "eval_samples_per_second": 11.788, "eval_steps_per_second": 0.19, "step": 16 }, { "epoch": 2.0, "eval_gen_len": 600.0, "eval_loss": 6.511104583740234, "eval_rouge1": 0.3978, "eval_rouge2": 0.1673, "eval_rougeL": 0.2181, "eval_rougeLsum": 0.3434, "eval_runtime": 21.5368, "eval_samples_per_second": 11.515, "eval_steps_per_second": 0.186, "step": 32 }, { "epoch": 3.0, "eval_gen_len": 600.0, "eval_loss": 6.349050998687744, "eval_rouge1": 0.3988, "eval_rouge2": 0.1679, "eval_rougeL": 0.2188, "eval_rougeLsum": 0.3443, "eval_runtime": 21.2101, "eval_samples_per_second": 11.693, "eval_steps_per_second": 0.189, "step": 48 }, { "epoch": 4.0, "eval_gen_len": 600.0, "eval_loss": 6.258257865905762, "eval_rouge1": 0.3996, "eval_rouge2": 0.1681, "eval_rougeL": 0.2189, "eval_rougeLsum": 0.345, "eval_runtime": 21.3129, "eval_samples_per_second": 11.636, "eval_steps_per_second": 0.188, "step": 64 }, { "epoch": 5.0, "eval_gen_len": 600.0, "eval_loss": 6.200411796569824, "eval_rouge1": 0.3986, "eval_rouge2": 0.1677, "eval_rougeL": 0.2184, "eval_rougeLsum": 0.3439, "eval_runtime": 21.6445, "eval_samples_per_second": 11.458, "eval_steps_per_second": 0.185, "step": 80 }, { "epoch": 6.0, "eval_gen_len": 600.0, "eval_loss": 6.170421600341797, "eval_rouge1": 0.3981, "eval_rouge2": 0.1674, "eval_rougeL": 0.2178, "eval_rougeLsum": 0.3432, "eval_runtime": 21.242, "eval_samples_per_second": 11.675, "eval_steps_per_second": 0.188, "step": 96 }, { "epoch": 7.0, "eval_gen_len": 600.0, "eval_loss": 6.150304317474365, "eval_rouge1": 0.3976, "eval_rouge2": 0.1672, "eval_rougeL": 0.2176, "eval_rougeLsum": 0.3428, "eval_runtime": 21.3562, "eval_samples_per_second": 11.613, "eval_steps_per_second": 0.187, "step": 112 }, { "epoch": 8.0, "eval_gen_len": 600.0, "eval_loss": 6.135751724243164, "eval_rouge1": 0.3977, "eval_rouge2": 0.1672, "eval_rougeL": 0.2175, "eval_rougeLsum": 0.3427, "eval_runtime": 21.5836, "eval_samples_per_second": 11.49, "eval_steps_per_second": 0.185, "step": 128 }, { "epoch": 9.0, "eval_gen_len": 600.0, "eval_loss": 6.122563362121582, "eval_rouge1": 0.3977, "eval_rouge2": 0.1671, "eval_rougeL": 0.2171, "eval_rougeLsum": 0.3425, "eval_runtime": 21.5829, "eval_samples_per_second": 11.491, "eval_steps_per_second": 0.185, "step": 144 }, { "epoch": 10.0, "eval_gen_len": 600.0, "eval_loss": 6.114274501800537, "eval_rouge1": 0.397, "eval_rouge2": 0.1669, "eval_rougeL": 0.2174, "eval_rougeLsum": 0.3427, "eval_runtime": 21.508, "eval_samples_per_second": 11.531, "eval_steps_per_second": 0.186, "step": 160 }, { "epoch": 11.0, "eval_gen_len": 600.0, "eval_loss": 6.108905792236328, "eval_rouge1": 0.3973, "eval_rouge2": 0.167, "eval_rougeL": 0.2173, "eval_rougeLsum": 0.3427, "eval_runtime": 21.2386, "eval_samples_per_second": 11.677, "eval_steps_per_second": 0.188, "step": 176 }, { "epoch": 12.0, "eval_gen_len": 600.0, "eval_loss": 6.107725620269775, "eval_rouge1": 0.3974, "eval_rouge2": 0.167, "eval_rougeL": 0.2173, "eval_rougeLsum": 0.3426, "eval_runtime": 21.6952, "eval_samples_per_second": 11.431, "eval_steps_per_second": 0.184, "step": 192 }, { "epoch": 13.0, "eval_gen_len": 600.0, "eval_loss": 6.099628448486328, "eval_rouge1": 0.3976, "eval_rouge2": 0.167, "eval_rougeL": 0.2172, "eval_rougeLsum": 0.3428, "eval_runtime": 21.1438, "eval_samples_per_second": 11.729, "eval_steps_per_second": 0.189, "step": 208 }, { "epoch": 14.0, "eval_gen_len": 600.0, "eval_loss": 6.096395492553711, "eval_rouge1": 0.3975, "eval_rouge2": 0.167, "eval_rougeL": 0.2171, "eval_rougeLsum": 0.3426, "eval_runtime": 21.6504, "eval_samples_per_second": 11.455, "eval_steps_per_second": 0.185, "step": 224 }, { "epoch": 15.0, "eval_gen_len": 600.0, "eval_loss": 6.0916852951049805, "eval_rouge1": 0.3979, "eval_rouge2": 0.167, "eval_rougeL": 0.2168, "eval_rougeLsum": 0.3427, "eval_runtime": 21.4782, "eval_samples_per_second": 11.547, "eval_steps_per_second": 0.186, "step": 240 }, { "epoch": 16.0, "eval_gen_len": 600.0, "eval_loss": 6.090492248535156, "eval_rouge1": 0.3977, "eval_rouge2": 0.1672, "eval_rougeL": 0.2173, "eval_rougeLsum": 0.3428, "eval_runtime": 21.7128, "eval_samples_per_second": 11.422, "eval_steps_per_second": 0.184, "step": 256 }, { "epoch": 17.0, "eval_gen_len": 600.0, "eval_loss": 6.091054916381836, "eval_rouge1": 0.399, "eval_rouge2": 0.168, "eval_rougeL": 0.2176, "eval_rougeLsum": 0.3436, "eval_runtime": 21.2583, "eval_samples_per_second": 11.666, "eval_steps_per_second": 0.188, "step": 272 }, { "epoch": 18.0, "eval_gen_len": 600.0, "eval_loss": 6.0864386558532715, "eval_rouge1": 0.3985, "eval_rouge2": 0.1675, "eval_rougeL": 0.2172, "eval_rougeLsum": 0.3431, "eval_runtime": 21.4489, "eval_samples_per_second": 11.562, "eval_steps_per_second": 0.186, "step": 288 }, { "epoch": 19.0, "eval_gen_len": 600.0, "eval_loss": 6.082566261291504, "eval_rouge1": 0.4004, "eval_rouge2": 0.1686, "eval_rougeL": 0.2186, "eval_rougeLsum": 0.3451, "eval_runtime": 21.4779, "eval_samples_per_second": 11.547, "eval_steps_per_second": 0.186, "step": 304 }, { "epoch": 20.0, "eval_gen_len": 600.0, "eval_loss": 6.0813798904418945, "eval_rouge1": 0.4009, "eval_rouge2": 0.1689, "eval_rougeL": 0.2189, "eval_rougeLsum": 0.3454, "eval_runtime": 21.5568, "eval_samples_per_second": 11.504, "eval_steps_per_second": 0.186, "step": 320 }, { "epoch": 21.0, "eval_gen_len": 600.0, "eval_loss": 6.082016944885254, "eval_rouge1": 0.3999, "eval_rouge2": 0.1682, "eval_rougeL": 0.218, "eval_rougeLsum": 0.3444, "eval_runtime": 21.5727, "eval_samples_per_second": 11.496, "eval_steps_per_second": 0.185, "step": 336 }, { "epoch": 22.0, "eval_gen_len": 600.0, "eval_loss": 6.082878589630127, "eval_rouge1": 0.4076, "eval_rouge2": 0.1718, "eval_rougeL": 0.2222, "eval_rougeLsum": 0.3508, "eval_runtime": 20.8434, "eval_samples_per_second": 11.898, "eval_steps_per_second": 0.192, "step": 352 }, { "epoch": 23.0, "eval_gen_len": 600.0, "eval_loss": 6.080228805541992, "eval_rouge1": 0.405, "eval_rouge2": 0.1705, "eval_rougeL": 0.221, "eval_rougeLsum": 0.3488, "eval_runtime": 21.1916, "eval_samples_per_second": 11.703, "eval_steps_per_second": 0.189, "step": 368 }, { "epoch": 24.0, "eval_gen_len": 600.0, "eval_loss": 6.07808780670166, "eval_rouge1": 0.4052, "eval_rouge2": 0.1709, "eval_rougeL": 0.2212, "eval_rougeLsum": 0.3491, "eval_runtime": 21.3026, "eval_samples_per_second": 11.642, "eval_steps_per_second": 0.188, "step": 384 }, { "epoch": 25.0, "eval_gen_len": 600.0, "eval_loss": 6.077059268951416, "eval_rouge1": 0.4064, "eval_rouge2": 0.1711, "eval_rougeL": 0.2216, "eval_rougeLsum": 0.3498, "eval_runtime": 20.9702, "eval_samples_per_second": 11.826, "eval_steps_per_second": 0.191, "step": 400 }, { "epoch": 26.0, "eval_gen_len": 600.0, "eval_loss": 6.075596809387207, "eval_rouge1": 0.4086, "eval_rouge2": 0.1723, "eval_rougeL": 0.223, "eval_rougeLsum": 0.3517, "eval_runtime": 21.1984, "eval_samples_per_second": 11.699, "eval_steps_per_second": 0.189, "step": 416 }, { "epoch": 27.0, "eval_gen_len": 600.0, "eval_loss": 6.075705528259277, "eval_rouge1": 0.4075, "eval_rouge2": 0.1719, "eval_rougeL": 0.2224, "eval_rougeLsum": 0.3509, "eval_runtime": 20.7964, "eval_samples_per_second": 11.925, "eval_steps_per_second": 0.192, "step": 432 }, { "epoch": 28.0, "eval_gen_len": 600.0, "eval_loss": 6.075275421142578, "eval_rouge1": 0.4081, "eval_rouge2": 0.1722, "eval_rougeL": 0.2224, "eval_rougeLsum": 0.3509, "eval_runtime": 20.9972, "eval_samples_per_second": 11.811, "eval_steps_per_second": 0.191, "step": 448 }, { "epoch": 29.0, "eval_gen_len": 600.0, "eval_loss": 6.076692581176758, "eval_rouge1": 0.4132, "eval_rouge2": 0.1751, "eval_rougeL": 0.2258, "eval_rougeLsum": 0.3553, "eval_runtime": 21.0313, "eval_samples_per_second": 11.792, "eval_steps_per_second": 0.19, "step": 464 }, { "epoch": 30.0, "eval_gen_len": 600.0, "eval_loss": 6.075990676879883, "eval_rouge1": 0.4108, "eval_rouge2": 0.1737, "eval_rougeL": 0.2242, "eval_rougeLsum": 0.3533, "eval_runtime": 20.714, "eval_samples_per_second": 11.973, "eval_steps_per_second": 0.193, "step": 480 }, { "epoch": 31.0, "eval_gen_len": 600.0, "eval_loss": 6.074672222137451, "eval_rouge1": 0.4126, "eval_rouge2": 0.1747, "eval_rougeL": 0.2253, "eval_rougeLsum": 0.3546, "eval_runtime": 21.1511, "eval_samples_per_second": 11.725, "eval_steps_per_second": 0.189, "step": 496 }, { "epoch": 31.25, "grad_norm": 237350.25, "learning_rate": 2.1428571428571427e-06, "loss": 6.1153, "step": 500 }, { "epoch": 32.0, "eval_gen_len": 600.0, "eval_loss": 6.076193809509277, "eval_rouge1": 0.4119, "eval_rouge2": 0.1744, "eval_rougeL": 0.2248, "eval_rougeLsum": 0.3541, "eval_runtime": 20.5412, "eval_samples_per_second": 12.073, "eval_steps_per_second": 0.195, "step": 512 }, { "epoch": 33.0, "eval_gen_len": 600.0, "eval_loss": 6.074151039123535, "eval_rouge1": 0.4123, "eval_rouge2": 0.1746, "eval_rougeL": 0.2251, "eval_rougeLsum": 0.3545, "eval_runtime": 21.0056, "eval_samples_per_second": 11.806, "eval_steps_per_second": 0.19, "step": 528 } ], "logging_steps": 500, "max_steps": 560, "num_input_tokens_seen": 0, "num_train_epochs": 35, "save_steps": 500, "total_flos": 8527788048384000.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }