{ "best_metric": null, "best_model_checkpoint": null, "epoch": 11.049723756906078, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 2.88e-05, "loss": 0.3703, "step": 181 }, { "epoch": 2.0, "learning_rate": 2.7600000000000003e-05, "loss": 0.2003, "step": 362 }, { "epoch": 3.0, "learning_rate": 2.64e-05, "loss": 0.1576, "step": 543 }, { "epoch": 3.0, "eval_gen_len": 19.0, "eval_loss": 0.1703525185585022, "eval_rouge1": 0.3235, "eval_rouge2": 0.2582, "eval_rougeL": 0.3229, "eval_rougeLsum": 0.323, "eval_runtime": 40.4298, "eval_samples_per_second": 4.502, "eval_steps_per_second": 1.138, "step": 543 }, { "epoch": 4.0, "learning_rate": 2.52e-05, "loss": 0.1318, "step": 724 }, { "epoch": 4.0, "eval_gen_len": 19.0, "eval_loss": 0.1649511605501175, "eval_rouge1": 0.3177, "eval_rouge2": 0.2531, "eval_rougeL": 0.3173, "eval_rougeLsum": 0.3176, "eval_runtime": 38.7196, "eval_samples_per_second": 4.7, "eval_steps_per_second": 1.188, "step": 724 }, { "epoch": 5.0, "learning_rate": 2.4e-05, "loss": 0.1108, "step": 905 }, { "epoch": 5.0, "eval_gen_len": 19.0, "eval_loss": 0.16952580213546753, "eval_rouge1": 0.3228, "eval_rouge2": 0.2576, "eval_rougeL": 0.3217, "eval_rougeLsum": 0.3218, "eval_runtime": 39.283, "eval_samples_per_second": 4.633, "eval_steps_per_second": 1.171, "step": 905 }, { "epoch": 6.0, "learning_rate": 2.2800000000000002e-05, "loss": 0.097, "step": 1086 }, { "epoch": 6.0, "eval_gen_len": 19.0, "eval_loss": 0.16314710676670074, "eval_rouge1": 0.3178, "eval_rouge2": 0.2533, "eval_rougeL": 0.3177, "eval_rougeLsum": 0.3176, "eval_runtime": 39.6344, "eval_samples_per_second": 4.592, "eval_steps_per_second": 1.161, "step": 1086 }, { "epoch": 7.0, "learning_rate": 2.16e-05, "loss": 0.0835, "step": 1267 }, { "epoch": 7.0, "eval_gen_len": 19.0, "eval_loss": 0.17035609483718872, "eval_rouge1": 0.3231, "eval_rouge2": 0.2582, "eval_rougeL": 0.3226, "eval_rougeLsum": 0.3228, "eval_runtime": 39.5546, "eval_samples_per_second": 4.601, "eval_steps_per_second": 1.163, "step": 1267 }, { "epoch": 8.0, "learning_rate": 2.04e-05, "loss": 0.0746, "step": 1448 }, { "epoch": 8.0, "eval_gen_len": 19.0, "eval_loss": 0.17403215169906616, "eval_rouge1": 0.3209, "eval_rouge2": 0.2566, "eval_rougeL": 0.3205, "eval_rougeLsum": 0.3209, "eval_runtime": 38.8297, "eval_samples_per_second": 4.687, "eval_steps_per_second": 1.185, "step": 1448 }, { "epoch": 9.0, "learning_rate": 1.9200000000000003e-05, "loss": 0.0647, "step": 1629 }, { "epoch": 9.0, "eval_gen_len": 19.0, "eval_loss": 0.19373367726802826, "eval_rouge1": 0.3198, "eval_rouge2": 0.2555, "eval_rougeL": 0.3196, "eval_rougeLsum": 0.3193, "eval_runtime": 39.2617, "eval_samples_per_second": 4.636, "eval_steps_per_second": 1.172, "step": 1629 }, { "epoch": 10.0, "learning_rate": 1.8e-05, "loss": 0.0586, "step": 1810 }, { "epoch": 10.0, "eval_gen_len": 19.0, "eval_loss": 0.193358913064003, "eval_rouge1": 0.324, "eval_rouge2": 0.2596, "eval_rougeL": 0.3235, "eval_rougeLsum": 0.3232, "eval_runtime": 39.4009, "eval_samples_per_second": 4.619, "eval_steps_per_second": 1.167, "step": 1810 }, { "epoch": 11.0, "learning_rate": 1.6800000000000002e-05, "loss": 0.0522, "step": 1991 }, { "epoch": 11.0, "eval_gen_len": 19.0, "eval_loss": 0.20207685232162476, "eval_rouge1": 0.3225, "eval_rouge2": 0.2588, "eval_rougeL": 0.3216, "eval_rougeLsum": 0.3219, "eval_runtime": 39.3514, "eval_samples_per_second": 4.625, "eval_steps_per_second": 1.169, "step": 1991 } ], "max_steps": 4525, "num_train_epochs": 25, "total_flos": 6155837130719232.0, "trial_name": null, "trial_params": null }