| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 11.049723756906078, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "learning_rate": 2.88e-05, |
| "loss": 0.3703, |
| "step": 181 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 2.7600000000000003e-05, |
| "loss": 0.2003, |
| "step": 362 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 2.64e-05, |
| "loss": 0.1576, |
| "step": 543 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.1703525185585022, |
| "eval_rouge1": 0.3235, |
| "eval_rouge2": 0.2582, |
| "eval_rougeL": 0.3229, |
| "eval_rougeLsum": 0.323, |
| "eval_runtime": 40.4298, |
| "eval_samples_per_second": 4.502, |
| "eval_steps_per_second": 1.138, |
| "step": 543 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 2.52e-05, |
| "loss": 0.1318, |
| "step": 724 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.1649511605501175, |
| "eval_rouge1": 0.3177, |
| "eval_rouge2": 0.2531, |
| "eval_rougeL": 0.3173, |
| "eval_rougeLsum": 0.3176, |
| "eval_runtime": 38.7196, |
| "eval_samples_per_second": 4.7, |
| "eval_steps_per_second": 1.188, |
| "step": 724 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 2.4e-05, |
| "loss": 0.1108, |
| "step": 905 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.16952580213546753, |
| "eval_rouge1": 0.3228, |
| "eval_rouge2": 0.2576, |
| "eval_rougeL": 0.3217, |
| "eval_rougeLsum": 0.3218, |
| "eval_runtime": 39.283, |
| "eval_samples_per_second": 4.633, |
| "eval_steps_per_second": 1.171, |
| "step": 905 |
| }, |
| { |
| "epoch": 6.0, |
| "learning_rate": 2.2800000000000002e-05, |
| "loss": 0.097, |
| "step": 1086 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.16314710676670074, |
| "eval_rouge1": 0.3178, |
| "eval_rouge2": 0.2533, |
| "eval_rougeL": 0.3177, |
| "eval_rougeLsum": 0.3176, |
| "eval_runtime": 39.6344, |
| "eval_samples_per_second": 4.592, |
| "eval_steps_per_second": 1.161, |
| "step": 1086 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 2.16e-05, |
| "loss": 0.0835, |
| "step": 1267 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.17035609483718872, |
| "eval_rouge1": 0.3231, |
| "eval_rouge2": 0.2582, |
| "eval_rougeL": 0.3226, |
| "eval_rougeLsum": 0.3228, |
| "eval_runtime": 39.5546, |
| "eval_samples_per_second": 4.601, |
| "eval_steps_per_second": 1.163, |
| "step": 1267 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 2.04e-05, |
| "loss": 0.0746, |
| "step": 1448 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.17403215169906616, |
| "eval_rouge1": 0.3209, |
| "eval_rouge2": 0.2566, |
| "eval_rougeL": 0.3205, |
| "eval_rougeLsum": 0.3209, |
| "eval_runtime": 38.8297, |
| "eval_samples_per_second": 4.687, |
| "eval_steps_per_second": 1.185, |
| "step": 1448 |
| }, |
| { |
| "epoch": 9.0, |
| "learning_rate": 1.9200000000000003e-05, |
| "loss": 0.0647, |
| "step": 1629 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.19373367726802826, |
| "eval_rouge1": 0.3198, |
| "eval_rouge2": 0.2555, |
| "eval_rougeL": 0.3196, |
| "eval_rougeLsum": 0.3193, |
| "eval_runtime": 39.2617, |
| "eval_samples_per_second": 4.636, |
| "eval_steps_per_second": 1.172, |
| "step": 1629 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 1.8e-05, |
| "loss": 0.0586, |
| "step": 1810 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.193358913064003, |
| "eval_rouge1": 0.324, |
| "eval_rouge2": 0.2596, |
| "eval_rougeL": 0.3235, |
| "eval_rougeLsum": 0.3232, |
| "eval_runtime": 39.4009, |
| "eval_samples_per_second": 4.619, |
| "eval_steps_per_second": 1.167, |
| "step": 1810 |
| }, |
| { |
| "epoch": 11.0, |
| "learning_rate": 1.6800000000000002e-05, |
| "loss": 0.0522, |
| "step": 1991 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.20207685232162476, |
| "eval_rouge1": 0.3225, |
| "eval_rouge2": 0.2588, |
| "eval_rougeL": 0.3216, |
| "eval_rougeLsum": 0.3219, |
| "eval_runtime": 39.3514, |
| "eval_samples_per_second": 4.625, |
| "eval_steps_per_second": 1.169, |
| "step": 1991 |
| } |
| ], |
| "max_steps": 4525, |
| "num_train_epochs": 25, |
| "total_flos": 6155837130719232.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|