| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.91644908616188, |
| "global_step": 4500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.7828546562228025e-05, |
| "loss": 2.0396, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.5652741514360316e-05, |
| "loss": 1.9399, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_gen_len": 19.0, |
| "eval_loss": 1.7441270351409912, |
| "eval_rouge1": 0.246, |
| "eval_rouge2": 0.1167, |
| "eval_rougeL": 0.2027, |
| "eval_rougeLsum": 0.2025, |
| "eval_runtime": 183.8875, |
| "eval_samples_per_second": 12.497, |
| "eval_steps_per_second": 1.566, |
| "step": 1149 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 1.348128807658834e-05, |
| "loss": 1.9212, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 1.1305483028720628e-05, |
| "loss": 1.8858, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_gen_len": 18.9987, |
| "eval_loss": 1.7244781255722046, |
| "eval_rouge1": 0.246, |
| "eval_rouge2": 0.116, |
| "eval_rougeL": 0.203, |
| "eval_rougeLsum": 0.2028, |
| "eval_runtime": 185.1368, |
| "eval_samples_per_second": 12.412, |
| "eval_steps_per_second": 1.556, |
| "step": 2298 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 9.129677980852917e-06, |
| "loss": 1.8875, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 6.953872932985205e-06, |
| "loss": 1.8767, |
| "step": 3000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_gen_len": 18.9956, |
| "eval_loss": 1.7172995805740356, |
| "eval_rouge1": 0.2459, |
| "eval_rouge2": 0.116, |
| "eval_rougeL": 0.2024, |
| "eval_rougeLsum": 0.2022, |
| "eval_runtime": 180.4003, |
| "eval_samples_per_second": 12.738, |
| "eval_steps_per_second": 1.596, |
| "step": 3447 |
| }, |
| { |
| "epoch": 3.05, |
| "learning_rate": 4.778067885117494e-06, |
| "loss": 1.8909, |
| "step": 3500 |
| }, |
| { |
| "epoch": 3.48, |
| "learning_rate": 2.6022628372497826e-06, |
| "loss": 1.8648, |
| "step": 4000 |
| }, |
| { |
| "epoch": 3.92, |
| "learning_rate": 4.264577893820714e-07, |
| "loss": 1.8835, |
| "step": 4500 |
| } |
| ], |
| "max_steps": 4596, |
| "num_train_epochs": 4, |
| "total_flos": 9722809102368768.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|