| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 397, | |
| "global_step": 2384, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.17, | |
| "eval_gen_len": 81.6102, | |
| "eval_loss": 1.3396695852279663, | |
| "eval_rouge1": 52.6908, | |
| "eval_rouge2": 34.3367, | |
| "eval_rougeL": 43.9351, | |
| "eval_rougeLsum": 44.0153, | |
| "eval_runtime": 41.4692, | |
| "eval_samples_per_second": 1.423, | |
| "eval_steps_per_second": 0.723, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 6.220447063446045, | |
| "learning_rate": 1.895763422818792e-05, | |
| "loss": 0.719, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_gen_len": 84.2203, | |
| "eval_loss": 1.363059639930725, | |
| "eval_rouge1": 54.543, | |
| "eval_rouge2": 36.4199, | |
| "eval_rougeL": 45.8273, | |
| "eval_rougeLsum": 45.7925, | |
| "eval_runtime": 42.2281, | |
| "eval_samples_per_second": 1.397, | |
| "eval_steps_per_second": 0.71, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 8.490495681762695, | |
| "learning_rate": 1.7908976510067115e-05, | |
| "loss": 0.7459, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_gen_len": 85.2373, | |
| "eval_loss": 1.3582559823989868, | |
| "eval_rouge1": 53.2262, | |
| "eval_rouge2": 34.8889, | |
| "eval_rougeL": 44.1043, | |
| "eval_rougeLsum": 44.0998, | |
| "eval_runtime": 42.6328, | |
| "eval_samples_per_second": 1.384, | |
| "eval_steps_per_second": 0.704, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 6.794929027557373, | |
| "learning_rate": 1.686031879194631e-05, | |
| "loss": 0.7154, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_gen_len": 83.7797, | |
| "eval_loss": 1.3886514902114868, | |
| "eval_rouge1": 54.9928, | |
| "eval_rouge2": 37.1125, | |
| "eval_rougeL": 46.4105, | |
| "eval_rougeLsum": 46.4044, | |
| "eval_runtime": 42.3464, | |
| "eval_samples_per_second": 1.393, | |
| "eval_steps_per_second": 0.708, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_gen_len": 85.8814, | |
| "eval_loss": 1.3405011892318726, | |
| "eval_rouge1": 52.5543, | |
| "eval_rouge2": 33.702, | |
| "eval_rougeL": 42.9428, | |
| "eval_rougeLsum": 43.0015, | |
| "eval_runtime": 43.1199, | |
| "eval_samples_per_second": 1.368, | |
| "eval_steps_per_second": 0.696, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 8.691970825195312, | |
| "learning_rate": 1.5811661073825504e-05, | |
| "loss": 0.7507, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_gen_len": 81.7797, | |
| "eval_loss": 1.3399206399917603, | |
| "eval_rouge1": 52.4327, | |
| "eval_rouge2": 34.1158, | |
| "eval_rougeL": 43.2742, | |
| "eval_rougeLsum": 43.1693, | |
| "eval_runtime": 41.935, | |
| "eval_samples_per_second": 1.407, | |
| "eval_steps_per_second": 0.715, | |
| "step": 2382 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 9536, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 1192, | |
| "total_flos": 5.17811143698432e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |