| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 7.936507936507937, |
| "global_step": 7500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.9007407407407405e-05, |
| "loss": 6.9695, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 4.147339344024658, |
| "eval_rouge1": 0.0, |
| "eval_rouge2": 0.0, |
| "eval_rougeL": 0.0, |
| "eval_rougeLsum": 0.0, |
| "eval_runtime": 20.6593, |
| "eval_samples_per_second": 7.745, |
| "eval_steps_per_second": 0.968, |
| "step": 945 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 4.201481481481482e-05, |
| "loss": 4.5567, |
| "step": 1888 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 3.9342198371887207, |
| "eval_rouge1": 0.625, |
| "eval_rouge2": 0.0, |
| "eval_rougeL": 0.625, |
| "eval_rougeLsum": 0.625, |
| "eval_runtime": 20.7037, |
| "eval_samples_per_second": 7.728, |
| "eval_steps_per_second": 0.966, |
| "step": 1890 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 3.502222222222222e-05, |
| "loss": 4.2704, |
| "step": 2832 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 3.8318352699279785, |
| "eval_rouge1": 0.0, |
| "eval_rouge2": 0.0, |
| "eval_rougeL": 0.0, |
| "eval_rougeLsum": 0.0, |
| "eval_runtime": 18.8952, |
| "eval_samples_per_second": 8.468, |
| "eval_steps_per_second": 1.058, |
| "step": 2835 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 2.8029629629629628e-05, |
| "loss": 4.1165, |
| "step": 3776 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 3.836942195892334, |
| "eval_rouge1": 0.0, |
| "eval_rouge2": 0.0, |
| "eval_rougeL": 0.0, |
| "eval_rougeLsum": 0.0, |
| "eval_runtime": 19.8612, |
| "eval_samples_per_second": 8.056, |
| "eval_steps_per_second": 1.007, |
| "step": 3780 |
| }, |
| { |
| "epoch": 4.99, |
| "learning_rate": 2.1037037037037034e-05, |
| "loss": 4.015, |
| "step": 4720 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_loss": 3.792680025100708, |
| "eval_rouge1": 0.0, |
| "eval_rouge2": 0.0, |
| "eval_rougeL": 0.0, |
| "eval_rougeLsum": 0.0, |
| "eval_runtime": 20.3459, |
| "eval_samples_per_second": 7.864, |
| "eval_steps_per_second": 0.983, |
| "step": 4725 |
| }, |
| { |
| "epoch": 5.99, |
| "learning_rate": 1.4044444444444445e-05, |
| "loss": 3.9414, |
| "step": 5664 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_loss": 3.7542567253112793, |
| "eval_rouge1": 0.0, |
| "eval_rouge2": 0.0, |
| "eval_rougeL": 0.0, |
| "eval_rougeLsum": 0.0, |
| "eval_runtime": 19.4583, |
| "eval_samples_per_second": 8.223, |
| "eval_steps_per_second": 1.028, |
| "step": 5670 |
| }, |
| { |
| "epoch": 6.99, |
| "learning_rate": 7.051851851851851e-06, |
| "loss": 3.893, |
| "step": 6608 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_loss": 3.7550926208496094, |
| "eval_rouge1": 0.0, |
| "eval_rouge2": 0.0, |
| "eval_rougeL": 0.0, |
| "eval_rougeLsum": 0.0, |
| "eval_runtime": 19.588, |
| "eval_samples_per_second": 8.168, |
| "eval_steps_per_second": 1.021, |
| "step": 6615 |
| } |
| ], |
| "max_steps": 7560, |
| "num_train_epochs": 8, |
| "total_flos": 1.032980990088192e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|