| { |
| "best_metric": 1.2624495029449463, |
| "epoch": 2.0, |
| "global_step": 12460, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.08, |
| "learning_rate": 9.899678972712681e-06, |
| "loss": 1.5273, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 9.799357945425362e-06, |
| "loss": 1.3678, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 9.699036918138043e-06, |
| "loss": 1.2869, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 9.598715890850723e-06, |
| "loss": 1.2951, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 9.498394863563404e-06, |
| "loss": 1.2578, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 9.398073836276083e-06, |
| "loss": 1.2532, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 9.297752808988765e-06, |
| "loss": 1.2441, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 9.197431781701446e-06, |
| "loss": 1.212, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 9.097110754414125e-06, |
| "loss": 1.2009, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 8.996789727126807e-06, |
| "loss": 1.1731, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 8.896468699839486e-06, |
| "loss": 1.2008, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 8.796147672552167e-06, |
| "loss": 1.1732, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_gen_len": 18.808, |
| "eval_loss": 1.2909361124038696, |
| "eval_rouge1": 38.4799, |
| "eval_rouge2": 14.1438, |
| "eval_rougeL": 32.0853, |
| "eval_rougeLsum": 34.22, |
| "eval_runtime": 725.7342, |
| "eval_samples_per_second": 2.067, |
| "eval_steps_per_second": 1.033, |
| "step": 6230 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 8.69582664526485e-06, |
| "loss": 1.1773, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 8.595505617977528e-06, |
| "loss": 1.1377, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 8.49518459069021e-06, |
| "loss": 1.1186, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 8.39486356340289e-06, |
| "loss": 1.106, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 8.29454253611557e-06, |
| "loss": 1.1346, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 8.19422150882825e-06, |
| "loss": 1.1376, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 8.093900481540931e-06, |
| "loss": 1.1118, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 7.993579454253612e-06, |
| "loss": 1.1356, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 7.893258426966293e-06, |
| "loss": 1.0916, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 7.792937399678973e-06, |
| "loss": 1.125, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 7.692616372391654e-06, |
| "loss": 1.1061, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 7.592295345104335e-06, |
| "loss": 1.1101, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_gen_len": 18.808, |
| "eval_loss": 1.2624495029449463, |
| "eval_rouge1": 38.462, |
| "eval_rouge2": 14.3899, |
| "eval_rougeL": 32.0676, |
| "eval_rougeLsum": 34.0319, |
| "eval_runtime": 723.1779, |
| "eval_samples_per_second": 2.074, |
| "eval_steps_per_second": 1.037, |
| "step": 12460 |
| } |
| ], |
| "max_steps": 49840, |
| "num_train_epochs": 8, |
| "total_flos": 2.96391223296e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|