| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.999156296140055, |
| "eval_steps": 500, |
| "global_step": 9480, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8955696202531647e-05, |
| "loss": 2.9241, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.7900843881856543e-05, |
| "loss": 2.7783, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 1.6845991561181435e-05, |
| "loss": 2.7342, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.579113924050633e-05, |
| "loss": 2.6968, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_f1": 0.8701, |
| "eval_gen_len": 19.4437588989084, |
| "eval_loss": 2.538452386856079, |
| "eval_precision": 0.8766, |
| "eval_recall": 0.8641, |
| "eval_rouge1": 0.2634, |
| "eval_rouge2": 0.0907, |
| "eval_rougeL": 0.218, |
| "eval_rougeLsum": 0.2182, |
| "eval_runtime": 339.9314, |
| "eval_samples_per_second": 6.198, |
| "eval_steps_per_second": 1.55, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 1.4736286919831224e-05, |
| "loss": 2.6435, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 1.368354430379747e-05, |
| "loss": 2.4844, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 1.2628691983122363e-05, |
| "loss": 2.4626, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 1.1573839662447259e-05, |
| "loss": 2.4676, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 1.0523206751054854e-05, |
| "loss": 2.4746, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_f1": 0.8712, |
| "eval_gen_len": 19.25723777883246, |
| "eval_loss": 2.507711410522461, |
| "eval_precision": 0.8774, |
| "eval_recall": 0.8655, |
| "eval_rouge1": 0.273, |
| "eval_rouge2": 0.0941, |
| "eval_rougeL": 0.2238, |
| "eval_rougeLsum": 0.2239, |
| "eval_runtime": 335.7013, |
| "eval_samples_per_second": 6.276, |
| "eval_steps_per_second": 1.57, |
| "step": 4741 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 9.468354430379748e-06, |
| "loss": 2.3813, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 8.415611814345991e-06, |
| "loss": 2.3173, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 7.360759493670887e-06, |
| "loss": 2.3064, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 6.305907172995781e-06, |
| "loss": 2.3008, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 5.251054852320675e-06, |
| "loss": 2.3066, |
| "step": 7000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_f1": 0.8696, |
| "eval_gen_len": 19.307071665875654, |
| "eval_loss": 2.501197099685669, |
| "eval_precision": 0.8756, |
| "eval_recall": 0.864, |
| "eval_rouge1": 0.2671, |
| "eval_rouge2": 0.0936, |
| "eval_rougeL": 0.221, |
| "eval_rougeLsum": 0.2211, |
| "eval_runtime": 336.0052, |
| "eval_samples_per_second": 6.271, |
| "eval_steps_per_second": 1.568, |
| "step": 7111 |
| }, |
| { |
| "epoch": 3.16, |
| "learning_rate": 4.19620253164557e-06, |
| "loss": 2.2216, |
| "step": 7500 |
| }, |
| { |
| "epoch": 3.37, |
| "learning_rate": 3.141350210970465e-06, |
| "loss": 2.2071, |
| "step": 8000 |
| }, |
| { |
| "epoch": 3.59, |
| "learning_rate": 2.0864978902953587e-06, |
| "loss": 2.1852, |
| "step": 8500 |
| }, |
| { |
| "epoch": 3.8, |
| "learning_rate": 1.0316455696202532e-06, |
| "loss": 2.2041, |
| "step": 9000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_f1": 0.8705, |
| "eval_gen_len": 19.35548172757475, |
| "eval_loss": 2.503497838973999, |
| "eval_precision": 0.8768, |
| "eval_recall": 0.8648, |
| "eval_rouge1": 0.2709, |
| "eval_rouge2": 0.0948, |
| "eval_rougeL": 0.2244, |
| "eval_rougeLsum": 0.2244, |
| "eval_runtime": 336.6782, |
| "eval_samples_per_second": 6.258, |
| "eval_steps_per_second": 1.565, |
| "step": 9480 |
| }, |
| { |
| "epoch": 4.0, |
| "step": 9480, |
| "total_flos": 2.372598264346706e+17, |
| "train_loss": 2.436967770862177, |
| "train_runtime": 10313.084, |
| "train_samples_per_second": 14.709, |
| "train_steps_per_second": 0.919 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 9480, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 500, |
| "total_flos": 2.372598264346706e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|