| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.0, |
| "global_step": 572, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "eval_bleu": 7.7545, |
| "eval_gen_len": 14.8741, |
| "eval_jit_compilation_time": 11.4032, |
| "eval_loss": 2.9266164302825928, |
| "eval_runtime": 12.5758, |
| "eval_samples_per_second": 11.371, |
| "eval_steps_per_second": 2.863, |
| "step": 143 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_bleu": 10.4293, |
| "eval_gen_len": 14.0629, |
| "eval_jit_compilation_time": 12.1037, |
| "eval_loss": 2.850106716156006, |
| "eval_runtime": 11.8772, |
| "eval_samples_per_second": 12.04, |
| "eval_steps_per_second": 3.031, |
| "step": 286 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_bleu": 11.0503, |
| "eval_gen_len": 14.5664, |
| "eval_jit_compilation_time": 12.7826, |
| "eval_loss": 2.875509262084961, |
| "eval_runtime": 12.4472, |
| "eval_samples_per_second": 11.489, |
| "eval_steps_per_second": 2.892, |
| "step": 429 |
| }, |
| { |
| "epoch": 3.5, |
| "learning_rate": 2.5874125874125877e-05, |
| "loss": 2.0491, |
| "step": 500 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_bleu": 12.4752, |
| "eval_gen_len": 14.6014, |
| "eval_jit_compilation_time": 12.9294, |
| "eval_loss": 2.9099602699279785, |
| "eval_runtime": 12.6411, |
| "eval_samples_per_second": 11.312, |
| "eval_steps_per_second": 2.848, |
| "step": 572 |
| } |
| ], |
| "max_steps": 572, |
| "num_train_epochs": 4, |
| "total_flos": 11456058359808.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|