| { |
| "best_metric": 43.3155, |
| "best_model_checkpoint": "opus_big_ailem_random/checkpoint-80000", |
| "epoch": 1.9716574245224892, |
| "global_step": 80000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.9753635243376465e-05, |
| "loss": 0.1794, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_bleu": 42.1115, |
| "eval_gen_len": 33.8782, |
| "eval_loss": 0.09834092110395432, |
| "eval_runtime": 120.1201, |
| "eval_samples_per_second": 8.683, |
| "eval_steps_per_second": 0.549, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.950723967960567e-05, |
| "loss": 0.1487, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.39, |
| "eval_bleu": 42.5044, |
| "eval_gen_len": 34.0422, |
| "eval_loss": 0.09755747765302658, |
| "eval_runtime": 108.595, |
| "eval_samples_per_second": 9.604, |
| "eval_steps_per_second": 0.608, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 1.926090573012939e-05, |
| "loss": 0.1445, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.59, |
| "eval_bleu": 42.1738, |
| "eval_gen_len": 33.3921, |
| "eval_loss": 0.09764789789915085, |
| "eval_runtime": 129.8931, |
| "eval_samples_per_second": 8.03, |
| "eval_steps_per_second": 0.508, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.90145101663586e-05, |
| "loss": 0.1407, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.79, |
| "eval_bleu": 42.6761, |
| "eval_gen_len": 33.8092, |
| "eval_loss": 0.09669991582632065, |
| "eval_runtime": 115.0248, |
| "eval_samples_per_second": 9.068, |
| "eval_steps_per_second": 0.574, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.876814540973506e-05, |
| "loss": 0.1392, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.99, |
| "eval_bleu": 42.5089, |
| "eval_gen_len": 33.814, |
| "eval_loss": 0.09582150727510452, |
| "eval_runtime": 115.0911, |
| "eval_samples_per_second": 9.062, |
| "eval_steps_per_second": 0.573, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 1.852178065311152e-05, |
| "loss": 0.1241, |
| "step": 48000 |
| }, |
| { |
| "epoch": 1.18, |
| "eval_bleu": 42.6428, |
| "eval_gen_len": 33.954, |
| "eval_loss": 0.09729909896850586, |
| "eval_runtime": 116.307, |
| "eval_samples_per_second": 8.968, |
| "eval_steps_per_second": 0.567, |
| "step": 48000 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 1.8275446703635243e-05, |
| "loss": 0.1244, |
| "step": 56000 |
| }, |
| { |
| "epoch": 1.38, |
| "eval_bleu": 42.5021, |
| "eval_gen_len": 33.7661, |
| "eval_loss": 0.09766314178705215, |
| "eval_runtime": 141.066, |
| "eval_samples_per_second": 7.394, |
| "eval_steps_per_second": 0.468, |
| "step": 56000 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 1.802908194701171e-05, |
| "loss": 0.125, |
| "step": 64000 |
| }, |
| { |
| "epoch": 1.58, |
| "eval_bleu": 42.8416, |
| "eval_gen_len": 33.999, |
| "eval_loss": 0.09677453339099884, |
| "eval_runtime": 152.4751, |
| "eval_samples_per_second": 6.84, |
| "eval_steps_per_second": 0.433, |
| "step": 64000 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.7782717190388173e-05, |
| "loss": 0.1245, |
| "step": 72000 |
| }, |
| { |
| "epoch": 1.77, |
| "eval_bleu": 42.797, |
| "eval_gen_len": 34.5053, |
| "eval_loss": 0.09712178260087967, |
| "eval_runtime": 129.5634, |
| "eval_samples_per_second": 8.05, |
| "eval_steps_per_second": 0.509, |
| "step": 72000 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 1.7536321626617377e-05, |
| "loss": 0.1242, |
| "step": 80000 |
| }, |
| { |
| "epoch": 1.97, |
| "eval_bleu": 43.3155, |
| "eval_gen_len": 34.0355, |
| "eval_loss": 0.09631923586130142, |
| "eval_runtime": 129.8003, |
| "eval_samples_per_second": 8.035, |
| "eval_steps_per_second": 0.508, |
| "step": 80000 |
| } |
| ], |
| "max_steps": 649200, |
| "num_train_epochs": 16, |
| "total_flos": 2.5844916958632346e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|