| { |
| "best_metric": 43.7339, |
| "best_model_checkpoint": "opus_big_lsp_adapt_wce_prop_0.8_weight_1.75/checkpoint-80000", |
| "epoch": 1.9716574245224892, |
| "global_step": 80000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.9753635243376465e-05, |
| "loss": 0.1872, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_bleu": 42.0446, |
| "eval_gen_len": 33.721, |
| "eval_loss": 0.09888985753059387, |
| "eval_runtime": 130.4471, |
| "eval_samples_per_second": 7.996, |
| "eval_steps_per_second": 0.506, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.950723967960567e-05, |
| "loss": 0.1556, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.39, |
| "eval_bleu": 42.2512, |
| "eval_gen_len": 34.093, |
| "eval_loss": 0.09752173721790314, |
| "eval_runtime": 189.6683, |
| "eval_samples_per_second": 5.499, |
| "eval_steps_per_second": 0.348, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 1.9260874922982132e-05, |
| "loss": 0.1511, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.59, |
| "eval_bleu": 42.4039, |
| "eval_gen_len": 33.7776, |
| "eval_loss": 0.09804832935333252, |
| "eval_runtime": 125.3237, |
| "eval_samples_per_second": 8.322, |
| "eval_steps_per_second": 0.527, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.901447935921134e-05, |
| "loss": 0.1468, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.79, |
| "eval_bleu": 42.384, |
| "eval_gen_len": 34.0077, |
| "eval_loss": 0.09726251661777496, |
| "eval_runtime": 127.0486, |
| "eval_samples_per_second": 8.209, |
| "eval_steps_per_second": 0.519, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.8768114602587803e-05, |
| "loss": 0.1453, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.99, |
| "eval_bleu": 42.5465, |
| "eval_gen_len": 34.2138, |
| "eval_loss": 0.09616752713918686, |
| "eval_runtime": 149.6762, |
| "eval_samples_per_second": 6.968, |
| "eval_steps_per_second": 0.441, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 1.8521749845964266e-05, |
| "loss": 0.1292, |
| "step": 48000 |
| }, |
| { |
| "epoch": 1.18, |
| "eval_bleu": 42.7614, |
| "eval_gen_len": 33.6558, |
| "eval_loss": 0.09714562445878983, |
| "eval_runtime": 140.3139, |
| "eval_samples_per_second": 7.433, |
| "eval_steps_per_second": 0.47, |
| "step": 48000 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 1.827535428219347e-05, |
| "loss": 0.1296, |
| "step": 56000 |
| }, |
| { |
| "epoch": 1.38, |
| "eval_bleu": 42.2625, |
| "eval_gen_len": 34.7987, |
| "eval_loss": 0.09766771644353867, |
| "eval_runtime": 149.3991, |
| "eval_samples_per_second": 6.981, |
| "eval_steps_per_second": 0.442, |
| "step": 56000 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 1.8029020332717192e-05, |
| "loss": 0.1301, |
| "step": 64000 |
| }, |
| { |
| "epoch": 1.58, |
| "eval_bleu": 42.8366, |
| "eval_gen_len": 34.4842, |
| "eval_loss": 0.0971095860004425, |
| "eval_runtime": 150.9733, |
| "eval_samples_per_second": 6.909, |
| "eval_steps_per_second": 0.437, |
| "step": 64000 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.7782624768946397e-05, |
| "loss": 0.1295, |
| "step": 72000 |
| }, |
| { |
| "epoch": 1.77, |
| "eval_bleu": 42.763, |
| "eval_gen_len": 34.9012, |
| "eval_loss": 0.09676354378461838, |
| "eval_runtime": 156.8388, |
| "eval_samples_per_second": 6.65, |
| "eval_steps_per_second": 0.421, |
| "step": 72000 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 1.753626001232286e-05, |
| "loss": 0.1293, |
| "step": 80000 |
| }, |
| { |
| "epoch": 1.97, |
| "eval_bleu": 43.7339, |
| "eval_gen_len": 33.8734, |
| "eval_loss": 0.09614978730678558, |
| "eval_runtime": 162.1547, |
| "eval_samples_per_second": 6.432, |
| "eval_steps_per_second": 0.407, |
| "step": 80000 |
| } |
| ], |
| "max_steps": 649200, |
| "num_train_epochs": 16, |
| "total_flos": 2.5844916958632346e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|