| { |
| "best_metric": 43.3065, |
| "best_model_checkpoint": "opus_big_lsp_adapt_wce_bands_4_ubweight_2.0/checkpoint-80000", |
| "epoch": 1.9716574245224892, |
| "global_step": 80000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.9753635243376465e-05, |
| "loss": 0.1822, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_bleu": 41.6724, |
| "eval_gen_len": 33.535, |
| "eval_loss": 0.09890133887529373, |
| "eval_runtime": 90.9254, |
| "eval_samples_per_second": 11.471, |
| "eval_steps_per_second": 0.726, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.950723967960567e-05, |
| "loss": 0.1511, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.39, |
| "eval_bleu": 42.4546, |
| "eval_gen_len": 33.838, |
| "eval_loss": 0.09774070978164673, |
| "eval_runtime": 188.0787, |
| "eval_samples_per_second": 5.546, |
| "eval_steps_per_second": 0.351, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 1.9260874922982132e-05, |
| "loss": 0.1468, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.59, |
| "eval_bleu": 42.5654, |
| "eval_gen_len": 34.9655, |
| "eval_loss": 0.09777849912643433, |
| "eval_runtime": 89.4604, |
| "eval_samples_per_second": 11.659, |
| "eval_steps_per_second": 0.738, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.901447935921134e-05, |
| "loss": 0.1428, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.79, |
| "eval_bleu": 42.2326, |
| "eval_gen_len": 34.0834, |
| "eval_loss": 0.0976356640458107, |
| "eval_runtime": 96.433, |
| "eval_samples_per_second": 10.816, |
| "eval_steps_per_second": 0.684, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.876814540973506e-05, |
| "loss": 0.1413, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.99, |
| "eval_bleu": 42.0941, |
| "eval_gen_len": 34.8849, |
| "eval_loss": 0.09630727022886276, |
| "eval_runtime": 99.7469, |
| "eval_samples_per_second": 10.456, |
| "eval_steps_per_second": 0.662, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 1.852178065311152e-05, |
| "loss": 0.1258, |
| "step": 48000 |
| }, |
| { |
| "epoch": 1.18, |
| "eval_bleu": 42.7386, |
| "eval_gen_len": 33.6261, |
| "eval_loss": 0.09742352366447449, |
| "eval_runtime": 76.9301, |
| "eval_samples_per_second": 13.558, |
| "eval_steps_per_second": 0.858, |
| "step": 48000 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 1.8275415896487988e-05, |
| "loss": 0.1261, |
| "step": 56000 |
| }, |
| { |
| "epoch": 1.38, |
| "eval_bleu": 42.7075, |
| "eval_gen_len": 34.2646, |
| "eval_loss": 0.09748394042253494, |
| "eval_runtime": 96.146, |
| "eval_samples_per_second": 10.848, |
| "eval_steps_per_second": 0.686, |
| "step": 56000 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 1.802905113986445e-05, |
| "loss": 0.1266, |
| "step": 64000 |
| }, |
| { |
| "epoch": 1.58, |
| "eval_bleu": 41.5564, |
| "eval_gen_len": 35.6529, |
| "eval_loss": 0.0969226285815239, |
| "eval_runtime": 104.3825, |
| "eval_samples_per_second": 9.992, |
| "eval_steps_per_second": 0.632, |
| "step": 64000 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.778268638324091e-05, |
| "loss": 0.1261, |
| "step": 72000 |
| }, |
| { |
| "epoch": 1.77, |
| "eval_bleu": 42.379, |
| "eval_gen_len": 34.3078, |
| "eval_loss": 0.09717730432748795, |
| "eval_runtime": 151.7683, |
| "eval_samples_per_second": 6.872, |
| "eval_steps_per_second": 0.435, |
| "step": 72000 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 1.7536321626617377e-05, |
| "loss": 0.1258, |
| "step": 80000 |
| }, |
| { |
| "epoch": 1.97, |
| "eval_bleu": 43.3065, |
| "eval_gen_len": 33.5158, |
| "eval_loss": 0.096554696559906, |
| "eval_runtime": 85.4527, |
| "eval_samples_per_second": 12.206, |
| "eval_steps_per_second": 0.772, |
| "step": 80000 |
| } |
| ], |
| "max_steps": 649200, |
| "num_train_epochs": 16, |
| "total_flos": 2.5844916958632346e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|