{ "best_metric": 43.3065, "best_model_checkpoint": "opus_big_lsp_adapt_wce_bands_4_ubweight_2.0/checkpoint-80000", "epoch": 1.9716574245224892, "global_step": 80000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 1.9753635243376465e-05, "loss": 0.1822, "step": 8000 }, { "epoch": 0.2, "eval_bleu": 41.6724, "eval_gen_len": 33.535, "eval_loss": 0.09890133887529373, "eval_runtime": 90.9254, "eval_samples_per_second": 11.471, "eval_steps_per_second": 0.726, "step": 8000 }, { "epoch": 0.39, "learning_rate": 1.950723967960567e-05, "loss": 0.1511, "step": 16000 }, { "epoch": 0.39, "eval_bleu": 42.4546, "eval_gen_len": 33.838, "eval_loss": 0.09774070978164673, "eval_runtime": 188.0787, "eval_samples_per_second": 5.546, "eval_steps_per_second": 0.351, "step": 16000 }, { "epoch": 0.59, "learning_rate": 1.9260874922982132e-05, "loss": 0.1468, "step": 24000 }, { "epoch": 0.59, "eval_bleu": 42.5654, "eval_gen_len": 34.9655, "eval_loss": 0.09777849912643433, "eval_runtime": 89.4604, "eval_samples_per_second": 11.659, "eval_steps_per_second": 0.738, "step": 24000 }, { "epoch": 0.79, "learning_rate": 1.901447935921134e-05, "loss": 0.1428, "step": 32000 }, { "epoch": 0.79, "eval_bleu": 42.2326, "eval_gen_len": 34.0834, "eval_loss": 0.0976356640458107, "eval_runtime": 96.433, "eval_samples_per_second": 10.816, "eval_steps_per_second": 0.684, "step": 32000 }, { "epoch": 0.99, "learning_rate": 1.876814540973506e-05, "loss": 0.1413, "step": 40000 }, { "epoch": 0.99, "eval_bleu": 42.0941, "eval_gen_len": 34.8849, "eval_loss": 0.09630727022886276, "eval_runtime": 99.7469, "eval_samples_per_second": 10.456, "eval_steps_per_second": 0.662, "step": 40000 }, { "epoch": 1.18, "learning_rate": 1.852178065311152e-05, "loss": 0.1258, "step": 48000 }, { "epoch": 1.18, "eval_bleu": 42.7386, "eval_gen_len": 33.6261, "eval_loss": 0.09742352366447449, "eval_runtime": 76.9301, "eval_samples_per_second": 13.558, "eval_steps_per_second": 0.858, "step": 48000 }, { "epoch": 1.38, "learning_rate": 1.8275415896487988e-05, "loss": 0.1261, "step": 56000 }, { "epoch": 1.38, "eval_bleu": 42.7075, "eval_gen_len": 34.2646, "eval_loss": 0.09748394042253494, "eval_runtime": 96.146, "eval_samples_per_second": 10.848, "eval_steps_per_second": 0.686, "step": 56000 }, { "epoch": 1.58, "learning_rate": 1.802905113986445e-05, "loss": 0.1266, "step": 64000 }, { "epoch": 1.58, "eval_bleu": 41.5564, "eval_gen_len": 35.6529, "eval_loss": 0.0969226285815239, "eval_runtime": 104.3825, "eval_samples_per_second": 9.992, "eval_steps_per_second": 0.632, "step": 64000 }, { "epoch": 1.77, "learning_rate": 1.778268638324091e-05, "loss": 0.1261, "step": 72000 }, { "epoch": 1.77, "eval_bleu": 42.379, "eval_gen_len": 34.3078, "eval_loss": 0.09717730432748795, "eval_runtime": 151.7683, "eval_samples_per_second": 6.872, "eval_steps_per_second": 0.435, "step": 72000 }, { "epoch": 1.97, "learning_rate": 1.7536321626617377e-05, "loss": 0.1258, "step": 80000 }, { "epoch": 1.97, "eval_bleu": 43.3065, "eval_gen_len": 33.5158, "eval_loss": 0.096554696559906, "eval_runtime": 85.4527, "eval_samples_per_second": 12.206, "eval_steps_per_second": 0.772, "step": 80000 } ], "max_steps": 649200, "num_train_epochs": 16, "total_flos": 2.5844916958632346e+17, "trial_name": null, "trial_params": null }