{ "best_metric": 42.7412, "best_model_checkpoint": "opus_base_adapt_wce_gloss_unsampled_bands_7_ubweight_1.5/checkpoint-48000", "epoch": 2.365930599369085, "global_step": 48000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 1.9753918572555207e-05, "loss": 0.2075, "step": 4000 }, { "epoch": 0.2, "eval_bleu": 35.693, "eval_gen_len": 39.837, "eval_loss": 0.10906277596950531, "eval_runtime": 203.6881, "eval_samples_per_second": 5.121, "eval_steps_per_second": 0.162, "step": 4000 }, { "epoch": 0.39, "learning_rate": 1.950746746845426e-05, "loss": 0.1742, "step": 8000 }, { "epoch": 0.39, "eval_bleu": 37.6497, "eval_gen_len": 37.93, "eval_loss": 0.10657652467489243, "eval_runtime": 221.5331, "eval_samples_per_second": 4.708, "eval_steps_per_second": 0.149, "step": 8000 }, { "epoch": 0.59, "learning_rate": 1.9261016364353314e-05, "loss": 0.1672, "step": 12000 }, { "epoch": 0.59, "eval_bleu": 41.3565, "eval_gen_len": 34.6529, "eval_loss": 0.10534726828336716, "eval_runtime": 168.8583, "eval_samples_per_second": 6.177, "eval_steps_per_second": 0.195, "step": 12000 }, { "epoch": 0.79, "learning_rate": 1.9014565260252367e-05, "loss": 0.1617, "step": 16000 }, { "epoch": 0.79, "eval_bleu": 41.4881, "eval_gen_len": 35.4343, "eval_loss": 0.10427802801132202, "eval_runtime": 136.9034, "eval_samples_per_second": 7.619, "eval_steps_per_second": 0.241, "step": 16000 }, { "epoch": 0.99, "learning_rate": 1.876811415615142e-05, "loss": 0.1593, "step": 20000 }, { "epoch": 0.99, "eval_bleu": 41.6185, "eval_gen_len": 35.9434, "eval_loss": 0.10372872650623322, "eval_runtime": 158.7854, "eval_samples_per_second": 6.569, "eval_steps_per_second": 0.208, "step": 20000 }, { "epoch": 1.18, "learning_rate": 1.8521786277602524e-05, "loss": 0.1475, "step": 24000 }, { "epoch": 1.18, "eval_bleu": 41.9106, "eval_gen_len": 35.1831, "eval_loss": 0.10363597422838211, "eval_runtime": 127.398, "eval_samples_per_second": 8.187, "eval_steps_per_second": 0.259, "step": 24000 }, { "epoch": 1.38, "learning_rate": 1.8275520011829655e-05, "loss": 0.1473, "step": 28000 }, { "epoch": 1.38, "eval_bleu": 41.0151, "eval_gen_len": 36.2119, "eval_loss": 0.10283295065164566, "eval_runtime": 132.5317, "eval_samples_per_second": 7.87, "eval_steps_per_second": 0.249, "step": 28000 }, { "epoch": 1.58, "learning_rate": 1.8029130520504733e-05, "loss": 0.1469, "step": 32000 }, { "epoch": 1.58, "eval_bleu": 42.0417, "eval_gen_len": 34.9779, "eval_loss": 0.1022760346531868, "eval_runtime": 134.6277, "eval_samples_per_second": 7.747, "eval_steps_per_second": 0.245, "step": 32000 }, { "epoch": 1.77, "learning_rate": 1.7782741029179815e-05, "loss": 0.1455, "step": 36000 }, { "epoch": 1.77, "eval_bleu": 40.3879, "eval_gen_len": 37.6788, "eval_loss": 0.10188300907611847, "eval_runtime": 124.1502, "eval_samples_per_second": 8.401, "eval_steps_per_second": 0.266, "step": 36000 }, { "epoch": 1.97, "learning_rate": 1.7536413150630914e-05, "loss": 0.1447, "step": 40000 }, { "epoch": 1.97, "eval_bleu": 41.9744, "eval_gen_len": 35.4314, "eval_loss": 0.10198679566383362, "eval_runtime": 133.2603, "eval_samples_per_second": 7.827, "eval_steps_per_second": 0.248, "step": 40000 }, { "epoch": 2.17, "learning_rate": 1.729008527208202e-05, "loss": 0.1371, "step": 44000 }, { "epoch": 2.17, "eval_bleu": 42.1364, "eval_gen_len": 34.9569, "eval_loss": 0.10233627259731293, "eval_runtime": 158.4624, "eval_samples_per_second": 6.582, "eval_steps_per_second": 0.208, "step": 44000 }, { "epoch": 2.37, "learning_rate": 1.70436957807571e-05, "loss": 0.1362, "step": 48000 }, { "epoch": 2.37, "eval_bleu": 42.7412, "eval_gen_len": 34.8322, "eval_loss": 0.10221327096223831, "eval_runtime": 107.2167, "eval_samples_per_second": 9.728, "eval_steps_per_second": 0.308, "step": 48000 } ], "max_steps": 324608, "num_train_epochs": 16, "total_flos": 8.279444701052928e+16, "trial_name": null, "trial_params": null }