{ "best_metric": 42.9732, "best_model_checkpoint": "opus_base_wce_adaptified/checkpoint-80000", "epoch": 3.943217665615142, "global_step": 80000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 1.9753918572555207e-05, "loss": 0.2051, "step": 4000 }, { "epoch": 0.2, "eval_bleu": 35.5597, "eval_gen_len": 39.8495, "eval_loss": 0.10906314104795456, "eval_runtime": 140.7392, "eval_samples_per_second": 7.411, "eval_steps_per_second": 0.234, "step": 4000 }, { "epoch": 0.39, "learning_rate": 1.950746746845426e-05, "loss": 0.1719, "step": 8000 }, { "epoch": 0.39, "eval_bleu": 40.9319, "eval_gen_len": 34.6337, "eval_loss": 0.10657753050327301, "eval_runtime": 125.8016, "eval_samples_per_second": 8.291, "eval_steps_per_second": 0.262, "step": 8000 }, { "epoch": 0.59, "learning_rate": 1.9261016364353314e-05, "loss": 0.165, "step": 12000 }, { "epoch": 0.59, "eval_bleu": 41.3844, "eval_gen_len": 34.6453, "eval_loss": 0.10533162206411362, "eval_runtime": 111.0041, "eval_samples_per_second": 9.396, "eval_steps_per_second": 0.297, "step": 12000 }, { "epoch": 0.79, "learning_rate": 1.9014565260252367e-05, "loss": 0.1596, "step": 16000 }, { "epoch": 0.79, "eval_bleu": 41.3616, "eval_gen_len": 35.1601, "eval_loss": 0.10429085791110992, "eval_runtime": 141.5674, "eval_samples_per_second": 7.368, "eval_steps_per_second": 0.233, "step": 16000 }, { "epoch": 0.99, "learning_rate": 1.876811415615142e-05, "loss": 0.1573, "step": 20000 }, { "epoch": 0.99, "eval_bleu": 41.6835, "eval_gen_len": 34.9942, "eval_loss": 0.10373106598854065, "eval_runtime": 109.7842, "eval_samples_per_second": 9.5, "eval_steps_per_second": 0.301, "step": 20000 }, { "epoch": 1.18, "learning_rate": 1.8521786277602524e-05, "loss": 0.1457, "step": 24000 }, { "epoch": 1.18, "eval_bleu": 41.1613, "eval_gen_len": 36.2349, "eval_loss": 0.10356967151165009, "eval_runtime": 116.302, "eval_samples_per_second": 8.968, "eval_steps_per_second": 0.284, "step": 24000 }, { "epoch": 1.38, "learning_rate": 1.827545839905363e-05, "loss": 0.1455, "step": 28000 }, { "epoch": 1.38, "eval_bleu": 41.8822, "eval_gen_len": 34.4698, "eval_loss": 0.10275906324386597, "eval_runtime": 88.2743, "eval_samples_per_second": 11.815, "eval_steps_per_second": 0.374, "step": 28000 }, { "epoch": 1.58, "learning_rate": 1.8029130520504733e-05, "loss": 0.1451, "step": 32000 }, { "epoch": 1.58, "eval_bleu": 41.7092, "eval_gen_len": 35.2943, "eval_loss": 0.10221899300813675, "eval_runtime": 93.7664, "eval_samples_per_second": 11.123, "eval_steps_per_second": 0.352, "step": 32000 }, { "epoch": 1.77, "learning_rate": 1.7782802641955836e-05, "loss": 0.1437, "step": 36000 }, { "epoch": 1.77, "eval_bleu": 40.1745, "eval_gen_len": 38.5484, "eval_loss": 0.10177244991064072, "eval_runtime": 92.869, "eval_samples_per_second": 11.231, "eval_steps_per_second": 0.355, "step": 36000 }, { "epoch": 1.97, "learning_rate": 1.7536474763406942e-05, "loss": 0.1429, "step": 40000 }, { "epoch": 1.97, "eval_bleu": 42.5078, "eval_gen_len": 34.3826, "eval_loss": 0.10191857814788818, "eval_runtime": 85.4817, "eval_samples_per_second": 12.201, "eval_steps_per_second": 0.386, "step": 40000 }, { "epoch": 2.17, "learning_rate": 1.7290146884858045e-05, "loss": 0.1354, "step": 44000 }, { "epoch": 2.17, "eval_bleu": 42.0937, "eval_gen_len": 35.7114, "eval_loss": 0.10230503231287003, "eval_runtime": 96.9439, "eval_samples_per_second": 10.759, "eval_steps_per_second": 0.34, "step": 44000 }, { "epoch": 2.37, "learning_rate": 1.704381900630915e-05, "loss": 0.1345, "step": 48000 }, { "epoch": 2.37, "eval_bleu": 42.3842, "eval_gen_len": 35.3586, "eval_loss": 0.10215254127979279, "eval_runtime": 94.9854, "eval_samples_per_second": 10.981, "eval_steps_per_second": 0.347, "step": 48000 }, { "epoch": 2.56, "learning_rate": 1.6797491127760255e-05, "loss": 0.1335, "step": 52000 }, { "epoch": 2.56, "eval_bleu": 42.1993, "eval_gen_len": 34.0729, "eval_loss": 0.10184619575738907, "eval_runtime": 104.2027, "eval_samples_per_second": 10.009, "eval_steps_per_second": 0.317, "step": 52000 }, { "epoch": 2.76, "learning_rate": 1.6551101636435333e-05, "loss": 0.1338, "step": 56000 }, { "epoch": 2.76, "eval_bleu": 42.3348, "eval_gen_len": 35.5158, "eval_loss": 0.10167574882507324, "eval_runtime": 95.3873, "eval_samples_per_second": 10.934, "eval_steps_per_second": 0.346, "step": 56000 }, { "epoch": 2.96, "learning_rate": 1.6304773757886436e-05, "loss": 0.1341, "step": 60000 }, { "epoch": 2.96, "eval_bleu": 42.6183, "eval_gen_len": 34.5225, "eval_loss": 0.10100951045751572, "eval_runtime": 105.9409, "eval_samples_per_second": 9.845, "eval_steps_per_second": 0.311, "step": 60000 }, { "epoch": 3.15, "learning_rate": 1.6058445879337542e-05, "loss": 0.1278, "step": 64000 }, { "epoch": 3.15, "eval_bleu": 42.354, "eval_gen_len": 35.1323, "eval_loss": 0.10181604325771332, "eval_runtime": 90.7174, "eval_samples_per_second": 11.497, "eval_steps_per_second": 0.364, "step": 64000 }, { "epoch": 3.35, "learning_rate": 1.5812118000788645e-05, "loss": 0.1272, "step": 68000 }, { "epoch": 3.35, "eval_bleu": 41.9485, "eval_gen_len": 35.4727, "eval_loss": 0.10201819986104965, "eval_runtime": 99.9879, "eval_samples_per_second": 10.431, "eval_steps_per_second": 0.33, "step": 68000 }, { "epoch": 3.55, "learning_rate": 1.5565728509463723e-05, "loss": 0.1261, "step": 72000 }, { "epoch": 3.55, "eval_bleu": 42.3042, "eval_gen_len": 35.2704, "eval_loss": 0.10195478051900864, "eval_runtime": 70.0936, "eval_samples_per_second": 14.88, "eval_steps_per_second": 0.471, "step": 72000 }, { "epoch": 3.75, "learning_rate": 1.5319400630914826e-05, "loss": 0.1265, "step": 76000 }, { "epoch": 3.75, "eval_bleu": 39.7039, "eval_gen_len": 37.767, "eval_loss": 0.10168451070785522, "eval_runtime": 112.9245, "eval_samples_per_second": 9.236, "eval_steps_per_second": 0.292, "step": 76000 }, { "epoch": 3.94, "learning_rate": 1.5073011139589906e-05, "loss": 0.1273, "step": 80000 }, { "epoch": 3.94, "eval_bleu": 42.9732, "eval_gen_len": 34.5542, "eval_loss": 0.10154784470796585, "eval_runtime": 147.6247, "eval_samples_per_second": 7.065, "eval_steps_per_second": 0.224, "step": 80000 } ], "max_steps": 324608, "num_train_epochs": 16, "total_flos": 1.3796012702564352e+17, "trial_name": null, "trial_params": null }