{ "best_metric": 43.1111, "best_model_checkpoint": "opus_base_lsp_adapt_wce_bands_5_ubweight_2.0/checkpoint-80000", "epoch": 3.943217665615142, "global_step": 80000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 1.9753918572555207e-05, "loss": 0.2142, "step": 4000 }, { "epoch": 0.2, "eval_bleu": 32.7905, "eval_gen_len": 43.1007, "eval_loss": 0.10939062386751175, "eval_runtime": 145.1214, "eval_samples_per_second": 7.187, "eval_steps_per_second": 0.227, "step": 4000 }, { "epoch": 0.39, "learning_rate": 1.950746746845426e-05, "loss": 0.18, "step": 8000 }, { "epoch": 0.39, "eval_bleu": 36.4262, "eval_gen_len": 39.3643, "eval_loss": 0.10692563652992249, "eval_runtime": 115.1385, "eval_samples_per_second": 9.059, "eval_steps_per_second": 0.287, "step": 8000 }, { "epoch": 0.59, "learning_rate": 1.9261016364353314e-05, "loss": 0.1726, "step": 12000 }, { "epoch": 0.59, "eval_bleu": 41.077, "eval_gen_len": 35.0671, "eval_loss": 0.10563468933105469, "eval_runtime": 127.7336, "eval_samples_per_second": 8.165, "eval_steps_per_second": 0.258, "step": 12000 }, { "epoch": 0.79, "learning_rate": 1.9014565260252367e-05, "loss": 0.1669, "step": 16000 }, { "epoch": 0.79, "eval_bleu": 41.2349, "eval_gen_len": 35.2464, "eval_loss": 0.10455626994371414, "eval_runtime": 88.8086, "eval_samples_per_second": 11.744, "eval_steps_per_second": 0.372, "step": 16000 }, { "epoch": 0.99, "learning_rate": 1.876811415615142e-05, "loss": 0.1644, "step": 20000 }, { "epoch": 0.99, "eval_bleu": 41.641, "eval_gen_len": 35.3337, "eval_loss": 0.10399404913187027, "eval_runtime": 137.4077, "eval_samples_per_second": 7.591, "eval_steps_per_second": 0.24, "step": 20000 }, { "epoch": 1.18, "learning_rate": 1.8521786277602524e-05, "loss": 0.1522, "step": 24000 }, { "epoch": 1.18, "eval_bleu": 41.4007, "eval_gen_len": 35.7028, "eval_loss": 0.10385829210281372, "eval_runtime": 141.3172, "eval_samples_per_second": 7.381, "eval_steps_per_second": 0.234, "step": 24000 }, { "epoch": 1.38, "learning_rate": 1.8275520011829655e-05, "loss": 0.152, "step": 28000 }, { "epoch": 1.38, "eval_bleu": 41.7231, "eval_gen_len": 34.8265, "eval_loss": 0.10312652587890625, "eval_runtime": 99.4665, "eval_samples_per_second": 10.486, "eval_steps_per_second": 0.332, "step": 28000 }, { "epoch": 1.58, "learning_rate": 1.8029130520504733e-05, "loss": 0.1515, "step": 32000 }, { "epoch": 1.58, "eval_bleu": 42.1288, "eval_gen_len": 35.4545, "eval_loss": 0.10245882719755173, "eval_runtime": 82.6, "eval_samples_per_second": 12.627, "eval_steps_per_second": 0.4, "step": 32000 }, { "epoch": 1.77, "learning_rate": 1.7782741029179815e-05, "loss": 0.1499, "step": 36000 }, { "epoch": 1.77, "eval_bleu": 39.4245, "eval_gen_len": 38.9329, "eval_loss": 0.10212080180644989, "eval_runtime": 114.4105, "eval_samples_per_second": 9.116, "eval_steps_per_second": 0.288, "step": 36000 }, { "epoch": 1.97, "learning_rate": 1.7536413150630914e-05, "loss": 0.1491, "step": 40000 }, { "epoch": 1.97, "eval_bleu": 42.32, "eval_gen_len": 34.3471, "eval_loss": 0.10226055979728699, "eval_runtime": 99.1685, "eval_samples_per_second": 10.517, "eval_steps_per_second": 0.333, "step": 40000 }, { "epoch": 2.17, "learning_rate": 1.729008527208202e-05, "loss": 0.1413, "step": 44000 }, { "epoch": 2.17, "eval_bleu": 40.4318, "eval_gen_len": 36.4919, "eval_loss": 0.1026298999786377, "eval_runtime": 116.9488, "eval_samples_per_second": 8.918, "eval_steps_per_second": 0.282, "step": 44000 }, { "epoch": 2.37, "learning_rate": 1.70436957807571e-05, "loss": 0.1404, "step": 48000 }, { "epoch": 2.37, "eval_bleu": 42.4697, "eval_gen_len": 35.14, "eval_loss": 0.10236479341983795, "eval_runtime": 95.0688, "eval_samples_per_second": 10.971, "eval_steps_per_second": 0.347, "step": 48000 }, { "epoch": 2.56, "learning_rate": 1.679742951498423e-05, "loss": 0.1392, "step": 52000 }, { "epoch": 2.56, "eval_bleu": 42.1067, "eval_gen_len": 34.4813, "eval_loss": 0.10209206491708755, "eval_runtime": 107.2908, "eval_samples_per_second": 9.721, "eval_steps_per_second": 0.308, "step": 52000 }, { "epoch": 2.76, "learning_rate": 1.6551040023659308e-05, "loss": 0.1396, "step": 56000 }, { "epoch": 2.76, "eval_bleu": 41.9687, "eval_gen_len": 35.069, "eval_loss": 0.10199315845966339, "eval_runtime": 123.666, "eval_samples_per_second": 8.434, "eval_steps_per_second": 0.267, "step": 56000 }, { "epoch": 2.96, "learning_rate": 1.6304650532334386e-05, "loss": 0.1398, "step": 60000 }, { "epoch": 2.96, "eval_bleu": 42.5934, "eval_gen_len": 34.4506, "eval_loss": 0.10132639110088348, "eval_runtime": 87.6332, "eval_samples_per_second": 11.902, "eval_steps_per_second": 0.377, "step": 60000 }, { "epoch": 3.15, "learning_rate": 1.605832265378549e-05, "loss": 0.1332, "step": 64000 }, { "epoch": 3.15, "eval_bleu": 42.1707, "eval_gen_len": 34.278, "eval_loss": 0.10205266624689102, "eval_runtime": 75.2908, "eval_samples_per_second": 13.853, "eval_steps_per_second": 0.438, "step": 64000 }, { "epoch": 3.35, "learning_rate": 1.5811994775236596e-05, "loss": 0.1327, "step": 68000 }, { "epoch": 3.35, "eval_bleu": 42.4582, "eval_gen_len": 34.0326, "eval_loss": 0.10221361368894577, "eval_runtime": 85.03, "eval_samples_per_second": 12.266, "eval_steps_per_second": 0.388, "step": 68000 }, { "epoch": 3.55, "learning_rate": 1.55656668966877e-05, "loss": 0.1314, "step": 72000 }, { "epoch": 3.55, "eval_bleu": 42.0756, "eval_gen_len": 34.6932, "eval_loss": 0.10215744376182556, "eval_runtime": 85.9762, "eval_samples_per_second": 12.131, "eval_steps_per_second": 0.384, "step": 72000 }, { "epoch": 3.75, "learning_rate": 1.5319339018138805e-05, "loss": 0.1319, "step": 76000 }, { "epoch": 3.75, "eval_bleu": 42.4042, "eval_gen_len": 34.7958, "eval_loss": 0.10203341394662857, "eval_runtime": 88.8632, "eval_samples_per_second": 11.737, "eval_steps_per_second": 0.371, "step": 76000 }, { "epoch": 3.94, "learning_rate": 1.5073011139589906e-05, "loss": 0.1327, "step": 80000 }, { "epoch": 3.94, "eval_bleu": 43.1111, "eval_gen_len": 34.6481, "eval_loss": 0.10178661346435547, "eval_runtime": 99.1474, "eval_samples_per_second": 10.52, "eval_steps_per_second": 0.333, "step": 80000 } ], "max_steps": 324608, "num_train_epochs": 16, "total_flos": 1.3796012702564352e+17, "trial_name": null, "trial_params": null }