| { | |
| "best_metric": 42.7412, | |
| "best_model_checkpoint": "opus_base_adapt_wce_gloss_unsampled_bands_7_ubweight_1.5/checkpoint-48000", | |
| "epoch": 2.365930599369085, | |
| "global_step": 48000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.9753918572555207e-05, | |
| "loss": 0.2075, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_bleu": 35.693, | |
| "eval_gen_len": 39.837, | |
| "eval_loss": 0.10906277596950531, | |
| "eval_runtime": 203.6881, | |
| "eval_samples_per_second": 5.121, | |
| "eval_steps_per_second": 0.162, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.950746746845426e-05, | |
| "loss": 0.1742, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_bleu": 37.6497, | |
| "eval_gen_len": 37.93, | |
| "eval_loss": 0.10657652467489243, | |
| "eval_runtime": 221.5331, | |
| "eval_samples_per_second": 4.708, | |
| "eval_steps_per_second": 0.149, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 1.9261016364353314e-05, | |
| "loss": 0.1672, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_bleu": 41.3565, | |
| "eval_gen_len": 34.6529, | |
| "eval_loss": 0.10534726828336716, | |
| "eval_runtime": 168.8583, | |
| "eval_samples_per_second": 6.177, | |
| "eval_steps_per_second": 0.195, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.9014565260252367e-05, | |
| "loss": 0.1617, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_bleu": 41.4881, | |
| "eval_gen_len": 35.4343, | |
| "eval_loss": 0.10427802801132202, | |
| "eval_runtime": 136.9034, | |
| "eval_samples_per_second": 7.619, | |
| "eval_steps_per_second": 0.241, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.876811415615142e-05, | |
| "loss": 0.1593, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_bleu": 41.6185, | |
| "eval_gen_len": 35.9434, | |
| "eval_loss": 0.10372872650623322, | |
| "eval_runtime": 158.7854, | |
| "eval_samples_per_second": 6.569, | |
| "eval_steps_per_second": 0.208, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 1.8521786277602524e-05, | |
| "loss": 0.1475, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "eval_bleu": 41.9106, | |
| "eval_gen_len": 35.1831, | |
| "eval_loss": 0.10363597422838211, | |
| "eval_runtime": 127.398, | |
| "eval_samples_per_second": 8.187, | |
| "eval_steps_per_second": 0.259, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 1.8275520011829655e-05, | |
| "loss": 0.1473, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "eval_bleu": 41.0151, | |
| "eval_gen_len": 36.2119, | |
| "eval_loss": 0.10283295065164566, | |
| "eval_runtime": 132.5317, | |
| "eval_samples_per_second": 7.87, | |
| "eval_steps_per_second": 0.249, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 1.8029130520504733e-05, | |
| "loss": 0.1469, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "eval_bleu": 42.0417, | |
| "eval_gen_len": 34.9779, | |
| "eval_loss": 0.1022760346531868, | |
| "eval_runtime": 134.6277, | |
| "eval_samples_per_second": 7.747, | |
| "eval_steps_per_second": 0.245, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 1.7782741029179815e-05, | |
| "loss": 0.1455, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "eval_bleu": 40.3879, | |
| "eval_gen_len": 37.6788, | |
| "eval_loss": 0.10188300907611847, | |
| "eval_runtime": 124.1502, | |
| "eval_samples_per_second": 8.401, | |
| "eval_steps_per_second": 0.266, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.7536413150630914e-05, | |
| "loss": 0.1447, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "eval_bleu": 41.9744, | |
| "eval_gen_len": 35.4314, | |
| "eval_loss": 0.10198679566383362, | |
| "eval_runtime": 133.2603, | |
| "eval_samples_per_second": 7.827, | |
| "eval_steps_per_second": 0.248, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.729008527208202e-05, | |
| "loss": 0.1371, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "eval_bleu": 42.1364, | |
| "eval_gen_len": 34.9569, | |
| "eval_loss": 0.10233627259731293, | |
| "eval_runtime": 158.4624, | |
| "eval_samples_per_second": 6.582, | |
| "eval_steps_per_second": 0.208, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.70436957807571e-05, | |
| "loss": 0.1362, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "eval_bleu": 42.7412, | |
| "eval_gen_len": 34.8322, | |
| "eval_loss": 0.10221327096223831, | |
| "eval_runtime": 107.2167, | |
| "eval_samples_per_second": 9.728, | |
| "eval_steps_per_second": 0.308, | |
| "step": 48000 | |
| } | |
| ], | |
| "max_steps": 324608, | |
| "num_train_epochs": 16, | |
| "total_flos": 8.279444701052928e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |