| { |
| "best_metric": 43.0883, |
| "best_model_checkpoint": "opus_big_ailem_adaptified/checkpoint-64000", |
| "epoch": 1.5773259396179915, |
| "global_step": 64000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.9753635243376465e-05, |
| "loss": 0.1745, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_bleu": 42.0213, |
| "eval_gen_len": 33.5187, |
| "eval_loss": 0.0983896404504776, |
| "eval_runtime": 137.809, |
| "eval_samples_per_second": 7.568, |
| "eval_steps_per_second": 0.479, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.9507270486752928e-05, |
| "loss": 0.144, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.39, |
| "eval_bleu": 42.5956, |
| "eval_gen_len": 34.3337, |
| "eval_loss": 0.09711522608995438, |
| "eval_runtime": 202.6277, |
| "eval_samples_per_second": 5.147, |
| "eval_steps_per_second": 0.326, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 1.9260874922982132e-05, |
| "loss": 0.14, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.59, |
| "eval_bleu": 42.4234, |
| "eval_gen_len": 33.6222, |
| "eval_loss": 0.09710835665464401, |
| "eval_runtime": 146.6065, |
| "eval_samples_per_second": 7.114, |
| "eval_steps_per_second": 0.45, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.90145101663586e-05, |
| "loss": 0.1363, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.79, |
| "eval_bleu": 43.0677, |
| "eval_gen_len": 33.6654, |
| "eval_loss": 0.0967707633972168, |
| "eval_runtime": 133.705, |
| "eval_samples_per_second": 7.801, |
| "eval_steps_per_second": 0.494, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.876814540973506e-05, |
| "loss": 0.1349, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.99, |
| "eval_bleu": 42.7333, |
| "eval_gen_len": 34.8533, |
| "eval_loss": 0.09592821449041367, |
| "eval_runtime": 131.6304, |
| "eval_samples_per_second": 7.924, |
| "eval_steps_per_second": 0.501, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 1.8521749845964266e-05, |
| "loss": 0.1201, |
| "step": 48000 |
| }, |
| { |
| "epoch": 1.18, |
| "eval_bleu": 42.8479, |
| "eval_gen_len": 34.2771, |
| "eval_loss": 0.09712815284729004, |
| "eval_runtime": 129.1233, |
| "eval_samples_per_second": 8.078, |
| "eval_steps_per_second": 0.511, |
| "step": 48000 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 1.827535428219347e-05, |
| "loss": 0.1205, |
| "step": 56000 |
| }, |
| { |
| "epoch": 1.38, |
| "eval_bleu": 42.4623, |
| "eval_gen_len": 34.1122, |
| "eval_loss": 0.09739168733358383, |
| "eval_runtime": 183.6808, |
| "eval_samples_per_second": 5.678, |
| "eval_steps_per_second": 0.359, |
| "step": 56000 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 1.8029020332717192e-05, |
| "loss": 0.1211, |
| "step": 64000 |
| }, |
| { |
| "epoch": 1.58, |
| "eval_bleu": 43.0883, |
| "eval_gen_len": 34.9319, |
| "eval_loss": 0.09650042653083801, |
| "eval_runtime": 173.7588, |
| "eval_samples_per_second": 6.003, |
| "eval_steps_per_second": 0.38, |
| "step": 64000 |
| } |
| ], |
| "max_steps": 649200, |
| "num_train_epochs": 16, |
| "total_flos": 2.0676742327723622e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|