| { |
| "best_metric": 43.5535, |
| "best_model_checkpoint": "big_wce_weights_1.25_train_0.75/checkpoint-80000", |
| "epoch": 1.9716574245224892, |
| "global_step": 80000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.9753635243376465e-05, |
| "loss": 0.1866, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_bleu": 42.0127, |
| "eval_gen_len": 33.6165, |
| "eval_loss": 0.09847372770309448, |
| "eval_runtime": 164.5065, |
| "eval_samples_per_second": 6.34, |
| "eval_steps_per_second": 0.401, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.950723967960567e-05, |
| "loss": 0.1554, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.39, |
| "eval_bleu": 42.646, |
| "eval_gen_len": 34.6596, |
| "eval_loss": 0.09764392673969269, |
| "eval_runtime": 134.5362, |
| "eval_samples_per_second": 7.753, |
| "eval_steps_per_second": 0.491, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 1.9260844115834877e-05, |
| "loss": 0.1511, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.59, |
| "eval_bleu": 42.5994, |
| "eval_gen_len": 34.0719, |
| "eval_loss": 0.09760789573192596, |
| "eval_runtime": 181.8789, |
| "eval_samples_per_second": 5.735, |
| "eval_steps_per_second": 0.363, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.90145101663586e-05, |
| "loss": 0.147, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.79, |
| "eval_bleu": 42.7909, |
| "eval_gen_len": 33.908, |
| "eval_loss": 0.09649083018302917, |
| "eval_runtime": 134.5535, |
| "eval_samples_per_second": 7.752, |
| "eval_steps_per_second": 0.491, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.876814540973506e-05, |
| "loss": 0.1455, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.99, |
| "eval_bleu": 42.558, |
| "eval_gen_len": 34.4334, |
| "eval_loss": 0.09574901312589645, |
| "eval_runtime": 126.7021, |
| "eval_samples_per_second": 8.232, |
| "eval_steps_per_second": 0.521, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 1.8521749845964266e-05, |
| "loss": 0.1297, |
| "step": 48000 |
| }, |
| { |
| "epoch": 1.18, |
| "eval_bleu": 42.9867, |
| "eval_gen_len": 34.3883, |
| "eval_loss": 0.09689216315746307, |
| "eval_runtime": 112.9116, |
| "eval_samples_per_second": 9.237, |
| "eval_steps_per_second": 0.585, |
| "step": 48000 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 1.827538508934073e-05, |
| "loss": 0.13, |
| "step": 56000 |
| }, |
| { |
| "epoch": 1.38, |
| "eval_bleu": 42.8217, |
| "eval_gen_len": 33.4756, |
| "eval_loss": 0.09708236902952194, |
| "eval_runtime": 102.1103, |
| "eval_samples_per_second": 10.214, |
| "eval_steps_per_second": 0.646, |
| "step": 56000 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 1.802905113986445e-05, |
| "loss": 0.1306, |
| "step": 64000 |
| }, |
| { |
| "epoch": 1.58, |
| "eval_bleu": 42.8216, |
| "eval_gen_len": 33.9636, |
| "eval_loss": 0.09652863442897797, |
| "eval_runtime": 110.6087, |
| "eval_samples_per_second": 9.43, |
| "eval_steps_per_second": 0.597, |
| "step": 64000 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.778268638324091e-05, |
| "loss": 0.1301, |
| "step": 72000 |
| }, |
| { |
| "epoch": 1.77, |
| "eval_bleu": 42.4795, |
| "eval_gen_len": 35.419, |
| "eval_loss": 0.09661434590816498, |
| "eval_runtime": 132.5432, |
| "eval_samples_per_second": 7.869, |
| "eval_steps_per_second": 0.498, |
| "step": 72000 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 1.7536321626617377e-05, |
| "loss": 0.1298, |
| "step": 80000 |
| }, |
| { |
| "epoch": 1.97, |
| "eval_bleu": 43.5535, |
| "eval_gen_len": 33.8552, |
| "eval_loss": 0.09589231759309769, |
| "eval_runtime": 104.1898, |
| "eval_samples_per_second": 10.011, |
| "eval_steps_per_second": 0.633, |
| "step": 80000 |
| } |
| ], |
| "max_steps": 649200, |
| "num_train_epochs": 16, |
| "total_flos": 2.5844916958632346e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|