| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 41.666666666666664, | |
| "eval_steps": 500, | |
| "global_step": 1500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_bleu": 31.6092, | |
| "eval_gen_len": 18.7891, | |
| "eval_loss": 0.4555904269218445, | |
| "eval_meteor": 0.5392, | |
| "eval_runtime": 9.3254, | |
| "eval_samples_per_second": 15.763, | |
| "eval_steps_per_second": 0.536, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_bleu": 39.2195, | |
| "eval_gen_len": 18.9592, | |
| "eval_loss": 0.2636318802833557, | |
| "eval_meteor": 0.596, | |
| "eval_runtime": 4.5819, | |
| "eval_samples_per_second": 32.083, | |
| "eval_steps_per_second": 1.091, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_bleu": 41.6894, | |
| "eval_gen_len": 18.9456, | |
| "eval_loss": 0.19347849488258362, | |
| "eval_meteor": 0.6148, | |
| "eval_runtime": 4.5897, | |
| "eval_samples_per_second": 32.028, | |
| "eval_steps_per_second": 1.089, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_bleu": 42.741, | |
| "eval_gen_len": 18.9524, | |
| "eval_loss": 0.14780716598033905, | |
| "eval_meteor": 0.629, | |
| "eval_runtime": 4.5508, | |
| "eval_samples_per_second": 32.302, | |
| "eval_steps_per_second": 1.099, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_bleu": 44.9412, | |
| "eval_gen_len": 18.9592, | |
| "eval_loss": 0.10975220054388046, | |
| "eval_meteor": 0.6429, | |
| "eval_runtime": 4.5865, | |
| "eval_samples_per_second": 32.05, | |
| "eval_steps_per_second": 1.09, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_bleu": 44.7737, | |
| "eval_gen_len": 18.9592, | |
| "eval_loss": 0.0995095744729042, | |
| "eval_meteor": 0.6438, | |
| "eval_runtime": 4.5237, | |
| "eval_samples_per_second": 32.495, | |
| "eval_steps_per_second": 1.105, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_bleu": 46.3121, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.07827065885066986, | |
| "eval_meteor": 0.6584, | |
| "eval_runtime": 4.5425, | |
| "eval_samples_per_second": 32.361, | |
| "eval_steps_per_second": 1.101, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_bleu": 46.0999, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.06610731780529022, | |
| "eval_meteor": 0.6517, | |
| "eval_runtime": 4.517, | |
| "eval_samples_per_second": 32.544, | |
| "eval_steps_per_second": 1.107, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_bleu": 46.8293, | |
| "eval_gen_len": 18.9592, | |
| "eval_loss": 0.05300338938832283, | |
| "eval_meteor": 0.6635, | |
| "eval_runtime": 4.5646, | |
| "eval_samples_per_second": 32.204, | |
| "eval_steps_per_second": 1.095, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_bleu": 46.8475, | |
| "eval_gen_len": 18.9592, | |
| "eval_loss": 0.04656795412302017, | |
| "eval_meteor": 0.6619, | |
| "eval_runtime": 4.5539, | |
| "eval_samples_per_second": 32.28, | |
| "eval_steps_per_second": 1.098, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_bleu": 47.1376, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.038304273039102554, | |
| "eval_meteor": 0.6641, | |
| "eval_runtime": 4.5164, | |
| "eval_samples_per_second": 32.548, | |
| "eval_steps_per_second": 1.107, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_bleu": 47.323, | |
| "eval_gen_len": 18.9796, | |
| "eval_loss": 0.03775802627205849, | |
| "eval_meteor": 0.6633, | |
| "eval_runtime": 4.5165, | |
| "eval_samples_per_second": 32.547, | |
| "eval_steps_per_second": 1.107, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_bleu": 47.3289, | |
| "eval_gen_len": 18.9524, | |
| "eval_loss": 0.03001909889280796, | |
| "eval_meteor": 0.666, | |
| "eval_runtime": 4.5251, | |
| "eval_samples_per_second": 32.486, | |
| "eval_steps_per_second": 1.105, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 13.89, | |
| "learning_rate": 1.4444444444444446e-05, | |
| "loss": 0.3225, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_bleu": 47.5584, | |
| "eval_gen_len": 18.9796, | |
| "eval_loss": 0.025304924696683884, | |
| "eval_meteor": 0.6656, | |
| "eval_runtime": 4.5255, | |
| "eval_samples_per_second": 32.483, | |
| "eval_steps_per_second": 1.105, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_bleu": 47.811, | |
| "eval_gen_len": 18.9796, | |
| "eval_loss": 0.020229607820510864, | |
| "eval_meteor": 0.667, | |
| "eval_runtime": 4.5539, | |
| "eval_samples_per_second": 32.28, | |
| "eval_steps_per_second": 1.098, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_bleu": 47.4249, | |
| "eval_gen_len": 18.9524, | |
| "eval_loss": 0.01784553937613964, | |
| "eval_meteor": 0.6666, | |
| "eval_runtime": 4.5361, | |
| "eval_samples_per_second": 32.407, | |
| "eval_steps_per_second": 1.102, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_bleu": 47.8092, | |
| "eval_gen_len": 18.9796, | |
| "eval_loss": 0.014756113290786743, | |
| "eval_meteor": 0.6668, | |
| "eval_runtime": 4.546, | |
| "eval_samples_per_second": 32.336, | |
| "eval_steps_per_second": 1.1, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_bleu": 47.9094, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.012134283781051636, | |
| "eval_meteor": 0.6684, | |
| "eval_runtime": 4.5836, | |
| "eval_samples_per_second": 32.071, | |
| "eval_steps_per_second": 1.091, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_bleu": 47.9966, | |
| "eval_gen_len": 18.9796, | |
| "eval_loss": 0.011289956048130989, | |
| "eval_meteor": 0.6676, | |
| "eval_runtime": 4.5433, | |
| "eval_samples_per_second": 32.355, | |
| "eval_steps_per_second": 1.101, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_bleu": 47.7647, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.009249957278370857, | |
| "eval_meteor": 0.6692, | |
| "eval_runtime": 4.5767, | |
| "eval_samples_per_second": 32.119, | |
| "eval_steps_per_second": 1.092, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_bleu": 47.7044, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.010435191914439201, | |
| "eval_meteor": 0.6681, | |
| "eval_runtime": 4.5541, | |
| "eval_samples_per_second": 32.279, | |
| "eval_steps_per_second": 1.098, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_bleu": 47.8202, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.0071021514013409615, | |
| "eval_meteor": 0.6691, | |
| "eval_runtime": 4.5533, | |
| "eval_samples_per_second": 32.284, | |
| "eval_steps_per_second": 1.098, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_bleu": 47.7776, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.006448396481573582, | |
| "eval_meteor": 0.6691, | |
| "eval_runtime": 4.5473, | |
| "eval_samples_per_second": 32.327, | |
| "eval_steps_per_second": 1.1, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_bleu": 47.7776, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.005185275804251432, | |
| "eval_meteor": 0.6691, | |
| "eval_runtime": 4.5157, | |
| "eval_samples_per_second": 32.553, | |
| "eval_steps_per_second": 1.107, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_bleu": 47.8964, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.00483354926109314, | |
| "eval_meteor": 0.669, | |
| "eval_runtime": 4.5248, | |
| "eval_samples_per_second": 32.488, | |
| "eval_steps_per_second": 1.105, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_bleu": 47.8964, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.007188287563621998, | |
| "eval_meteor": 0.6689, | |
| "eval_runtime": 4.5497, | |
| "eval_samples_per_second": 32.309, | |
| "eval_steps_per_second": 1.099, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_bleu": 47.7776, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.0029953974299132824, | |
| "eval_meteor": 0.6691, | |
| "eval_runtime": 4.5837, | |
| "eval_samples_per_second": 32.07, | |
| "eval_steps_per_second": 1.091, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 27.78, | |
| "learning_rate": 8.888888888888888e-06, | |
| "loss": 0.0406, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_bleu": 47.7776, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.003659368259832263, | |
| "eval_meteor": 0.6691, | |
| "eval_runtime": 4.5366, | |
| "eval_samples_per_second": 32.403, | |
| "eval_steps_per_second": 1.102, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_bleu": 47.8202, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.002260121051222086, | |
| "eval_meteor": 0.6691, | |
| "eval_runtime": 4.5255, | |
| "eval_samples_per_second": 32.483, | |
| "eval_steps_per_second": 1.105, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_bleu": 47.8768, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.004143106751143932, | |
| "eval_meteor": 0.6693, | |
| "eval_runtime": 4.5546, | |
| "eval_samples_per_second": 32.275, | |
| "eval_steps_per_second": 1.098, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_bleu": 47.8964, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.003928070422261953, | |
| "eval_meteor": 0.669, | |
| "eval_runtime": 4.5798, | |
| "eval_samples_per_second": 32.097, | |
| "eval_steps_per_second": 1.092, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_bleu": 47.8964, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.0019372537499293685, | |
| "eval_meteor": 0.669, | |
| "eval_runtime": 4.5224, | |
| "eval_samples_per_second": 32.505, | |
| "eval_steps_per_second": 1.106, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_bleu": 47.8964, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.0019416833529248834, | |
| "eval_meteor": 0.669, | |
| "eval_runtime": 4.5629, | |
| "eval_samples_per_second": 32.216, | |
| "eval_steps_per_second": 1.096, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_bleu": 47.82, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.0017969176406040788, | |
| "eval_meteor": 0.669, | |
| "eval_runtime": 4.5036, | |
| "eval_samples_per_second": 32.64, | |
| "eval_steps_per_second": 1.11, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_bleu": 48.0042, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.0013237865641713142, | |
| "eval_meteor": 0.6701, | |
| "eval_runtime": 4.545, | |
| "eval_samples_per_second": 32.343, | |
| "eval_steps_per_second": 1.1, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_bleu": 47.8964, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.001009355066344142, | |
| "eval_meteor": 0.669, | |
| "eval_runtime": 4.5054, | |
| "eval_samples_per_second": 32.628, | |
| "eval_steps_per_second": 1.11, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_bleu": 47.8964, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.0010085658868774772, | |
| "eval_meteor": 0.669, | |
| "eval_runtime": 4.512, | |
| "eval_samples_per_second": 32.58, | |
| "eval_steps_per_second": 1.108, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_bleu": 47.9279, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.0019252120982855558, | |
| "eval_meteor": 0.67, | |
| "eval_runtime": 4.5663, | |
| "eval_samples_per_second": 32.192, | |
| "eval_steps_per_second": 1.095, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_bleu": 48.0042, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.0006166099337860942, | |
| "eval_meteor": 0.6701, | |
| "eval_runtime": 4.5135, | |
| "eval_samples_per_second": 32.569, | |
| "eval_steps_per_second": 1.108, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_bleu": 48.0042, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.0006395149976015091, | |
| "eval_meteor": 0.6701, | |
| "eval_runtime": 4.5938, | |
| "eval_samples_per_second": 31.999, | |
| "eval_steps_per_second": 1.088, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_bleu": 48.0042, | |
| "eval_gen_len": 18.966, | |
| "eval_loss": 0.0003768605238292366, | |
| "eval_meteor": 0.6701, | |
| "eval_runtime": 4.5485, | |
| "eval_samples_per_second": 32.318, | |
| "eval_steps_per_second": 1.099, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 41.67, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.0166, | |
| "step": 1500 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 1800, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 500, | |
| "total_flos": 5983929469071360.0, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |