viT5-comparative-only / trainer_state.json
duyvu8373's picture
Upload 12 files
792435f verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 41.666666666666664,
"eval_steps": 500,
"global_step": 1500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_bleu": 31.6092,
"eval_gen_len": 18.7891,
"eval_loss": 0.4555904269218445,
"eval_meteor": 0.5392,
"eval_runtime": 9.3254,
"eval_samples_per_second": 15.763,
"eval_steps_per_second": 0.536,
"step": 36
},
{
"epoch": 2.0,
"eval_bleu": 39.2195,
"eval_gen_len": 18.9592,
"eval_loss": 0.2636318802833557,
"eval_meteor": 0.596,
"eval_runtime": 4.5819,
"eval_samples_per_second": 32.083,
"eval_steps_per_second": 1.091,
"step": 72
},
{
"epoch": 3.0,
"eval_bleu": 41.6894,
"eval_gen_len": 18.9456,
"eval_loss": 0.19347849488258362,
"eval_meteor": 0.6148,
"eval_runtime": 4.5897,
"eval_samples_per_second": 32.028,
"eval_steps_per_second": 1.089,
"step": 108
},
{
"epoch": 4.0,
"eval_bleu": 42.741,
"eval_gen_len": 18.9524,
"eval_loss": 0.14780716598033905,
"eval_meteor": 0.629,
"eval_runtime": 4.5508,
"eval_samples_per_second": 32.302,
"eval_steps_per_second": 1.099,
"step": 144
},
{
"epoch": 5.0,
"eval_bleu": 44.9412,
"eval_gen_len": 18.9592,
"eval_loss": 0.10975220054388046,
"eval_meteor": 0.6429,
"eval_runtime": 4.5865,
"eval_samples_per_second": 32.05,
"eval_steps_per_second": 1.09,
"step": 180
},
{
"epoch": 6.0,
"eval_bleu": 44.7737,
"eval_gen_len": 18.9592,
"eval_loss": 0.0995095744729042,
"eval_meteor": 0.6438,
"eval_runtime": 4.5237,
"eval_samples_per_second": 32.495,
"eval_steps_per_second": 1.105,
"step": 216
},
{
"epoch": 7.0,
"eval_bleu": 46.3121,
"eval_gen_len": 18.966,
"eval_loss": 0.07827065885066986,
"eval_meteor": 0.6584,
"eval_runtime": 4.5425,
"eval_samples_per_second": 32.361,
"eval_steps_per_second": 1.101,
"step": 252
},
{
"epoch": 8.0,
"eval_bleu": 46.0999,
"eval_gen_len": 18.966,
"eval_loss": 0.06610731780529022,
"eval_meteor": 0.6517,
"eval_runtime": 4.517,
"eval_samples_per_second": 32.544,
"eval_steps_per_second": 1.107,
"step": 288
},
{
"epoch": 9.0,
"eval_bleu": 46.8293,
"eval_gen_len": 18.9592,
"eval_loss": 0.05300338938832283,
"eval_meteor": 0.6635,
"eval_runtime": 4.5646,
"eval_samples_per_second": 32.204,
"eval_steps_per_second": 1.095,
"step": 324
},
{
"epoch": 10.0,
"eval_bleu": 46.8475,
"eval_gen_len": 18.9592,
"eval_loss": 0.04656795412302017,
"eval_meteor": 0.6619,
"eval_runtime": 4.5539,
"eval_samples_per_second": 32.28,
"eval_steps_per_second": 1.098,
"step": 360
},
{
"epoch": 11.0,
"eval_bleu": 47.1376,
"eval_gen_len": 18.966,
"eval_loss": 0.038304273039102554,
"eval_meteor": 0.6641,
"eval_runtime": 4.5164,
"eval_samples_per_second": 32.548,
"eval_steps_per_second": 1.107,
"step": 396
},
{
"epoch": 12.0,
"eval_bleu": 47.323,
"eval_gen_len": 18.9796,
"eval_loss": 0.03775802627205849,
"eval_meteor": 0.6633,
"eval_runtime": 4.5165,
"eval_samples_per_second": 32.547,
"eval_steps_per_second": 1.107,
"step": 432
},
{
"epoch": 13.0,
"eval_bleu": 47.3289,
"eval_gen_len": 18.9524,
"eval_loss": 0.03001909889280796,
"eval_meteor": 0.666,
"eval_runtime": 4.5251,
"eval_samples_per_second": 32.486,
"eval_steps_per_second": 1.105,
"step": 468
},
{
"epoch": 13.89,
"learning_rate": 1.4444444444444446e-05,
"loss": 0.3225,
"step": 500
},
{
"epoch": 14.0,
"eval_bleu": 47.5584,
"eval_gen_len": 18.9796,
"eval_loss": 0.025304924696683884,
"eval_meteor": 0.6656,
"eval_runtime": 4.5255,
"eval_samples_per_second": 32.483,
"eval_steps_per_second": 1.105,
"step": 504
},
{
"epoch": 15.0,
"eval_bleu": 47.811,
"eval_gen_len": 18.9796,
"eval_loss": 0.020229607820510864,
"eval_meteor": 0.667,
"eval_runtime": 4.5539,
"eval_samples_per_second": 32.28,
"eval_steps_per_second": 1.098,
"step": 540
},
{
"epoch": 16.0,
"eval_bleu": 47.4249,
"eval_gen_len": 18.9524,
"eval_loss": 0.01784553937613964,
"eval_meteor": 0.6666,
"eval_runtime": 4.5361,
"eval_samples_per_second": 32.407,
"eval_steps_per_second": 1.102,
"step": 576
},
{
"epoch": 17.0,
"eval_bleu": 47.8092,
"eval_gen_len": 18.9796,
"eval_loss": 0.014756113290786743,
"eval_meteor": 0.6668,
"eval_runtime": 4.546,
"eval_samples_per_second": 32.336,
"eval_steps_per_second": 1.1,
"step": 612
},
{
"epoch": 18.0,
"eval_bleu": 47.9094,
"eval_gen_len": 18.966,
"eval_loss": 0.012134283781051636,
"eval_meteor": 0.6684,
"eval_runtime": 4.5836,
"eval_samples_per_second": 32.071,
"eval_steps_per_second": 1.091,
"step": 648
},
{
"epoch": 19.0,
"eval_bleu": 47.9966,
"eval_gen_len": 18.9796,
"eval_loss": 0.011289956048130989,
"eval_meteor": 0.6676,
"eval_runtime": 4.5433,
"eval_samples_per_second": 32.355,
"eval_steps_per_second": 1.101,
"step": 684
},
{
"epoch": 20.0,
"eval_bleu": 47.7647,
"eval_gen_len": 18.966,
"eval_loss": 0.009249957278370857,
"eval_meteor": 0.6692,
"eval_runtime": 4.5767,
"eval_samples_per_second": 32.119,
"eval_steps_per_second": 1.092,
"step": 720
},
{
"epoch": 21.0,
"eval_bleu": 47.7044,
"eval_gen_len": 18.966,
"eval_loss": 0.010435191914439201,
"eval_meteor": 0.6681,
"eval_runtime": 4.5541,
"eval_samples_per_second": 32.279,
"eval_steps_per_second": 1.098,
"step": 756
},
{
"epoch": 22.0,
"eval_bleu": 47.8202,
"eval_gen_len": 18.966,
"eval_loss": 0.0071021514013409615,
"eval_meteor": 0.6691,
"eval_runtime": 4.5533,
"eval_samples_per_second": 32.284,
"eval_steps_per_second": 1.098,
"step": 792
},
{
"epoch": 23.0,
"eval_bleu": 47.7776,
"eval_gen_len": 18.966,
"eval_loss": 0.006448396481573582,
"eval_meteor": 0.6691,
"eval_runtime": 4.5473,
"eval_samples_per_second": 32.327,
"eval_steps_per_second": 1.1,
"step": 828
},
{
"epoch": 24.0,
"eval_bleu": 47.7776,
"eval_gen_len": 18.966,
"eval_loss": 0.005185275804251432,
"eval_meteor": 0.6691,
"eval_runtime": 4.5157,
"eval_samples_per_second": 32.553,
"eval_steps_per_second": 1.107,
"step": 864
},
{
"epoch": 25.0,
"eval_bleu": 47.8964,
"eval_gen_len": 18.966,
"eval_loss": 0.00483354926109314,
"eval_meteor": 0.669,
"eval_runtime": 4.5248,
"eval_samples_per_second": 32.488,
"eval_steps_per_second": 1.105,
"step": 900
},
{
"epoch": 26.0,
"eval_bleu": 47.8964,
"eval_gen_len": 18.966,
"eval_loss": 0.007188287563621998,
"eval_meteor": 0.6689,
"eval_runtime": 4.5497,
"eval_samples_per_second": 32.309,
"eval_steps_per_second": 1.099,
"step": 936
},
{
"epoch": 27.0,
"eval_bleu": 47.7776,
"eval_gen_len": 18.966,
"eval_loss": 0.0029953974299132824,
"eval_meteor": 0.6691,
"eval_runtime": 4.5837,
"eval_samples_per_second": 32.07,
"eval_steps_per_second": 1.091,
"step": 972
},
{
"epoch": 27.78,
"learning_rate": 8.888888888888888e-06,
"loss": 0.0406,
"step": 1000
},
{
"epoch": 28.0,
"eval_bleu": 47.7776,
"eval_gen_len": 18.966,
"eval_loss": 0.003659368259832263,
"eval_meteor": 0.6691,
"eval_runtime": 4.5366,
"eval_samples_per_second": 32.403,
"eval_steps_per_second": 1.102,
"step": 1008
},
{
"epoch": 29.0,
"eval_bleu": 47.8202,
"eval_gen_len": 18.966,
"eval_loss": 0.002260121051222086,
"eval_meteor": 0.6691,
"eval_runtime": 4.5255,
"eval_samples_per_second": 32.483,
"eval_steps_per_second": 1.105,
"step": 1044
},
{
"epoch": 30.0,
"eval_bleu": 47.8768,
"eval_gen_len": 18.966,
"eval_loss": 0.004143106751143932,
"eval_meteor": 0.6693,
"eval_runtime": 4.5546,
"eval_samples_per_second": 32.275,
"eval_steps_per_second": 1.098,
"step": 1080
},
{
"epoch": 31.0,
"eval_bleu": 47.8964,
"eval_gen_len": 18.966,
"eval_loss": 0.003928070422261953,
"eval_meteor": 0.669,
"eval_runtime": 4.5798,
"eval_samples_per_second": 32.097,
"eval_steps_per_second": 1.092,
"step": 1116
},
{
"epoch": 32.0,
"eval_bleu": 47.8964,
"eval_gen_len": 18.966,
"eval_loss": 0.0019372537499293685,
"eval_meteor": 0.669,
"eval_runtime": 4.5224,
"eval_samples_per_second": 32.505,
"eval_steps_per_second": 1.106,
"step": 1152
},
{
"epoch": 33.0,
"eval_bleu": 47.8964,
"eval_gen_len": 18.966,
"eval_loss": 0.0019416833529248834,
"eval_meteor": 0.669,
"eval_runtime": 4.5629,
"eval_samples_per_second": 32.216,
"eval_steps_per_second": 1.096,
"step": 1188
},
{
"epoch": 34.0,
"eval_bleu": 47.82,
"eval_gen_len": 18.966,
"eval_loss": 0.0017969176406040788,
"eval_meteor": 0.669,
"eval_runtime": 4.5036,
"eval_samples_per_second": 32.64,
"eval_steps_per_second": 1.11,
"step": 1224
},
{
"epoch": 35.0,
"eval_bleu": 48.0042,
"eval_gen_len": 18.966,
"eval_loss": 0.0013237865641713142,
"eval_meteor": 0.6701,
"eval_runtime": 4.545,
"eval_samples_per_second": 32.343,
"eval_steps_per_second": 1.1,
"step": 1260
},
{
"epoch": 36.0,
"eval_bleu": 47.8964,
"eval_gen_len": 18.966,
"eval_loss": 0.001009355066344142,
"eval_meteor": 0.669,
"eval_runtime": 4.5054,
"eval_samples_per_second": 32.628,
"eval_steps_per_second": 1.11,
"step": 1296
},
{
"epoch": 37.0,
"eval_bleu": 47.8964,
"eval_gen_len": 18.966,
"eval_loss": 0.0010085658868774772,
"eval_meteor": 0.669,
"eval_runtime": 4.512,
"eval_samples_per_second": 32.58,
"eval_steps_per_second": 1.108,
"step": 1332
},
{
"epoch": 38.0,
"eval_bleu": 47.9279,
"eval_gen_len": 18.966,
"eval_loss": 0.0019252120982855558,
"eval_meteor": 0.67,
"eval_runtime": 4.5663,
"eval_samples_per_second": 32.192,
"eval_steps_per_second": 1.095,
"step": 1368
},
{
"epoch": 39.0,
"eval_bleu": 48.0042,
"eval_gen_len": 18.966,
"eval_loss": 0.0006166099337860942,
"eval_meteor": 0.6701,
"eval_runtime": 4.5135,
"eval_samples_per_second": 32.569,
"eval_steps_per_second": 1.108,
"step": 1404
},
{
"epoch": 40.0,
"eval_bleu": 48.0042,
"eval_gen_len": 18.966,
"eval_loss": 0.0006395149976015091,
"eval_meteor": 0.6701,
"eval_runtime": 4.5938,
"eval_samples_per_second": 31.999,
"eval_steps_per_second": 1.088,
"step": 1440
},
{
"epoch": 41.0,
"eval_bleu": 48.0042,
"eval_gen_len": 18.966,
"eval_loss": 0.0003768605238292366,
"eval_meteor": 0.6701,
"eval_runtime": 4.5485,
"eval_samples_per_second": 32.318,
"eval_steps_per_second": 1.099,
"step": 1476
},
{
"epoch": 41.67,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.0166,
"step": 1500
}
],
"logging_steps": 500,
"max_steps": 1800,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"total_flos": 5983929469071360.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}