vit5-lowerdata / trainer_state.json
duyvu8373's picture
Upload 12 files
f57eca9 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 22.123893805309734,
"eval_steps": 500,
"global_step": 2500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_bleu": 52.477,
"eval_gen_len": 15.9297,
"eval_loss": 0.23837900161743164,
"eval_meteor": 0.6883,
"eval_runtime": 19.7008,
"eval_samples_per_second": 26.699,
"eval_steps_per_second": 0.863,
"step": 113
},
{
"epoch": 2.0,
"eval_bleu": 61.8918,
"eval_gen_len": 17.5057,
"eval_loss": 0.19293558597564697,
"eval_meteor": 0.7932,
"eval_runtime": 14.8562,
"eval_samples_per_second": 35.406,
"eval_steps_per_second": 1.144,
"step": 226
},
{
"epoch": 3.0,
"eval_bleu": 65.2499,
"eval_gen_len": 17.6084,
"eval_loss": 0.1683175414800644,
"eval_meteor": 0.8119,
"eval_runtime": 14.8084,
"eval_samples_per_second": 35.52,
"eval_steps_per_second": 1.148,
"step": 339
},
{
"epoch": 4.0,
"eval_bleu": 65.682,
"eval_gen_len": 17.5951,
"eval_loss": 0.17639563977718353,
"eval_meteor": 0.8201,
"eval_runtime": 15.1305,
"eval_samples_per_second": 34.764,
"eval_steps_per_second": 1.124,
"step": 452
},
{
"epoch": 4.42,
"learning_rate": 0.0002469026548672566,
"loss": 0.2514,
"step": 500
},
{
"epoch": 5.0,
"eval_bleu": 65.9583,
"eval_gen_len": 17.6046,
"eval_loss": 0.18957138061523438,
"eval_meteor": 0.8191,
"eval_runtime": 15.1997,
"eval_samples_per_second": 34.606,
"eval_steps_per_second": 1.118,
"step": 565
},
{
"epoch": 6.0,
"eval_bleu": 66.7228,
"eval_gen_len": 17.616,
"eval_loss": 0.18413878977298737,
"eval_meteor": 0.8247,
"eval_runtime": 14.9673,
"eval_samples_per_second": 35.143,
"eval_steps_per_second": 1.136,
"step": 678
},
{
"epoch": 7.0,
"eval_bleu": 65.9782,
"eval_gen_len": 17.6065,
"eval_loss": 0.18860693275928497,
"eval_meteor": 0.82,
"eval_runtime": 14.8993,
"eval_samples_per_second": 35.304,
"eval_steps_per_second": 1.141,
"step": 791
},
{
"epoch": 8.0,
"eval_bleu": 65.6854,
"eval_gen_len": 17.4943,
"eval_loss": 0.17572908103466034,
"eval_meteor": 0.8259,
"eval_runtime": 14.925,
"eval_samples_per_second": 35.243,
"eval_steps_per_second": 1.139,
"step": 904
},
{
"epoch": 8.85,
"learning_rate": 0.00019380530973451326,
"loss": 0.0582,
"step": 1000
},
{
"epoch": 9.0,
"eval_bleu": 65.5495,
"eval_gen_len": 17.5494,
"eval_loss": 0.19610682129859924,
"eval_meteor": 0.8194,
"eval_runtime": 15.0953,
"eval_samples_per_second": 34.845,
"eval_steps_per_second": 1.126,
"step": 1017
},
{
"epoch": 10.0,
"eval_bleu": 66.5886,
"eval_gen_len": 17.6008,
"eval_loss": 0.20674178004264832,
"eval_meteor": 0.8255,
"eval_runtime": 14.9084,
"eval_samples_per_second": 35.282,
"eval_steps_per_second": 1.14,
"step": 1130
},
{
"epoch": 11.0,
"eval_bleu": 66.8651,
"eval_gen_len": 17.5494,
"eval_loss": 0.21284210681915283,
"eval_meteor": 0.8279,
"eval_runtime": 15.4057,
"eval_samples_per_second": 34.143,
"eval_steps_per_second": 1.103,
"step": 1243
},
{
"epoch": 12.0,
"eval_bleu": 66.0318,
"eval_gen_len": 17.6008,
"eval_loss": 0.2158810794353485,
"eval_meteor": 0.8188,
"eval_runtime": 15.3536,
"eval_samples_per_second": 34.259,
"eval_steps_per_second": 1.107,
"step": 1356
},
{
"epoch": 13.0,
"eval_bleu": 65.7773,
"eval_gen_len": 17.635,
"eval_loss": 0.21044525504112244,
"eval_meteor": 0.8116,
"eval_runtime": 14.9886,
"eval_samples_per_second": 35.093,
"eval_steps_per_second": 1.134,
"step": 1469
},
{
"epoch": 13.27,
"learning_rate": 0.0001407079646017699,
"loss": 0.0208,
"step": 1500
},
{
"epoch": 14.0,
"eval_bleu": 65.4921,
"eval_gen_len": 17.6559,
"eval_loss": 0.231339693069458,
"eval_meteor": 0.8044,
"eval_runtime": 14.9755,
"eval_samples_per_second": 35.124,
"eval_steps_per_second": 1.135,
"step": 1582
},
{
"epoch": 15.0,
"eval_bleu": 66.0398,
"eval_gen_len": 17.6464,
"eval_loss": 0.23520441353321075,
"eval_meteor": 0.8128,
"eval_runtime": 15.0741,
"eval_samples_per_second": 34.894,
"eval_steps_per_second": 1.128,
"step": 1695
},
{
"epoch": 16.0,
"eval_bleu": 67.0346,
"eval_gen_len": 17.5304,
"eval_loss": 0.21621014177799225,
"eval_meteor": 0.8352,
"eval_runtime": 15.0483,
"eval_samples_per_second": 34.954,
"eval_steps_per_second": 1.13,
"step": 1808
},
{
"epoch": 17.0,
"eval_bleu": 66.4929,
"eval_gen_len": 17.5551,
"eval_loss": 0.25872427225112915,
"eval_meteor": 0.8265,
"eval_runtime": 14.9009,
"eval_samples_per_second": 35.3,
"eval_steps_per_second": 1.141,
"step": 1921
},
{
"epoch": 17.7,
"learning_rate": 8.761061946902654e-05,
"loss": 0.0085,
"step": 2000
},
{
"epoch": 18.0,
"eval_bleu": 66.3935,
"eval_gen_len": 17.5875,
"eval_loss": 0.24429753422737122,
"eval_meteor": 0.8231,
"eval_runtime": 15.0363,
"eval_samples_per_second": 34.982,
"eval_steps_per_second": 1.131,
"step": 2034
},
{
"epoch": 19.0,
"eval_bleu": 66.582,
"eval_gen_len": 17.5475,
"eval_loss": 0.2678430378437042,
"eval_meteor": 0.8292,
"eval_runtime": 14.967,
"eval_samples_per_second": 35.144,
"eval_steps_per_second": 1.136,
"step": 2147
},
{
"epoch": 20.0,
"eval_bleu": 66.9099,
"eval_gen_len": 17.5513,
"eval_loss": 0.2873556911945343,
"eval_meteor": 0.8307,
"eval_runtime": 15.2473,
"eval_samples_per_second": 34.498,
"eval_steps_per_second": 1.115,
"step": 2260
},
{
"epoch": 21.0,
"eval_bleu": 67.0252,
"eval_gen_len": 17.5703,
"eval_loss": 0.2893502712249756,
"eval_meteor": 0.8283,
"eval_runtime": 15.0711,
"eval_samples_per_second": 34.901,
"eval_steps_per_second": 1.128,
"step": 2373
},
{
"epoch": 22.0,
"eval_bleu": 66.8167,
"eval_gen_len": 17.5722,
"eval_loss": 0.3005259931087494,
"eval_meteor": 0.8266,
"eval_runtime": 15.0539,
"eval_samples_per_second": 34.941,
"eval_steps_per_second": 1.129,
"step": 2486
},
{
"epoch": 22.12,
"learning_rate": 3.451327433628318e-05,
"loss": 0.0034,
"step": 2500
}
],
"logging_steps": 500,
"max_steps": 2825,
"num_input_tokens_seen": 0,
"num_train_epochs": 25,
"save_steps": 500,
"total_flos": 9808303958876160.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}