qg-t5-base-squad / trainer_state.json
xiaothung's picture
Initial commit with model files
394b8f4
{
"best_metric": 29.4578,
"best_model_checkpoint": "/content/gdrive/MyDrive/FYP/t5-base-squad/checkpoint-4000",
"epoch": 0.91324200913242,
"eval_steps": 1000,
"global_step": 9000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1,
"grad_norm": 0.5294313430786133,
"learning_rate": 3.5961440892947744e-05,
"loss": 0.4964,
"step": 1000
},
{
"epoch": 0.1,
"eval_gen_len": 14.3475,
"eval_loss": 0.20427940785884857,
"eval_rouge1": 28.3615,
"eval_rouge2": 8.4682,
"eval_rougeL": 25.4968,
"eval_rougeLsum": 25.5017,
"eval_runtime": 1076.2576,
"eval_samples_per_second": 8.139,
"eval_steps_per_second": 1.017,
"step": 1000
},
{
"epoch": 0.2,
"grad_norm": 0.3378017246723175,
"learning_rate": 3.19066463723998e-05,
"loss": 0.2181,
"step": 2000
},
{
"epoch": 0.2,
"eval_gen_len": 14.0145,
"eval_loss": 0.19709938764572144,
"eval_rouge1": 28.848,
"eval_rouge2": 8.9434,
"eval_rougeL": 25.9934,
"eval_rougeLsum": 25.9925,
"eval_runtime": 1064.9247,
"eval_samples_per_second": 8.226,
"eval_steps_per_second": 1.028,
"step": 2000
},
{
"epoch": 0.3,
"grad_norm": 0.4546686112880707,
"learning_rate": 2.786808726534754e-05,
"loss": 0.2113,
"step": 3000
},
{
"epoch": 0.3,
"eval_gen_len": 13.3511,
"eval_loss": 0.19329853355884552,
"eval_rouge1": 29.2691,
"eval_rouge2": 9.1353,
"eval_rougeL": 26.4378,
"eval_rougeLsum": 26.4387,
"eval_runtime": 1053.7948,
"eval_samples_per_second": 8.313,
"eval_steps_per_second": 1.039,
"step": 3000
},
{
"epoch": 0.41,
"grad_norm": 0.3615570664405823,
"learning_rate": 2.3829528158295285e-05,
"loss": 0.2055,
"step": 4000
},
{
"epoch": 0.41,
"eval_gen_len": 14.0945,
"eval_loss": 0.1883835792541504,
"eval_rouge1": 29.4578,
"eval_rouge2": 9.2642,
"eval_rougeL": 26.5988,
"eval_rougeLsum": 26.6249,
"eval_runtime": 1085.8301,
"eval_samples_per_second": 8.068,
"eval_steps_per_second": 1.008,
"step": 4000
},
{
"epoch": 0.51,
"grad_norm": 0.3282989263534546,
"learning_rate": 1.9795027904616947e-05,
"loss": 0.2032,
"step": 5000
},
{
"epoch": 0.51,
"eval_gen_len": 14.0758,
"eval_loss": 0.18715335428714752,
"eval_rouge1": 29.1722,
"eval_rouge2": 9.0803,
"eval_rougeL": 26.3435,
"eval_rougeLsum": 26.3577,
"eval_runtime": 1079.1365,
"eval_samples_per_second": 8.118,
"eval_steps_per_second": 1.015,
"step": 5000
},
{
"epoch": 0.61,
"grad_norm": 0.43038222193717957,
"learning_rate": 1.5752409944190767e-05,
"loss": 0.1997,
"step": 6000
},
{
"epoch": 0.61,
"eval_gen_len": 14.4075,
"eval_loss": 0.18170754611492157,
"eval_rouge1": 29.2152,
"eval_rouge2": 9.0871,
"eval_rougeL": 26.2656,
"eval_rougeLsum": 26.2699,
"eval_runtime": 1047.9477,
"eval_samples_per_second": 8.359,
"eval_steps_per_second": 1.045,
"step": 6000
},
{
"epoch": 0.71,
"grad_norm": 0.35650455951690674,
"learning_rate": 1.171385083713851e-05,
"loss": 0.201,
"step": 7000
},
{
"epoch": 0.71,
"eval_gen_len": 14.4231,
"eval_loss": 0.18083173036575317,
"eval_rouge1": 28.9127,
"eval_rouge2": 8.9743,
"eval_rougeL": 26.0338,
"eval_rougeLsum": 26.0194,
"eval_runtime": 1047.0949,
"eval_samples_per_second": 8.366,
"eval_steps_per_second": 1.046,
"step": 7000
},
{
"epoch": 0.81,
"grad_norm": 0.4643958508968353,
"learning_rate": 7.675291730086252e-06,
"loss": 0.1971,
"step": 8000
},
{
"epoch": 0.81,
"eval_gen_len": 14.3037,
"eval_loss": 0.18144848942756653,
"eval_rouge1": 29.4071,
"eval_rouge2": 9.4867,
"eval_rougeL": 26.4771,
"eval_rougeLsum": 26.473,
"eval_runtime": 1059.6714,
"eval_samples_per_second": 8.267,
"eval_steps_per_second": 1.033,
"step": 8000
},
{
"epoch": 0.91,
"grad_norm": 0.4707474410533905,
"learning_rate": 3.640791476407915e-06,
"loss": 0.1994,
"step": 9000
},
{
"epoch": 0.91,
"eval_gen_len": 14.1849,
"eval_loss": 0.18111075460910797,
"eval_rouge1": 29.293,
"eval_rouge2": 9.3682,
"eval_rougeL": 26.3751,
"eval_rougeLsum": 26.3635,
"eval_runtime": 1052.0617,
"eval_samples_per_second": 8.327,
"eval_steps_per_second": 1.041,
"step": 9000
}
],
"logging_steps": 1000,
"max_steps": 9855,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1000,
"total_flos": 4.389855641468928e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}