model_fewrel_1_5 / trainer_state_1.json
Sefika's picture
Upload 9 files
e81706f verified
{
"best_metric": 0.0300140380859375,
"best_model_checkpoint": "model_fewrel_1_1-task2/checkpoint-1260",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 2100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_loss": 0.0692138671875,
"eval_rouge1": 95.7517,
"eval_rouge2": 94.6841,
"eval_rougeL": 95.6971,
"eval_rougeLsum": 95.7331,
"eval_runtime": 33.2038,
"eval_samples_per_second": 33.731,
"eval_steps_per_second": 1.054,
"step": 210
},
{
"epoch": 2.0,
"eval_loss": 0.034423828125,
"eval_rouge1": 97.351,
"eval_rouge2": 96.6251,
"eval_rougeL": 97.3032,
"eval_rougeLsum": 97.2964,
"eval_runtime": 32.6308,
"eval_samples_per_second": 34.323,
"eval_steps_per_second": 1.073,
"step": 420
},
{
"epoch": 2.380952380952381,
"grad_norm": 0.4508669972419739,
"learning_rate": 0.0008665259359149131,
"loss": 0.0924,
"step": 500
},
{
"epoch": 3.0,
"eval_loss": 0.03924560546875,
"eval_rouge1": 97.2483,
"eval_rouge2": 96.6208,
"eval_rougeL": 97.2291,
"eval_rougeLsum": 97.2002,
"eval_runtime": 33.4876,
"eval_samples_per_second": 33.445,
"eval_steps_per_second": 1.045,
"step": 630
},
{
"epoch": 4.0,
"eval_loss": 0.0413818359375,
"eval_rouge1": 97.1906,
"eval_rouge2": 96.559,
"eval_rougeL": 97.1839,
"eval_rougeLsum": 97.1621,
"eval_runtime": 34.4429,
"eval_samples_per_second": 32.518,
"eval_steps_per_second": 1.016,
"step": 840
},
{
"epoch": 4.761904761904762,
"grad_norm": 0.1619143784046173,
"learning_rate": 0.0005373650467932121,
"loss": 0.0276,
"step": 1000
},
{
"epoch": 5.0,
"eval_loss": 0.042449951171875,
"eval_rouge1": 97.4464,
"eval_rouge2": 96.8611,
"eval_rougeL": 97.4297,
"eval_rougeLsum": 97.4266,
"eval_runtime": 32.4102,
"eval_samples_per_second": 34.557,
"eval_steps_per_second": 1.08,
"step": 1050
},
{
"epoch": 6.0,
"eval_loss": 0.0300140380859375,
"eval_rouge1": 98.1516,
"eval_rouge2": 97.6994,
"eval_rougeL": 98.1475,
"eval_rougeLsum": 98.155,
"eval_runtime": 32.3626,
"eval_samples_per_second": 34.608,
"eval_steps_per_second": 1.081,
"step": 1260
},
{
"epoch": 7.0,
"eval_loss": 0.031494140625,
"eval_rouge1": 97.6953,
"eval_rouge2": 97.1861,
"eval_rougeL": 97.7355,
"eval_rougeLsum": 97.713,
"eval_runtime": 31.6892,
"eval_samples_per_second": 35.343,
"eval_steps_per_second": 1.104,
"step": 1470
},
{
"epoch": 7.142857142857143,
"grad_norm": 0.17737896740436554,
"learning_rate": 0.00018825509907063325,
"loss": 0.0138,
"step": 1500
},
{
"epoch": 8.0,
"eval_loss": 0.03729248046875,
"eval_rouge1": 98.0557,
"eval_rouge2": 97.5844,
"eval_rougeL": 98.0434,
"eval_rougeLsum": 98.0396,
"eval_runtime": 32.4888,
"eval_samples_per_second": 34.473,
"eval_steps_per_second": 1.077,
"step": 1680
},
{
"epoch": 9.0,
"eval_loss": 0.0333251953125,
"eval_rouge1": 98.2547,
"eval_rouge2": 97.8119,
"eval_rougeL": 98.2452,
"eval_rougeLsum": 98.2669,
"eval_runtime": 31.823,
"eval_samples_per_second": 35.195,
"eval_steps_per_second": 1.1,
"step": 1890
},
{
"epoch": 9.523809523809524,
"grad_norm": 0.1923867166042328,
"learning_rate": 5.5845868874357386e-06,
"loss": 0.0088,
"step": 2000
},
{
"epoch": 10.0,
"eval_loss": 0.03363037109375,
"eval_rouge1": 98.2547,
"eval_rouge2": 97.8119,
"eval_rougeL": 98.2452,
"eval_rougeLsum": 98.2669,
"eval_runtime": 31.6927,
"eval_samples_per_second": 35.339,
"eval_steps_per_second": 1.104,
"step": 2100
}
],
"logging_steps": 500,
"max_steps": 2100,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.3099168784384e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}