model_fewrel_1_5 / trainer_state_9.json
Sefika's picture
Upload 9 files
e81706f verified
{
"best_metric": 0.0093231201171875,
"best_model_checkpoint": "model_fewrel_1_8-task9/checkpoint-1890",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 2100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_loss": 0.012847900390625,
"eval_rouge1": 98.6287,
"eval_rouge2": 98.0199,
"eval_rougeL": 98.3895,
"eval_rougeLsum": 98.5995,
"eval_runtime": 34.0174,
"eval_samples_per_second": 32.924,
"eval_steps_per_second": 1.029,
"step": 210
},
{
"epoch": 2.0,
"eval_loss": 0.0115509033203125,
"eval_rouge1": 98.9154,
"eval_rouge2": 98.5367,
"eval_rougeL": 98.8038,
"eval_rougeLsum": 98.9131,
"eval_runtime": 34.4471,
"eval_samples_per_second": 32.514,
"eval_steps_per_second": 1.016,
"step": 420
},
{
"epoch": 2.380952380952381,
"grad_norm": 0.4975210428237915,
"learning_rate": 0.0008665259359149131,
"loss": 0.055,
"step": 500
},
{
"epoch": 3.0,
"eval_loss": 0.01337432861328125,
"eval_rouge1": 98.6194,
"eval_rouge2": 98.0506,
"eval_rougeL": 98.3926,
"eval_rougeLsum": 98.5818,
"eval_runtime": 33.5087,
"eval_samples_per_second": 33.424,
"eval_steps_per_second": 1.045,
"step": 630
},
{
"epoch": 4.0,
"eval_loss": 0.01132965087890625,
"eval_rouge1": 98.8522,
"eval_rouge2": 98.4253,
"eval_rougeL": 98.7051,
"eval_rougeLsum": 98.8197,
"eval_runtime": 33.1182,
"eval_samples_per_second": 33.818,
"eval_steps_per_second": 1.057,
"step": 840
},
{
"epoch": 4.761904761904762,
"grad_norm": 0.9489365816116333,
"learning_rate": 0.0005373650467932121,
"loss": 0.0088,
"step": 1000
},
{
"epoch": 5.0,
"eval_loss": 0.01326751708984375,
"eval_rouge1": 99.2134,
"eval_rouge2": 98.8765,
"eval_rougeL": 99.0941,
"eval_rougeLsum": 99.2096,
"eval_runtime": 36.9437,
"eval_samples_per_second": 30.316,
"eval_steps_per_second": 0.947,
"step": 1050
},
{
"epoch": 6.0,
"eval_loss": 0.01346588134765625,
"eval_rouge1": 99.2312,
"eval_rouge2": 98.8839,
"eval_rougeL": 99.0944,
"eval_rougeLsum": 99.2153,
"eval_runtime": 34.9945,
"eval_samples_per_second": 32.005,
"eval_steps_per_second": 1.0,
"step": 1260
},
{
"epoch": 7.0,
"eval_loss": 0.00937652587890625,
"eval_rouge1": 99.5998,
"eval_rouge2": 99.3899,
"eval_rougeL": 99.5205,
"eval_rougeLsum": 99.5994,
"eval_runtime": 34.6991,
"eval_samples_per_second": 32.278,
"eval_steps_per_second": 1.009,
"step": 1470
},
{
"epoch": 7.142857142857143,
"grad_norm": 0.030887478962540627,
"learning_rate": 0.00018825509907063325,
"loss": 0.0032,
"step": 1500
},
{
"epoch": 8.0,
"eval_loss": 0.010223388671875,
"eval_rouge1": 99.4127,
"eval_rouge2": 99.1295,
"eval_rougeL": 99.3072,
"eval_rougeLsum": 99.3986,
"eval_runtime": 34.3802,
"eval_samples_per_second": 32.577,
"eval_steps_per_second": 1.018,
"step": 1680
},
{
"epoch": 9.0,
"eval_loss": 0.0093231201171875,
"eval_rouge1": 99.4127,
"eval_rouge2": 99.1295,
"eval_rougeL": 99.3072,
"eval_rougeLsum": 99.3986,
"eval_runtime": 34.2644,
"eval_samples_per_second": 32.687,
"eval_steps_per_second": 1.021,
"step": 1890
},
{
"epoch": 9.523809523809524,
"grad_norm": 0.0026726792566478252,
"learning_rate": 5.5845868874357386e-06,
"loss": 0.0011,
"step": 2000
},
{
"epoch": 10.0,
"eval_loss": 0.00940704345703125,
"eval_rouge1": 99.4127,
"eval_rouge2": 99.1295,
"eval_rougeL": 99.3072,
"eval_rougeLsum": 99.3986,
"eval_runtime": 34.3879,
"eval_samples_per_second": 32.57,
"eval_steps_per_second": 1.018,
"step": 2100
}
],
"logging_steps": 500,
"max_steps": 2100,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.3099168784384e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}