test / trainer_state.json
jayanta's picture
Training in progress, epoch 0
8e3a06f
raw
history blame
8.36 kB
{
"best_metric": 0.9166666666666666,
"best_model_checkpoint": "test/checkpoint-20",
"epoch": 19.727272727272727,
"global_step": 40,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.73,
"eval_accuracy": 0.7222222222222222,
"eval_f1": 0.6707818930041152,
"eval_loss": 0.561225950717926,
"eval_precision": 0.8090277777777778,
"eval_recall": 0.7222222222222222,
"eval_runtime": 5.7451,
"eval_samples_per_second": 6.266,
"eval_steps_per_second": 0.174,
"step": 2
},
{
"epoch": 1.73,
"eval_accuracy": 0.7777777777777778,
"eval_f1": 0.7608080808080808,
"eval_loss": 0.3748733699321747,
"eval_precision": 0.7986111111111112,
"eval_recall": 0.7777777777777778,
"eval_runtime": 5.1904,
"eval_samples_per_second": 6.936,
"eval_steps_per_second": 0.193,
"step": 4
},
{
"epoch": 2.73,
"eval_accuracy": 0.8055555555555556,
"eval_f1": 0.8066066066066067,
"eval_loss": 0.522664487361908,
"eval_precision": 0.8703703703703703,
"eval_recall": 0.8055555555555556,
"eval_runtime": 5.3959,
"eval_samples_per_second": 6.672,
"eval_steps_per_second": 0.185,
"step": 6
},
{
"epoch": 3.73,
"eval_accuracy": 0.8055555555555556,
"eval_f1": 0.7864923747276689,
"eval_loss": 0.5397895574569702,
"eval_precision": 0.8524904214559387,
"eval_recall": 0.8055555555555556,
"eval_runtime": 5.524,
"eval_samples_per_second": 6.517,
"eval_steps_per_second": 0.181,
"step": 8
},
{
"epoch": 4.73,
"learning_rate": 0.0001,
"loss": 0.5735,
"step": 10
},
{
"epoch": 4.73,
"eval_accuracy": 0.8611111111111112,
"eval_f1": 0.8600823045267489,
"eval_loss": 0.24970261752605438,
"eval_precision": 0.8604607952434039,
"eval_recall": 0.8611111111111112,
"eval_runtime": 5.1179,
"eval_samples_per_second": 7.034,
"eval_steps_per_second": 0.195,
"step": 10
},
{
"epoch": 5.73,
"eval_accuracy": 0.8888888888888888,
"eval_f1": 0.8899470899470899,
"eval_loss": 0.26077044010162354,
"eval_precision": 0.8965277777777777,
"eval_recall": 0.8888888888888888,
"eval_runtime": 4.7095,
"eval_samples_per_second": 7.644,
"eval_steps_per_second": 0.212,
"step": 12
},
{
"epoch": 6.73,
"eval_accuracy": 0.8611111111111112,
"eval_f1": 0.8618907600463335,
"eval_loss": 0.2541900873184204,
"eval_precision": 0.8640211640211641,
"eval_recall": 0.8611111111111112,
"eval_runtime": 5.4129,
"eval_samples_per_second": 6.651,
"eval_steps_per_second": 0.185,
"step": 14
},
{
"epoch": 7.73,
"eval_accuracy": 0.8611111111111112,
"eval_f1": 0.857210401891253,
"eval_loss": 0.2999325394630432,
"eval_precision": 0.8668686868686869,
"eval_recall": 0.8611111111111112,
"eval_runtime": 5.0159,
"eval_samples_per_second": 7.177,
"eval_steps_per_second": 0.199,
"step": 16
},
{
"epoch": 8.73,
"eval_accuracy": 0.8888888888888888,
"eval_f1": 0.8870308435525827,
"eval_loss": 0.2993186116218567,
"eval_precision": 0.8912037037037036,
"eval_recall": 0.8888888888888888,
"eval_runtime": 4.9756,
"eval_samples_per_second": 7.235,
"eval_steps_per_second": 0.201,
"step": 18
},
{
"epoch": 9.73,
"learning_rate": 6.666666666666667e-05,
"loss": 0.0476,
"step": 20
},
{
"epoch": 9.73,
"eval_accuracy": 0.9166666666666666,
"eval_f1": 0.9160493827160493,
"eval_loss": 0.2704654335975647,
"eval_precision": 0.9169453734671126,
"eval_recall": 0.9166666666666666,
"eval_runtime": 6.2342,
"eval_samples_per_second": 5.775,
"eval_steps_per_second": 0.16,
"step": 20
},
{
"epoch": 10.73,
"eval_accuracy": 0.8888888888888888,
"eval_f1": 0.8888888888888888,
"eval_loss": 0.28197962045669556,
"eval_precision": 0.8888888888888888,
"eval_recall": 0.8888888888888888,
"eval_runtime": 5.1606,
"eval_samples_per_second": 6.976,
"eval_steps_per_second": 0.194,
"step": 22
},
{
"epoch": 11.73,
"eval_accuracy": 0.8611111111111112,
"eval_f1": 0.8618907600463335,
"eval_loss": 0.275021493434906,
"eval_precision": 0.8640211640211641,
"eval_recall": 0.8611111111111112,
"eval_runtime": 4.8026,
"eval_samples_per_second": 7.496,
"eval_steps_per_second": 0.208,
"step": 24
},
{
"epoch": 12.73,
"eval_accuracy": 0.8888888888888888,
"eval_f1": 0.8888888888888888,
"eval_loss": 0.2932831346988678,
"eval_precision": 0.8888888888888888,
"eval_recall": 0.8888888888888888,
"eval_runtime": 4.6825,
"eval_samples_per_second": 7.688,
"eval_steps_per_second": 0.214,
"step": 26
},
{
"epoch": 13.73,
"eval_accuracy": 0.8611111111111112,
"eval_f1": 0.8618907600463335,
"eval_loss": 0.2971099317073822,
"eval_precision": 0.8640211640211641,
"eval_recall": 0.8611111111111112,
"eval_runtime": 5.7546,
"eval_samples_per_second": 6.256,
"eval_steps_per_second": 0.174,
"step": 28
},
{
"epoch": 14.73,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.003,
"step": 30
},
{
"epoch": 14.73,
"eval_accuracy": 0.8888888888888888,
"eval_f1": 0.8888888888888888,
"eval_loss": 0.29043230414390564,
"eval_precision": 0.8888888888888888,
"eval_recall": 0.8888888888888888,
"eval_runtime": 9.7959,
"eval_samples_per_second": 3.675,
"eval_steps_per_second": 0.102,
"step": 30
},
{
"epoch": 15.73,
"eval_accuracy": 0.9166666666666666,
"eval_f1": 0.9160493827160493,
"eval_loss": 0.305411696434021,
"eval_precision": 0.9169453734671126,
"eval_recall": 0.9166666666666666,
"eval_runtime": 5.2177,
"eval_samples_per_second": 6.9,
"eval_steps_per_second": 0.192,
"step": 32
},
{
"epoch": 16.73,
"eval_accuracy": 0.9166666666666666,
"eval_f1": 0.9160493827160493,
"eval_loss": 0.29818451404571533,
"eval_precision": 0.9169453734671126,
"eval_recall": 0.9166666666666666,
"eval_runtime": 4.9626,
"eval_samples_per_second": 7.254,
"eval_steps_per_second": 0.202,
"step": 34
},
{
"epoch": 17.73,
"eval_accuracy": 0.9166666666666666,
"eval_f1": 0.9160493827160493,
"eval_loss": 0.30663102865219116,
"eval_precision": 0.9169453734671126,
"eval_recall": 0.9166666666666666,
"eval_runtime": 4.7376,
"eval_samples_per_second": 7.599,
"eval_steps_per_second": 0.211,
"step": 36
},
{
"epoch": 18.73,
"eval_accuracy": 0.8888888888888888,
"eval_f1": 0.8870308435525827,
"eval_loss": 0.30919915437698364,
"eval_precision": 0.8912037037037036,
"eval_recall": 0.8888888888888888,
"eval_runtime": 6.3085,
"eval_samples_per_second": 5.707,
"eval_steps_per_second": 0.159,
"step": 38
},
{
"epoch": 19.73,
"learning_rate": 0.0,
"loss": 0.0013,
"step": 40
},
{
"epoch": 19.73,
"eval_accuracy": 0.8888888888888888,
"eval_f1": 0.8870308435525827,
"eval_loss": 0.3030144274234772,
"eval_precision": 0.8912037037037036,
"eval_recall": 0.8888888888888888,
"eval_runtime": 4.7135,
"eval_samples_per_second": 7.638,
"eval_steps_per_second": 0.212,
"step": 40
},
{
"epoch": 19.73,
"step": 40,
"total_flos": 1.0463143975579976e+18,
"train_loss": 0.15636180818546563,
"train_runtime": 744.5374,
"train_samples_per_second": 18.374,
"train_steps_per_second": 0.054
}
],
"max_steps": 40,
"num_train_epochs": 20,
"total_flos": 1.0463143975579976e+18,
"trial_name": null,
"trial_params": null
}