test / trainer_state.json
jayanta's picture
Training in progress, epoch 0
9eb7273
raw
history blame
8.36 kB
{
"best_metric": 0.9444444444444444,
"best_model_checkpoint": "test/checkpoint-14",
"epoch": 19.727272727272727,
"global_step": 40,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.73,
"eval_accuracy": 0.6111111111111112,
"eval_f1": 0.5416666666666666,
"eval_loss": 0.7271409630775452,
"eval_precision": 0.5208333333333334,
"eval_recall": 0.6111111111111112,
"eval_runtime": 6.3413,
"eval_samples_per_second": 5.677,
"eval_steps_per_second": 0.158,
"step": 2
},
{
"epoch": 1.73,
"eval_accuracy": 0.8333333333333334,
"eval_f1": 0.8148148148148149,
"eval_loss": 0.46258121728897095,
"eval_precision": 0.8666666666666667,
"eval_recall": 0.8333333333333334,
"eval_runtime": 5.8395,
"eval_samples_per_second": 6.165,
"eval_steps_per_second": 0.171,
"step": 4
},
{
"epoch": 2.73,
"eval_accuracy": 0.9166666666666666,
"eval_f1": 0.9157054125998224,
"eval_loss": 0.3036719858646393,
"eval_precision": 0.9163636363636365,
"eval_recall": 0.9166666666666666,
"eval_runtime": 5.8411,
"eval_samples_per_second": 6.163,
"eval_steps_per_second": 0.171,
"step": 6
},
{
"epoch": 3.73,
"eval_accuracy": 0.9166666666666666,
"eval_f1": 0.9131652661064424,
"eval_loss": 0.24792712926864624,
"eval_precision": 0.9259259259259258,
"eval_recall": 0.9166666666666666,
"eval_runtime": 5.7041,
"eval_samples_per_second": 6.311,
"eval_steps_per_second": 0.175,
"step": 8
},
{
"epoch": 4.73,
"learning_rate": 0.0001,
"loss": 0.6338,
"step": 10
},
{
"epoch": 4.73,
"eval_accuracy": 0.9166666666666666,
"eval_f1": 0.9174468085106382,
"eval_loss": 0.23573820292949677,
"eval_precision": 0.919732441471572,
"eval_recall": 0.9166666666666666,
"eval_runtime": 5.6473,
"eval_samples_per_second": 6.375,
"eval_steps_per_second": 0.177,
"step": 10
},
{
"epoch": 5.73,
"eval_accuracy": 0.8888888888888888,
"eval_f1": 0.890746934225195,
"eval_loss": 0.21319927275180817,
"eval_precision": 0.8982683982683982,
"eval_recall": 0.8888888888888888,
"eval_runtime": 6.2525,
"eval_samples_per_second": 5.758,
"eval_steps_per_second": 0.16,
"step": 12
},
{
"epoch": 6.73,
"eval_accuracy": 0.9444444444444444,
"eval_f1": 0.9430303030303031,
"eval_loss": 0.17125670611858368,
"eval_precision": 0.9487179487179487,
"eval_recall": 0.9444444444444444,
"eval_runtime": 5.7878,
"eval_samples_per_second": 6.22,
"eval_steps_per_second": 0.173,
"step": 14
},
{
"epoch": 7.73,
"eval_accuracy": 0.9444444444444444,
"eval_f1": 0.9444444444444444,
"eval_loss": 0.15498512983322144,
"eval_precision": 0.9444444444444444,
"eval_recall": 0.9444444444444444,
"eval_runtime": 6.2641,
"eval_samples_per_second": 5.747,
"eval_steps_per_second": 0.16,
"step": 16
},
{
"epoch": 8.73,
"eval_accuracy": 0.9444444444444444,
"eval_f1": 0.9444444444444444,
"eval_loss": 0.14064311981201172,
"eval_precision": 0.9444444444444444,
"eval_recall": 0.9444444444444444,
"eval_runtime": 5.34,
"eval_samples_per_second": 6.742,
"eval_steps_per_second": 0.187,
"step": 18
},
{
"epoch": 9.73,
"learning_rate": 6.666666666666667e-05,
"loss": 0.217,
"step": 20
},
{
"epoch": 9.73,
"eval_accuracy": 0.9444444444444444,
"eval_f1": 0.9444444444444444,
"eval_loss": 0.16372796893119812,
"eval_precision": 0.9444444444444444,
"eval_recall": 0.9444444444444444,
"eval_runtime": 5.2833,
"eval_samples_per_second": 6.814,
"eval_steps_per_second": 0.189,
"step": 20
},
{
"epoch": 10.73,
"eval_accuracy": 0.9166666666666666,
"eval_f1": 0.9157054125998224,
"eval_loss": 0.16719864308834076,
"eval_precision": 0.9163636363636365,
"eval_recall": 0.9166666666666666,
"eval_runtime": 6.4055,
"eval_samples_per_second": 5.62,
"eval_steps_per_second": 0.156,
"step": 22
},
{
"epoch": 11.73,
"eval_accuracy": 0.9444444444444444,
"eval_f1": 0.9444444444444444,
"eval_loss": 0.13150224089622498,
"eval_precision": 0.9444444444444444,
"eval_recall": 0.9444444444444444,
"eval_runtime": 5.1218,
"eval_samples_per_second": 7.029,
"eval_steps_per_second": 0.195,
"step": 24
},
{
"epoch": 12.73,
"eval_accuracy": 0.9444444444444444,
"eval_f1": 0.9444444444444444,
"eval_loss": 0.1597292125225067,
"eval_precision": 0.9444444444444444,
"eval_recall": 0.9444444444444444,
"eval_runtime": 5.1268,
"eval_samples_per_second": 7.022,
"eval_steps_per_second": 0.195,
"step": 26
},
{
"epoch": 13.73,
"eval_accuracy": 0.9444444444444444,
"eval_f1": 0.9444444444444444,
"eval_loss": 0.1412082463502884,
"eval_precision": 0.9444444444444444,
"eval_recall": 0.9444444444444444,
"eval_runtime": 5.6714,
"eval_samples_per_second": 6.348,
"eval_steps_per_second": 0.176,
"step": 28
},
{
"epoch": 14.73,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.0906,
"step": 30
},
{
"epoch": 14.73,
"eval_accuracy": 0.9444444444444444,
"eval_f1": 0.9444444444444444,
"eval_loss": 0.1776580661535263,
"eval_precision": 0.9444444444444444,
"eval_recall": 0.9444444444444444,
"eval_runtime": 5.8702,
"eval_samples_per_second": 6.133,
"eval_steps_per_second": 0.17,
"step": 30
},
{
"epoch": 15.73,
"eval_accuracy": 0.9444444444444444,
"eval_f1": 0.9444444444444444,
"eval_loss": 0.14607585966587067,
"eval_precision": 0.9444444444444444,
"eval_recall": 0.9444444444444444,
"eval_runtime": 7.113,
"eval_samples_per_second": 5.061,
"eval_steps_per_second": 0.141,
"step": 32
},
{
"epoch": 16.73,
"eval_accuracy": 0.9166666666666666,
"eval_f1": 0.9174468085106382,
"eval_loss": 0.19147923588752747,
"eval_precision": 0.919732441471572,
"eval_recall": 0.9166666666666666,
"eval_runtime": 5.3662,
"eval_samples_per_second": 6.709,
"eval_steps_per_second": 0.186,
"step": 34
},
{
"epoch": 17.73,
"eval_accuracy": 0.9166666666666666,
"eval_f1": 0.9174468085106382,
"eval_loss": 0.13753658533096313,
"eval_precision": 0.919732441471572,
"eval_recall": 0.9166666666666666,
"eval_runtime": 5.3487,
"eval_samples_per_second": 6.731,
"eval_steps_per_second": 0.187,
"step": 36
},
{
"epoch": 18.73,
"eval_accuracy": 0.9166666666666666,
"eval_f1": 0.9174468085106382,
"eval_loss": 0.1448293775320053,
"eval_precision": 0.919732441471572,
"eval_recall": 0.9166666666666666,
"eval_runtime": 5.5619,
"eval_samples_per_second": 6.473,
"eval_steps_per_second": 0.18,
"step": 38
},
{
"epoch": 19.73,
"learning_rate": 0.0,
"loss": 0.0548,
"step": 40
},
{
"epoch": 19.73,
"eval_accuracy": 0.9444444444444444,
"eval_f1": 0.9444444444444444,
"eval_loss": 0.12921589612960815,
"eval_precision": 0.9444444444444444,
"eval_recall": 0.9444444444444444,
"eval_runtime": 5.8187,
"eval_samples_per_second": 6.187,
"eval_steps_per_second": 0.172,
"step": 40
},
{
"epoch": 19.73,
"step": 40,
"total_flos": 4.050283110270566e+16,
"train_loss": 0.24904835671186448,
"train_runtime": 1252.5827,
"train_samples_per_second": 10.921,
"train_steps_per_second": 0.032
}
],
"max_steps": 40,
"num_train_epochs": 20,
"total_flos": 4.050283110270566e+16,
"trial_name": null,
"trial_params": null
}