Variants-V1 / trainer_state.json
christine-withers's picture
Upload 11 files
e09ef37 verified
{
"best_metric": 0.8884462151394423,
"best_model_checkpoint": "/hps/software/users/chembl/christine/git_projects/OTAR3088/Entity-Extraction-Modular-pipeline/outputs/model_outputs/hf/Variants_V1/base/bioformers/checkpoint-464",
"epoch": 16.0,
"eval_steps": 500,
"global_step": 464,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 2.3756778240203857,
"learning_rate": 9.310344827586207e-06,
"loss": 0.7473,
"step": 29
},
{
"epoch": 1.0,
"eval_accuracy": 0.8869123252858958,
"eval_f1": 0.0,
"eval_loss": 0.4901806712150574,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 1.1486,
"eval_samples_per_second": 87.93,
"eval_steps_per_second": 6.094,
"step": 29
},
{
"epoch": 2.0,
"grad_norm": 0.7127112746238708,
"learning_rate": 1.931034482758621e-05,
"loss": 0.3253,
"step": 58
},
{
"epoch": 2.0,
"eval_accuracy": 0.9199491740787802,
"eval_f1": 0.2011173184357542,
"eval_loss": 0.2195604145526886,
"eval_precision": 0.3050847457627119,
"eval_recall": 0.15,
"eval_runtime": 0.9154,
"eval_samples_per_second": 110.332,
"eval_steps_per_second": 7.647,
"step": 58
},
{
"epoch": 3.0,
"grad_norm": 1.941286563873291,
"learning_rate": 1.896551724137931e-05,
"loss": 0.129,
"step": 87
},
{
"epoch": 3.0,
"eval_accuracy": 0.9618805590851334,
"eval_f1": 0.741747572815534,
"eval_loss": 0.10903950780630112,
"eval_precision": 0.6945454545454546,
"eval_recall": 0.7958333333333333,
"eval_runtime": 0.9369,
"eval_samples_per_second": 107.801,
"eval_steps_per_second": 7.471,
"step": 87
},
{
"epoch": 4.0,
"grad_norm": 1.2016586065292358,
"learning_rate": 1.78544061302682e-05,
"loss": 0.0581,
"step": 116
},
{
"epoch": 4.0,
"eval_accuracy": 0.974587039390089,
"eval_f1": 0.8183556405353729,
"eval_loss": 0.08683695644140244,
"eval_precision": 0.7561837455830389,
"eval_recall": 0.8916666666666667,
"eval_runtime": 0.9184,
"eval_samples_per_second": 109.975,
"eval_steps_per_second": 7.622,
"step": 116
},
{
"epoch": 5.0,
"grad_norm": 1.6464694738388062,
"learning_rate": 1.674329501915709e-05,
"loss": 0.0365,
"step": 145
},
{
"epoch": 5.0,
"eval_accuracy": 0.9760694620923338,
"eval_f1": 0.8285163776493256,
"eval_loss": 0.07583592087030411,
"eval_precision": 0.7706093189964157,
"eval_recall": 0.8958333333333334,
"eval_runtime": 0.9273,
"eval_samples_per_second": 108.916,
"eval_steps_per_second": 7.549,
"step": 145
},
{
"epoch": 6.0,
"grad_norm": 0.19638490676879883,
"learning_rate": 1.563218390804598e-05,
"loss": 0.0251,
"step": 174
},
{
"epoch": 6.0,
"eval_accuracy": 0.9788225328250741,
"eval_f1": 0.8470588235294118,
"eval_loss": 0.06711125373840332,
"eval_precision": 0.8,
"eval_recall": 0.9,
"eval_runtime": 0.9219,
"eval_samples_per_second": 109.558,
"eval_steps_per_second": 7.593,
"step": 174
},
{
"epoch": 7.0,
"grad_norm": 0.5702025294303894,
"learning_rate": 1.4521072796934867e-05,
"loss": 0.0153,
"step": 203
},
{
"epoch": 7.0,
"eval_accuracy": 0.9800931808555696,
"eval_f1": 0.8649706457925636,
"eval_loss": 0.06598453223705292,
"eval_precision": 0.8154981549815498,
"eval_recall": 0.9208333333333333,
"eval_runtime": 0.9157,
"eval_samples_per_second": 110.293,
"eval_steps_per_second": 7.644,
"step": 203
},
{
"epoch": 8.0,
"grad_norm": 0.4013058543205261,
"learning_rate": 1.3409961685823755e-05,
"loss": 0.0107,
"step": 232
},
{
"epoch": 8.0,
"eval_accuracy": 0.9815756035578145,
"eval_f1": 0.8678500986193293,
"eval_loss": 0.06503612548112869,
"eval_precision": 0.8239700374531835,
"eval_recall": 0.9166666666666666,
"eval_runtime": 0.9192,
"eval_samples_per_second": 109.876,
"eval_steps_per_second": 7.615,
"step": 232
},
{
"epoch": 9.0,
"grad_norm": 0.62887042760849,
"learning_rate": 1.2298850574712644e-05,
"loss": 0.0096,
"step": 261
},
{
"epoch": 9.0,
"eval_accuracy": 0.9781872088098263,
"eval_f1": 0.8727984344422701,
"eval_loss": 0.09074747562408447,
"eval_precision": 0.8228782287822878,
"eval_recall": 0.9291666666666667,
"eval_runtime": 0.9179,
"eval_samples_per_second": 110.04,
"eval_steps_per_second": 7.627,
"step": 261
},
{
"epoch": 10.0,
"grad_norm": 0.13981495797634125,
"learning_rate": 1.1187739463601532e-05,
"loss": 0.0075,
"step": 290
},
{
"epoch": 10.0,
"eval_accuracy": 0.9800931808555696,
"eval_f1": 0.8638132295719844,
"eval_loss": 0.07469259202480316,
"eval_precision": 0.8102189781021898,
"eval_recall": 0.925,
"eval_runtime": 0.8265,
"eval_samples_per_second": 122.199,
"eval_steps_per_second": 8.469,
"step": 290
},
{
"epoch": 11.0,
"grad_norm": 0.29558104276657104,
"learning_rate": 1.0076628352490422e-05,
"loss": 0.007,
"step": 319
},
{
"epoch": 11.0,
"eval_accuracy": 0.9794578568403219,
"eval_f1": 0.8621359223300971,
"eval_loss": 0.07568900287151337,
"eval_precision": 0.8072727272727273,
"eval_recall": 0.925,
"eval_runtime": 0.8257,
"eval_samples_per_second": 122.315,
"eval_steps_per_second": 8.477,
"step": 319
},
{
"epoch": 12.0,
"grad_norm": 0.15115247666835785,
"learning_rate": 8.965517241379312e-06,
"loss": 0.0059,
"step": 348
},
{
"epoch": 12.0,
"eval_accuracy": 0.9828462515883101,
"eval_f1": 0.8695652173913043,
"eval_loss": 0.07132618129253387,
"eval_precision": 0.8270676691729323,
"eval_recall": 0.9166666666666666,
"eval_runtime": 0.832,
"eval_samples_per_second": 121.396,
"eval_steps_per_second": 8.414,
"step": 348
},
{
"epoch": 13.0,
"grad_norm": 0.05523902550339699,
"learning_rate": 7.854406130268199e-06,
"loss": 0.0055,
"step": 377
},
{
"epoch": 13.0,
"eval_accuracy": 0.9822109275730623,
"eval_f1": 0.8848484848484848,
"eval_loss": 0.07388192415237427,
"eval_precision": 0.8588235294117647,
"eval_recall": 0.9125,
"eval_runtime": 0.8308,
"eval_samples_per_second": 121.568,
"eval_steps_per_second": 8.425,
"step": 377
},
{
"epoch": 14.0,
"grad_norm": 0.08825332671403885,
"learning_rate": 6.743295019157089e-06,
"loss": 0.0048,
"step": 406
},
{
"epoch": 14.0,
"eval_accuracy": 0.9822109275730623,
"eval_f1": 0.8717948717948718,
"eval_loss": 0.07594037801027298,
"eval_precision": 0.8277153558052435,
"eval_recall": 0.9208333333333333,
"eval_runtime": 0.9402,
"eval_samples_per_second": 107.427,
"eval_steps_per_second": 7.445,
"step": 406
},
{
"epoch": 15.0,
"grad_norm": 0.029169419780373573,
"learning_rate": 5.6321839080459775e-06,
"loss": 0.0051,
"step": 435
},
{
"epoch": 15.0,
"eval_accuracy": 0.981152054214316,
"eval_f1": 0.8858267716535433,
"eval_loss": 0.08297453075647354,
"eval_precision": 0.8395522388059702,
"eval_recall": 0.9375,
"eval_runtime": 0.9214,
"eval_samples_per_second": 109.612,
"eval_steps_per_second": 7.597,
"step": 435
},
{
"epoch": 16.0,
"grad_norm": 0.1920151710510254,
"learning_rate": 4.521072796934866e-06,
"loss": 0.0039,
"step": 464
},
{
"epoch": 16.0,
"eval_accuracy": 0.9832698009318086,
"eval_f1": 0.8884462151394423,
"eval_loss": 0.07141096144914627,
"eval_precision": 0.851145038167939,
"eval_recall": 0.9291666666666667,
"eval_runtime": 0.9091,
"eval_samples_per_second": 111.098,
"eval_steps_per_second": 7.7,
"step": 464
}
],
"logging_steps": 500,
"max_steps": 580,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 286480239652512.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}