Variant_reinit-llrd_PuBMedBert_V2 / trainer_state.json
Mardiyyah's picture
add Variant-V1 LLRD model
8b9f1e0 verified
{
"best_metric": 0.90625,
"best_model_checkpoint": "/nfs/production/literature/amina-mardiyyah/new_data/OT-Entity-Extraction-Pipeline/model_outputs/hf/Variant_V2/reinit_llrd/4K_no_reinit_classifier_llrd0.9/BiomedNLP_BiomedBERT_base/checkpoint-1408",
"epoch": 7.0,
"eval_steps": 500,
"global_step": 2464,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 0.3181160092353821,
"learning_rate": 9.886363636363637e-06,
"loss": 0.227,
"step": 352
},
{
"epoch": 1.0,
"eval_accuracy": 0.9924264890389397,
"eval_f1": 0.5577264653641208,
"eval_loss": 0.01651068590581417,
"eval_precision": 0.575091575091575,
"eval_recall": 0.5413793103448276,
"eval_runtime": 4.458,
"eval_samples_per_second": 270.525,
"eval_steps_per_second": 17.048,
"step": 352
},
{
"epoch": 2.0,
"grad_norm": 1.8566864728927612,
"learning_rate": 1.9886363636363638e-05,
"loss": 0.0131,
"step": 704
},
{
"epoch": 2.0,
"eval_accuracy": 0.9970116568195816,
"eval_f1": 0.8357723577235773,
"eval_loss": 0.009072142653167248,
"eval_precision": 0.7907692307692308,
"eval_recall": 0.8862068965517241,
"eval_runtime": 4.5121,
"eval_samples_per_second": 267.28,
"eval_steps_per_second": 16.844,
"step": 704
},
{
"epoch": 3.0,
"grad_norm": 0.004470929503440857,
"learning_rate": 1.8901515151515153e-05,
"loss": 0.0053,
"step": 1056
},
{
"epoch": 3.0,
"eval_accuracy": 0.9978328808996966,
"eval_f1": 0.8819444444444444,
"eval_loss": 0.005974676925688982,
"eval_precision": 0.8881118881118881,
"eval_recall": 0.8758620689655172,
"eval_runtime": 4.488,
"eval_samples_per_second": 268.719,
"eval_steps_per_second": 16.934,
"step": 1056
},
{
"epoch": 4.0,
"grad_norm": 1.6404166221618652,
"learning_rate": 1.779040404040404e-05,
"loss": 0.0026,
"step": 1408
},
{
"epoch": 4.0,
"eval_accuracy": 0.9981750575997445,
"eval_f1": 0.90625,
"eval_loss": 0.007304870057851076,
"eval_precision": 0.9125874125874126,
"eval_recall": 0.9,
"eval_runtime": 4.4508,
"eval_samples_per_second": 270.961,
"eval_steps_per_second": 17.075,
"step": 1408
},
{
"epoch": 5.0,
"grad_norm": 0.021774714812636375,
"learning_rate": 1.667929292929293e-05,
"loss": 0.0016,
"step": 1760
},
{
"epoch": 5.0,
"eval_accuracy": 0.9981066222597349,
"eval_f1": 0.8957264957264958,
"eval_loss": 0.006704295519739389,
"eval_precision": 0.888135593220339,
"eval_recall": 0.903448275862069,
"eval_runtime": 4.4921,
"eval_samples_per_second": 268.47,
"eval_steps_per_second": 16.918,
"step": 1760
},
{
"epoch": 6.0,
"grad_norm": 0.004604855552315712,
"learning_rate": 1.5568181818181822e-05,
"loss": 0.001,
"step": 2112
},
{
"epoch": 6.0,
"eval_accuracy": 0.9982663047197573,
"eval_f1": 0.9031141868512111,
"eval_loss": 0.0071674492210149765,
"eval_precision": 0.90625,
"eval_recall": 0.9,
"eval_runtime": 4.7306,
"eval_samples_per_second": 254.936,
"eval_steps_per_second": 16.066,
"step": 2112
},
{
"epoch": 7.0,
"grad_norm": 0.013881128281354904,
"learning_rate": 1.4457070707070708e-05,
"loss": 0.0007,
"step": 2464
},
{
"epoch": 7.0,
"eval_accuracy": 0.9980838104797317,
"eval_f1": 0.9014084507042254,
"eval_loss": 0.00853455625474453,
"eval_precision": 0.920863309352518,
"eval_recall": 0.8827586206896552,
"eval_runtime": 4.4218,
"eval_samples_per_second": 272.737,
"eval_steps_per_second": 17.187,
"step": 2464
}
],
"logging_steps": 500,
"max_steps": 7040,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 805331029976316.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}