bioformer-CeLLaTe_V2 / trainer_state.json
christine-withers's picture
Upload 11 files
d841237 verified
{
"best_metric": 0.7557603686635944,
"best_model_checkpoint": "/hps/software/users/chembl/christine/git_projects/OTAR3088/Entity-Extraction-Modular-pipeline/outputs/model_outputs/hf/model_outputs/hf/CeLLaTe_V2/reinit_llrd/4K_with_reinit_classifier_llrd0.9/bioformers/checkpoint-672",
"epoch": 14.0,
"eval_steps": 500,
"global_step": 672,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 6.153050422668457,
"learning_rate": 9.375000000000001e-06,
"loss": 2.3155,
"step": 48
},
{
"epoch": 1.0,
"eval_accuracy": 0.9688143571638718,
"eval_f1": 0.0,
"eval_loss": 0.2333633154630661,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 1.308,
"eval_samples_per_second": 144.499,
"eval_steps_per_second": 9.175,
"step": 48
},
{
"epoch": 2.0,
"grad_norm": 7.159886360168457,
"learning_rate": 1.9375e-05,
"loss": 0.1837,
"step": 96
},
{
"epoch": 2.0,
"eval_accuracy": 0.9791115033833481,
"eval_f1": 0.37777777777777777,
"eval_loss": 0.07473152875900269,
"eval_precision": 0.4473684210526316,
"eval_recall": 0.3269230769230769,
"eval_runtime": 1.3056,
"eval_samples_per_second": 144.764,
"eval_steps_per_second": 9.191,
"step": 96
},
{
"epoch": 3.0,
"grad_norm": 0.33205464482307434,
"learning_rate": 1.8958333333333334e-05,
"loss": 0.0771,
"step": 144
},
{
"epoch": 3.0,
"eval_accuracy": 0.9852897911150338,
"eval_f1": 0.5420560747663552,
"eval_loss": 0.056234169751405716,
"eval_precision": 0.5272727272727272,
"eval_recall": 0.5576923076923077,
"eval_runtime": 1.3018,
"eval_samples_per_second": 145.185,
"eval_steps_per_second": 9.218,
"step": 144
},
{
"epoch": 4.0,
"grad_norm": 0.3881724178791046,
"learning_rate": 1.7847222222222225e-05,
"loss": 0.0532,
"step": 192
},
{
"epoch": 4.0,
"eval_accuracy": 0.9872021182700794,
"eval_f1": 0.6190476190476191,
"eval_loss": 0.0470348596572876,
"eval_precision": 0.6132075471698113,
"eval_recall": 0.625,
"eval_runtime": 1.3002,
"eval_samples_per_second": 145.366,
"eval_steps_per_second": 9.23,
"step": 192
},
{
"epoch": 5.0,
"grad_norm": 4.382257461547852,
"learning_rate": 1.6736111111111113e-05,
"loss": 0.0403,
"step": 240
},
{
"epoch": 5.0,
"eval_accuracy": 0.9883789349808767,
"eval_f1": 0.6415094339622641,
"eval_loss": 0.04033330827951431,
"eval_precision": 0.6296296296296297,
"eval_recall": 0.6538461538461539,
"eval_runtime": 1.3061,
"eval_samples_per_second": 144.701,
"eval_steps_per_second": 9.187,
"step": 240
},
{
"epoch": 6.0,
"grad_norm": 0.024486979469656944,
"learning_rate": 1.5625e-05,
"loss": 0.0308,
"step": 288
},
{
"epoch": 6.0,
"eval_accuracy": 0.9877905266254781,
"eval_f1": 0.6351931330472104,
"eval_loss": 0.04146737605333328,
"eval_precision": 0.5736434108527132,
"eval_recall": 0.7115384615384616,
"eval_runtime": 1.3002,
"eval_samples_per_second": 145.36,
"eval_steps_per_second": 9.229,
"step": 288
},
{
"epoch": 7.0,
"grad_norm": 0.022526560351252556,
"learning_rate": 1.451388888888889e-05,
"loss": 0.0234,
"step": 336
},
{
"epoch": 7.0,
"eval_accuracy": 0.989997057958223,
"eval_f1": 0.7069767441860465,
"eval_loss": 0.036325544118881226,
"eval_precision": 0.6846846846846847,
"eval_recall": 0.7307692307692307,
"eval_runtime": 1.2996,
"eval_samples_per_second": 145.425,
"eval_steps_per_second": 9.233,
"step": 336
},
{
"epoch": 8.0,
"grad_norm": 0.0061901346780359745,
"learning_rate": 1.3402777777777779e-05,
"loss": 0.0183,
"step": 384
},
{
"epoch": 8.0,
"eval_accuracy": 0.990879670491321,
"eval_f1": 0.6909090909090909,
"eval_loss": 0.03637426719069481,
"eval_precision": 0.6551724137931034,
"eval_recall": 0.7307692307692307,
"eval_runtime": 1.3096,
"eval_samples_per_second": 144.324,
"eval_steps_per_second": 9.163,
"step": 384
},
{
"epoch": 9.0,
"grad_norm": 0.1458413451910019,
"learning_rate": 1.2291666666666668e-05,
"loss": 0.015,
"step": 432
},
{
"epoch": 9.0,
"eval_accuracy": 0.9904383642247719,
"eval_f1": 0.704225352112676,
"eval_loss": 0.03453047573566437,
"eval_precision": 0.6880733944954128,
"eval_recall": 0.7211538461538461,
"eval_runtime": 1.3147,
"eval_samples_per_second": 143.761,
"eval_steps_per_second": 9.128,
"step": 432
},
{
"epoch": 10.0,
"grad_norm": 0.05294337496161461,
"learning_rate": 1.1180555555555557e-05,
"loss": 0.0131,
"step": 480
},
{
"epoch": 10.0,
"eval_accuracy": 0.9905854663136217,
"eval_f1": 0.7123287671232875,
"eval_loss": 0.033786673098802567,
"eval_precision": 0.6782608695652174,
"eval_recall": 0.75,
"eval_runtime": 1.3033,
"eval_samples_per_second": 145.02,
"eval_steps_per_second": 9.208,
"step": 480
},
{
"epoch": 11.0,
"grad_norm": 0.021180154755711555,
"learning_rate": 1.0069444444444445e-05,
"loss": 0.0108,
"step": 528
},
{
"epoch": 11.0,
"eval_accuracy": 0.9904383642247719,
"eval_f1": 0.6912442396313364,
"eval_loss": 0.03413481265306473,
"eval_precision": 0.6637168141592921,
"eval_recall": 0.7211538461538461,
"eval_runtime": 1.3026,
"eval_samples_per_second": 145.096,
"eval_steps_per_second": 9.212,
"step": 528
},
{
"epoch": 12.0,
"grad_norm": 0.045118171721696854,
"learning_rate": 8.958333333333334e-06,
"loss": 0.01,
"step": 576
},
{
"epoch": 12.0,
"eval_accuracy": 0.990879670491321,
"eval_f1": 0.7264150943396226,
"eval_loss": 0.03440742939710617,
"eval_precision": 0.7129629629629629,
"eval_recall": 0.7403846153846154,
"eval_runtime": 1.3043,
"eval_samples_per_second": 144.91,
"eval_steps_per_second": 9.201,
"step": 576
},
{
"epoch": 13.0,
"grad_norm": 1.282688856124878,
"learning_rate": 7.847222222222223e-06,
"loss": 0.0086,
"step": 624
},
{
"epoch": 13.0,
"eval_accuracy": 0.990879670491321,
"eval_f1": 0.7348837209302327,
"eval_loss": 0.035170141607522964,
"eval_precision": 0.7117117117117117,
"eval_recall": 0.7596153846153846,
"eval_runtime": 1.3097,
"eval_samples_per_second": 144.31,
"eval_steps_per_second": 9.163,
"step": 624
},
{
"epoch": 14.0,
"grad_norm": 0.04552546516060829,
"learning_rate": 6.736111111111112e-06,
"loss": 0.0072,
"step": 672
},
{
"epoch": 14.0,
"eval_accuracy": 0.9910267725801707,
"eval_f1": 0.7557603686635944,
"eval_loss": 0.034706298261880875,
"eval_precision": 0.7256637168141593,
"eval_recall": 0.7884615384615384,
"eval_runtime": 1.3011,
"eval_samples_per_second": 145.257,
"eval_steps_per_second": 9.223,
"step": 672
}
],
"logging_steps": 500,
"max_steps": 960,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 789505749171048.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}