cellate-tapt_base-LR_2e-05 / trainer_state.json
Mardiyyah's picture
Model save
bcb6531 verified
{
"best_metric": 0.22371177209886886,
"best_model_checkpoint": "/nfs/production/literature/amina-mardiyyah/new_data/OT-Entity-Extraction-Pipeline/model_outputs/Continued_pretraining/TAPT/Mardiyyah/biomedbert_model_extended_untrained/cellate-tapt_base-LR_2e-05/checkpoint-50",
"epoch": 8.363636363636363,
"eval_steps": 500,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 22.1080379486084,
"learning_rate": 1.9183673469387756e-05,
"loss": 10.133,
"step": 6
},
{
"epoch": 1.0,
"eval_accuracy": 0.0,
"eval_loss": 10.35041332244873,
"eval_runtime": 0.7052,
"eval_samples_per_second": 43.957,
"eval_steps_per_second": 2.836,
"step": 6
},
{
"epoch": 2.0,
"grad_norm": 8.525097846984863,
"learning_rate": 1.673469387755102e-05,
"loss": 8.7383,
"step": 12
},
{
"epoch": 2.0,
"eval_accuracy": 0.011311269375785506,
"eval_loss": 8.706360816955566,
"eval_runtime": 0.6712,
"eval_samples_per_second": 46.188,
"eval_steps_per_second": 2.98,
"step": 12
},
{
"epoch": 3.0,
"grad_norm": 4.951538562774658,
"learning_rate": 1.4285714285714287e-05,
"loss": 7.6303,
"step": 18
},
{
"epoch": 3.0,
"eval_accuracy": 0.057813154587348134,
"eval_loss": 8.054218292236328,
"eval_runtime": 0.6857,
"eval_samples_per_second": 45.211,
"eval_steps_per_second": 2.917,
"step": 18
},
{
"epoch": 4.0,
"grad_norm": 4.658953666687012,
"learning_rate": 1.1836734693877552e-05,
"loss": 7.06,
"step": 24
},
{
"epoch": 4.0,
"eval_accuracy": 0.08211143695014662,
"eval_loss": 7.551440238952637,
"eval_runtime": 0.6769,
"eval_samples_per_second": 45.798,
"eval_steps_per_second": 2.955,
"step": 24
},
{
"epoch": 5.0,
"grad_norm": 4.83123779296875,
"learning_rate": 9.387755102040818e-06,
"loss": 6.6524,
"step": 30
},
{
"epoch": 5.0,
"eval_accuracy": 0.15207373271889402,
"eval_loss": 7.070725440979004,
"eval_runtime": 0.6777,
"eval_samples_per_second": 45.742,
"eval_steps_per_second": 2.951,
"step": 30
},
{
"epoch": 6.0,
"grad_norm": 4.068853378295898,
"learning_rate": 6.938775510204082e-06,
"loss": 6.2891,
"step": 36
},
{
"epoch": 6.0,
"eval_accuracy": 0.1805613741097612,
"eval_loss": 6.90059232711792,
"eval_runtime": 0.6808,
"eval_samples_per_second": 45.536,
"eval_steps_per_second": 2.938,
"step": 36
},
{
"epoch": 7.0,
"grad_norm": 6.190578460693359,
"learning_rate": 4.489795918367348e-06,
"loss": 6.0336,
"step": 42
},
{
"epoch": 7.0,
"eval_accuracy": 0.210305823209049,
"eval_loss": 6.646459579467773,
"eval_runtime": 0.7705,
"eval_samples_per_second": 40.235,
"eval_steps_per_second": 2.596,
"step": 42
},
{
"epoch": 8.0,
"grad_norm": 4.320862293243408,
"learning_rate": 2.0408163265306125e-06,
"loss": 5.9085,
"step": 48
},
{
"epoch": 8.0,
"eval_accuracy": 0.22077922077922077,
"eval_loss": 6.615872383117676,
"eval_runtime": 0.679,
"eval_samples_per_second": 45.655,
"eval_steps_per_second": 2.945,
"step": 48
},
{
"epoch": 8.363636363636363,
"grad_norm": 6.226965427398682,
"learning_rate": 1.2244897959183673e-06,
"loss": 6.375,
"step": 50
},
{
"epoch": 8.363636363636363,
"eval_accuracy": 0.22371177209886886,
"eval_loss": 6.628802299499512,
"eval_runtime": 0.7625,
"eval_samples_per_second": 40.656,
"eval_steps_per_second": 2.623,
"step": 50
},
{
"epoch": 8.363636363636363,
"step": 50,
"total_flos": 368493460070400.0,
"train_loss": 7.268417072296143,
"train_runtime": 82.8209,
"train_samples_per_second": 20.164,
"train_steps_per_second": 0.604
}
],
"logging_steps": 10,
"max_steps": 50,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 368493460070400.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}