no_vague_no_downsample / trainer_state.json
Mardiyyah's picture
add model to hub
939c6d1 verified
{
"best_metric": 0.7460203642621539,
"best_model_checkpoint": "/nfs/production/literature/amina-mardiyyah/new_data/Ner_Pipeline/src/ner_pipeline/model_outputs/ner/CeLLaTe_no_vague_1.0/cellate2.0_tapt_base_LR_5e/base/STANDARD/BaseTrainer/no_data_aug/checkpoint-600",
"epoch": 4.455445544554456,
"eval_steps": 100,
"global_step": 900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.49504950495049505,
"grad_norm": 0.5439660549163818,
"learning_rate": 1.9724931232808205e-05,
"loss": 0.7697,
"step": 100
},
{
"epoch": 0.49504950495049505,
"eval_accuracy": 0.9607623318385651,
"eval_f1": 0.25471864526371496,
"eval_loss": 0.15020258724689484,
"eval_precision": 0.30788912579957356,
"eval_recall": 0.21720818291215402,
"step": 100
},
{
"epoch": 0.9900990099009901,
"grad_norm": 2.310349464416504,
"learning_rate": 1.922480620155039e-05,
"loss": 0.1727,
"step": 200
},
{
"epoch": 0.9900990099009901,
"eval_accuracy": 0.9619623571022548,
"eval_f1": 0.5057967719936349,
"eval_loss": 0.11981263011693954,
"eval_precision": 0.40646693459992694,
"eval_recall": 0.6693742478941035,
"step": 200
},
{
"epoch": 1.4851485148514851,
"grad_norm": 1.5374152660369873,
"learning_rate": 1.8724681170292574e-05,
"loss": 0.1057,
"step": 300
},
{
"epoch": 1.4851485148514851,
"eval_accuracy": 0.9804364302406366,
"eval_f1": 0.6964094728800612,
"eval_loss": 0.08184882998466492,
"eval_precision": 0.7075442409189693,
"eval_recall": 0.6856197352587244,
"step": 300
},
{
"epoch": 1.9801980198019802,
"grad_norm": 0.818098783493042,
"learning_rate": 1.822455613903476e-05,
"loss": 0.0753,
"step": 400
},
{
"epoch": 1.9801980198019802,
"eval_accuracy": 0.9807364365565591,
"eval_f1": 0.720526630760024,
"eval_loss": 0.07648279517889023,
"eval_precision": 0.7166666666666667,
"eval_recall": 0.7244283995186522,
"step": 400
},
{
"epoch": 2.4752475247524752,
"grad_norm": 0.49367237091064453,
"learning_rate": 1.7724431107776944e-05,
"loss": 0.0555,
"step": 500
},
{
"epoch": 2.4752475247524752,
"eval_accuracy": 0.9471357291732457,
"eval_f1": 0.5116742081447964,
"eval_loss": 0.1018502488732338,
"eval_precision": 0.36590732591250325,
"eval_recall": 0.8504813477737665,
"step": 500
},
{
"epoch": 2.9702970297029703,
"grad_norm": 0.568962037563324,
"learning_rate": 1.722430607651913e-05,
"loss": 0.0511,
"step": 600
},
{
"epoch": 2.9702970297029703,
"eval_accuracy": 0.9815417166677193,
"eval_f1": 0.7460203642621539,
"eval_loss": 0.0740918517112732,
"eval_precision": 0.712798026856673,
"eval_recall": 0.7824909747292419,
"step": 600
},
{
"epoch": 3.4653465346534653,
"grad_norm": 0.7195326089859009,
"learning_rate": 1.6724181045261317e-05,
"loss": 0.0381,
"step": 700
},
{
"epoch": 3.4653465346534653,
"eval_accuracy": 0.9810601275816333,
"eval_f1": 0.7280469897209986,
"eval_loss": 0.08975373208522797,
"eval_precision": 0.7111302352266208,
"eval_recall": 0.7457882069795427,
"step": 700
},
{
"epoch": 3.9603960396039604,
"grad_norm": 0.6837287545204163,
"learning_rate": 1.6224056014003503e-05,
"loss": 0.0369,
"step": 800
},
{
"epoch": 3.9603960396039604,
"eval_accuracy": 0.9817706688561865,
"eval_f1": 0.7423093432536844,
"eval_loss": 0.08456307649612427,
"eval_precision": 0.7077762619372442,
"eval_recall": 0.7803850782190133,
"step": 800
},
{
"epoch": 4.455445544554456,
"grad_norm": 1.966813564300537,
"learning_rate": 1.5723930982745687e-05,
"loss": 0.0295,
"step": 900
},
{
"epoch": 4.455445544554456,
"eval_accuracy": 0.9808785448114697,
"eval_f1": 0.7300910125142206,
"eval_loss": 0.09194136410951614,
"eval_precision": 0.6922869471413161,
"eval_recall": 0.7722623345367028,
"step": 900
},
{
"epoch": 4.455445544554456,
"step": 900,
"total_flos": 760697313381126.0,
"train_loss": 0.1482748039563497,
"train_runtime": 320.0844,
"train_samples_per_second": 403.206,
"train_steps_per_second": 12.622
}
],
"logging_steps": 100,
"max_steps": 4040,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 100,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 760697313381126.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}