{ "best_metric": 0.7460203642621539, "best_model_checkpoint": "/nfs/production/literature/amina-mardiyyah/new_data/Ner_Pipeline/src/ner_pipeline/model_outputs/ner/CeLLaTe_no_vague_1.0/cellate2.0_tapt_base_LR_5e/base/STANDARD/BaseTrainer/no_data_aug/checkpoint-600", "epoch": 4.455445544554456, "eval_steps": 100, "global_step": 900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.49504950495049505, "grad_norm": 0.5439660549163818, "learning_rate": 1.9724931232808205e-05, "loss": 0.7697, "step": 100 }, { "epoch": 0.49504950495049505, "eval_accuracy": 0.9607623318385651, "eval_f1": 0.25471864526371496, "eval_loss": 0.15020258724689484, "eval_precision": 0.30788912579957356, "eval_recall": 0.21720818291215402, "step": 100 }, { "epoch": 0.9900990099009901, "grad_norm": 2.310349464416504, "learning_rate": 1.922480620155039e-05, "loss": 0.1727, "step": 200 }, { "epoch": 0.9900990099009901, "eval_accuracy": 0.9619623571022548, "eval_f1": 0.5057967719936349, "eval_loss": 0.11981263011693954, "eval_precision": 0.40646693459992694, "eval_recall": 0.6693742478941035, "step": 200 }, { "epoch": 1.4851485148514851, "grad_norm": 1.5374152660369873, "learning_rate": 1.8724681170292574e-05, "loss": 0.1057, "step": 300 }, { "epoch": 1.4851485148514851, "eval_accuracy": 0.9804364302406366, "eval_f1": 0.6964094728800612, "eval_loss": 0.08184882998466492, "eval_precision": 0.7075442409189693, "eval_recall": 0.6856197352587244, "step": 300 }, { "epoch": 1.9801980198019802, "grad_norm": 0.818098783493042, "learning_rate": 1.822455613903476e-05, "loss": 0.0753, "step": 400 }, { "epoch": 1.9801980198019802, "eval_accuracy": 0.9807364365565591, "eval_f1": 0.720526630760024, "eval_loss": 0.07648279517889023, "eval_precision": 0.7166666666666667, "eval_recall": 0.7244283995186522, "step": 400 }, { "epoch": 2.4752475247524752, "grad_norm": 0.49367237091064453, "learning_rate": 1.7724431107776944e-05, "loss": 0.0555, "step": 500 }, { "epoch": 2.4752475247524752, "eval_accuracy": 0.9471357291732457, "eval_f1": 0.5116742081447964, "eval_loss": 0.1018502488732338, "eval_precision": 0.36590732591250325, "eval_recall": 0.8504813477737665, "step": 500 }, { "epoch": 2.9702970297029703, "grad_norm": 0.568962037563324, "learning_rate": 1.722430607651913e-05, "loss": 0.0511, "step": 600 }, { "epoch": 2.9702970297029703, "eval_accuracy": 0.9815417166677193, "eval_f1": 0.7460203642621539, "eval_loss": 0.0740918517112732, "eval_precision": 0.712798026856673, "eval_recall": 0.7824909747292419, "step": 600 }, { "epoch": 3.4653465346534653, "grad_norm": 0.7195326089859009, "learning_rate": 1.6724181045261317e-05, "loss": 0.0381, "step": 700 }, { "epoch": 3.4653465346534653, "eval_accuracy": 0.9810601275816333, "eval_f1": 0.7280469897209986, "eval_loss": 0.08975373208522797, "eval_precision": 0.7111302352266208, "eval_recall": 0.7457882069795427, "step": 700 }, { "epoch": 3.9603960396039604, "grad_norm": 0.6837287545204163, "learning_rate": 1.6224056014003503e-05, "loss": 0.0369, "step": 800 }, { "epoch": 3.9603960396039604, "eval_accuracy": 0.9817706688561865, "eval_f1": 0.7423093432536844, "eval_loss": 0.08456307649612427, "eval_precision": 0.7077762619372442, "eval_recall": 0.7803850782190133, "step": 800 }, { "epoch": 4.455445544554456, "grad_norm": 1.966813564300537, "learning_rate": 1.5723930982745687e-05, "loss": 0.0295, "step": 900 }, { "epoch": 4.455445544554456, "eval_accuracy": 0.9808785448114697, "eval_f1": 0.7300910125142206, "eval_loss": 0.09194136410951614, "eval_precision": 0.6922869471413161, "eval_recall": 0.7722623345367028, "step": 900 }, { "epoch": 4.455445544554456, "step": 900, "total_flos": 760697313381126.0, "train_loss": 0.1482748039563497, "train_runtime": 320.0844, "train_samples_per_second": 403.206, "train_steps_per_second": 12.622 } ], "logging_steps": 100, "max_steps": 4040, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 100, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 760697313381126.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }