{ "best_global_step": 3528, "best_metric": 0.8244905994684517, "best_model_checkpoint": "/projects/gyorilab/variants_ner_coarser/checkpoint-3528", "epoch": 8.0, "eval_steps": 500, "global_step": 3528, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.9403182734357377, "eval_f1": 0.47378233512001866, "eval_loss": 0.01702982187271118, "eval_precision": 0.5675600223338917, "eval_recall": 0.4066, "eval_runtime": 8.8591, "eval_samples_per_second": 316.058, "eval_steps_per_second": 39.507, "step": 441 }, { "epoch": 1.1338627339761769, "grad_norm": 0.19258293509483337, "learning_rate": 1.7736961451247167e-05, "loss": 0.1472896270751953, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.9491082668141367, "eval_f1": 0.5545414069456812, "eval_loss": 0.014995112083852291, "eval_precision": 0.6252510040160643, "eval_recall": 0.4982, "eval_runtime": 8.4202, "eval_samples_per_second": 332.534, "eval_steps_per_second": 41.567, "step": 882 }, { "epoch": 2.2677254679523537, "grad_norm": 0.7697210907936096, "learning_rate": 1.546938775510204e-05, "loss": 0.052211280822753905, "step": 1000 }, { "epoch": 3.0, "eval_accuracy": 0.9703556018016576, "eval_f1": 0.7212371134020619, "eval_loss": 0.017216848209500313, "eval_precision": 0.7442553191489362, "eval_recall": 0.6996, "eval_runtime": 8.3974, "eval_samples_per_second": 333.438, "eval_steps_per_second": 41.68, "step": 1323 }, { "epoch": 3.401588201928531, "grad_norm": 0.342649906873703, "learning_rate": 1.3201814058956916e-05, "loss": 0.03277603530883789, "step": 1500 }, { "epoch": 4.0, "eval_accuracy": 0.9722474877936971, "eval_f1": 0.7500510516642842, "eval_loss": 0.01973508670926094, "eval_precision": 0.7661660408844388, "eval_recall": 0.7346, "eval_runtime": 8.5095, "eval_samples_per_second": 329.042, "eval_steps_per_second": 41.13, "step": 1764 }, { "epoch": 4.5354509359047075, "grad_norm": 0.06497496366500854, "learning_rate": 1.0934240362811793e-05, "loss": 0.019792444229125978, "step": 2000 }, { "epoch": 5.0, "eval_accuracy": 0.9771954973113389, "eval_f1": 0.7891332470892627, "eval_loss": 0.03587024286389351, "eval_precision": 0.7853040205981382, "eval_recall": 0.793, "eval_runtime": 8.4468, "eval_samples_per_second": 331.488, "eval_steps_per_second": 41.436, "step": 2205 }, { "epoch": 5.669313669880885, "grad_norm": 0.02606065385043621, "learning_rate": 8.666666666666668e-06, "loss": 0.013434083938598632, "step": 2500 }, { "epoch": 6.0, "eval_accuracy": 0.9781268873381892, "eval_f1": 0.8013205282112845, "eval_loss": 0.03571802005171776, "eval_precision": 0.8016413130504404, "eval_recall": 0.801, "eval_runtime": 8.4407, "eval_samples_per_second": 331.725, "eval_steps_per_second": 41.466, "step": 2646 }, { "epoch": 6.803176403857062, "grad_norm": 0.06468810141086578, "learning_rate": 6.399092970521542e-06, "loss": 0.009369298934936524, "step": 3000 }, { "epoch": 7.0, "eval_accuracy": 0.9796622255855751, "eval_f1": 0.8202601497832086, "eval_loss": 0.06079654023051262, "eval_precision": 0.8084693084693084, "eval_recall": 0.8324, "eval_runtime": 8.4309, "eval_samples_per_second": 332.113, "eval_steps_per_second": 41.514, "step": 3087 }, { "epoch": 7.9370391378332386, "grad_norm": 0.018788253888487816, "learning_rate": 4.131519274376418e-06, "loss": 0.005781527042388916, "step": 3500 }, { "epoch": 8.0, "eval_accuracy": 0.9798732436385333, "eval_f1": 0.8244905994684517, "eval_loss": 0.06755472719669342, "eval_precision": 0.8117852296956775, "eval_recall": 0.8376, "eval_runtime": 8.4796, "eval_samples_per_second": 330.204, "eval_steps_per_second": 41.276, "step": 3528 } ], "logging_steps": 500, "max_steps": 4410, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.254969894362148e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }