{ "best_global_step": 300, "best_metric": 0.997979797979798, "best_model_checkpoint": "nci-binary-detector/checkpoint-200", "epoch": 0.9803921568627451, "eval_steps": 100, "global_step": 600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.032679738562091505, "grad_norm": 2.849823236465454, "learning_rate": 1.2418300653594772e-06, "loss": 0.1789, "step": 20 }, { "epoch": 0.06535947712418301, "grad_norm": 1.1499906778335571, "learning_rate": 2.549019607843137e-06, "loss": 0.1045, "step": 40 }, { "epoch": 0.09803921568627451, "grad_norm": 1.5816751718521118, "learning_rate": 3.856209150326798e-06, "loss": 0.0389, "step": 60 }, { "epoch": 0.13071895424836602, "grad_norm": 0.3664398789405823, "learning_rate": 5.163398692810458e-06, "loss": 0.0164, "step": 80 }, { "epoch": 0.16339869281045752, "grad_norm": 0.15663352608680725, "learning_rate": 6.470588235294119e-06, "loss": 0.0123, "step": 100 }, { "epoch": 0.16339869281045752, "eval_accuracy": 0.9837869137232195, "eval_f1": 0.9860279441117764, "eval_loss": 0.005597150418907404, "eval_precision": 0.9743589743589743, "eval_recall": 0.997979797979798, "eval_roc_auc": 0.9985759905705631, "eval_runtime": 11.5844, "eval_samples_per_second": 149.08, "eval_steps_per_second": 4.661, "step": 100 }, { "epoch": 0.19607843137254902, "grad_norm": 0.10866066068410873, "learning_rate": 7.77777777777778e-06, "loss": 0.0073, "step": 120 }, { "epoch": 0.22875816993464052, "grad_norm": 0.06542709469795227, "learning_rate": 9.084967320261438e-06, "loss": 0.0019, "step": 140 }, { "epoch": 0.26143790849673204, "grad_norm": 0.15572959184646606, "learning_rate": 1.03921568627451e-05, "loss": 0.0014, "step": 160 }, { "epoch": 0.29411764705882354, "grad_norm": 0.009457970038056374, "learning_rate": 1.169934640522876e-05, "loss": 0.0066, "step": 180 }, { "epoch": 0.32679738562091504, "grad_norm": 0.016104549169540405, "learning_rate": 1.3006535947712419e-05, "loss": 0.0007, "step": 200 }, { "epoch": 0.32679738562091504, "eval_accuracy": 0.9971048060220035, "eval_f1": 0.9974785678265254, "eval_loss": 0.0025405457708984613, "eval_precision": 0.9959718026183283, "eval_recall": 0.998989898989899, "eval_roc_auc": 0.998836396529748, "eval_runtime": 8.7693, "eval_samples_per_second": 196.936, "eval_steps_per_second": 6.158, "step": 200 }, { "epoch": 0.35947712418300654, "grad_norm": 0.10450628399848938, "learning_rate": 1.431372549019608e-05, "loss": 0.0166, "step": 220 }, { "epoch": 0.39215686274509803, "grad_norm": 0.11410112679004669, "learning_rate": 1.562091503267974e-05, "loss": 0.001, "step": 240 }, { "epoch": 0.42483660130718953, "grad_norm": 0.056244976818561554, "learning_rate": 1.69281045751634e-05, "loss": 0.0044, "step": 260 }, { "epoch": 0.45751633986928103, "grad_norm": 0.044668033719062805, "learning_rate": 1.823529411764706e-05, "loss": 0.0019, "step": 280 }, { "epoch": 0.49019607843137253, "grad_norm": 0.004644064232707024, "learning_rate": 1.954248366013072e-05, "loss": 0.0004, "step": 300 }, { "epoch": 0.49019607843137253, "eval_accuracy": 0.9976838448176027, "eval_f1": 0.997979797979798, "eval_loss": 0.002409732900559902, "eval_precision": 0.997979797979798, "eval_recall": 0.997979797979798, "eval_roc_auc": 0.9996368022148212, "eval_runtime": 8.7756, "eval_samples_per_second": 196.796, "eval_steps_per_second": 6.153, "step": 300 }, { "epoch": 0.5228758169934641, "grad_norm": 0.31552189588546753, "learning_rate": 1.990559186637618e-05, "loss": 0.0033, "step": 320 }, { "epoch": 0.5555555555555556, "grad_norm": 0.0014637637650594115, "learning_rate": 1.9760348583877997e-05, "loss": 0.0002, "step": 340 }, { "epoch": 0.5882352941176471, "grad_norm": 2.498281955718994, "learning_rate": 1.9615105301379813e-05, "loss": 0.0008, "step": 360 }, { "epoch": 0.6209150326797386, "grad_norm": 1.514452014816925e-05, "learning_rate": 1.946986201888163e-05, "loss": 0.0001, "step": 380 }, { "epoch": 0.6535947712418301, "grad_norm": 0.016483725979924202, "learning_rate": 1.9324618736383445e-05, "loss": 0.0046, "step": 400 }, { "epoch": 0.6535947712418301, "eval_accuracy": 0.9936305732484076, "eval_f1": 0.994475138121547, "eval_loss": 0.0015705447876825929, "eval_precision": 0.989010989010989, "eval_recall": 1.0, "eval_roc_auc": 0.9999013198470458, "eval_runtime": 8.774, "eval_samples_per_second": 196.831, "eval_steps_per_second": 6.155, "step": 400 }, { "epoch": 0.6862745098039216, "grad_norm": 0.006017300765961409, "learning_rate": 1.9179375453885257e-05, "loss": 0.0006, "step": 420 }, { "epoch": 0.7189542483660131, "grad_norm": 0.0029072267934679985, "learning_rate": 1.9034132171387073e-05, "loss": 0.0008, "step": 440 }, { "epoch": 0.7516339869281046, "grad_norm": 0.011737746186554432, "learning_rate": 1.888888888888889e-05, "loss": 0.0001, "step": 460 }, { "epoch": 0.7843137254901961, "grad_norm": 0.043070435523986816, "learning_rate": 1.8743645606390705e-05, "loss": 0.0054, "step": 480 }, { "epoch": 0.8169934640522876, "grad_norm": 0.0014130477793514729, "learning_rate": 1.859840232389252e-05, "loss": 0.0002, "step": 500 }, { "epoch": 0.8169934640522876, "eval_accuracy": 0.9976838448176027, "eval_f1": 0.997979797979798, "eval_loss": 0.0020458686631172895, "eval_precision": 0.997979797979798, "eval_recall": 0.997979797979798, "eval_roc_auc": 0.9999479188081631, "eval_runtime": 8.7772, "eval_samples_per_second": 196.761, "eval_steps_per_second": 6.152, "step": 500 }, { "epoch": 0.8496732026143791, "grad_norm": 0.1288619190454483, "learning_rate": 1.8453159041394337e-05, "loss": 0.0, "step": 520 }, { "epoch": 0.8823529411764706, "grad_norm": 0.001577255199663341, "learning_rate": 1.8307915758896153e-05, "loss": 0.0036, "step": 540 }, { "epoch": 0.9150326797385621, "grad_norm": 0.0329805426299572, "learning_rate": 1.816267247639797e-05, "loss": 0.0034, "step": 560 }, { "epoch": 0.9477124183006536, "grad_norm": 0.0058128549717366695, "learning_rate": 1.801742919389978e-05, "loss": 0.0035, "step": 580 }, { "epoch": 0.9803921568627451, "grad_norm": 0.00015560630708932877, "learning_rate": 1.7872185911401597e-05, "loss": 0.0008, "step": 600 }, { "epoch": 0.9803921568627451, "eval_accuracy": 0.9971048060220035, "eval_f1": 0.9974785678265254, "eval_loss": 0.0012797409435734153, "eval_precision": 0.9959718026183283, "eval_recall": 0.998989898989899, "eval_roc_auc": 0.9999218782122447, "eval_runtime": 8.7835, "eval_samples_per_second": 196.619, "eval_steps_per_second": 6.148, "step": 600 } ], "logging_steps": 20, "max_steps": 3060, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6542550643507200.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }