{ "best_global_step": 1890, "best_metric": 0.7751064362634611, "best_model_checkpoint": "./my_unified_model_classification_final/checkpoint-1890", "epoch": 2.0, "eval_steps": 315, "global_step": 2108, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04743833017077799, "grad_norm": 60.25, "learning_rate": 2.3333333333333336e-06, "loss": 1.0421, "step": 50 }, { "epoch": 0.09487666034155598, "grad_norm": 59.75, "learning_rate": 4.714285714285715e-06, "loss": 0.8211, "step": 100 }, { "epoch": 0.14231499051233396, "grad_norm": 41.0, "learning_rate": 7.095238095238096e-06, "loss": 0.7358, "step": 150 }, { "epoch": 0.18975332068311196, "grad_norm": 53.0, "learning_rate": 9.476190476190476e-06, "loss": 0.7224, "step": 200 }, { "epoch": 0.23719165085388993, "grad_norm": 109.5, "learning_rate": 9.794520547945206e-06, "loss": 0.6881, "step": 250 }, { "epoch": 0.2846299810246679, "grad_norm": 67.0, "learning_rate": 9.531085353003163e-06, "loss": 0.6706, "step": 300 }, { "epoch": 0.29886148007590135, "eval_accuracy": 0.6481514450180998, "eval_auc": 0.7337608189235229, "eval_f1": 0.6774035584090539, "eval_false_negatives": 1756, "eval_false_positives": 4173, "eval_loss": 0.6601030230522156, "eval_precision": 0.5986728216964801, "eval_recall": 0.7799774464352838, "eval_runtime": 565.4848, "eval_samples_per_second": 29.799, "eval_specificity": 0.5295377677564825, "eval_steps_per_second": 0.233, "eval_true_negatives": 4697, "eval_true_positives": 6225, "step": 315 }, { "epoch": 0.29886148007590135, "step": 315, "train_accuracy": 0.689, "train_auc": 0.7696650342487676, "train_f1": 0.7210762331838565, "train_false_negatives": 106, "train_false_positives": 205, "train_loss": 0.6076448559761047, "train_precision": 0.6622734761120264, "train_recall": 0.7913385826771654, "train_runtime": 33.5457, "train_samples_per_second": 29.81, "train_specificity": 0.5833333333333334, "train_steps_per_second": 0.238, "train_true_negatives": 287, "train_true_positives": 402 }, { "epoch": 0.33206831119544594, "grad_norm": 27.25, "learning_rate": 9.267650158061118e-06, "loss": 0.637, "step": 350 }, { "epoch": 0.3795066413662239, "grad_norm": 32.25, "learning_rate": 9.004214963119073e-06, "loss": 0.6261, "step": 400 }, { "epoch": 0.4269449715370019, "grad_norm": 12.6875, "learning_rate": 8.74077976817703e-06, "loss": 0.6213, "step": 450 }, { "epoch": 0.47438330170777987, "grad_norm": 10.6875, "learning_rate": 8.477344573234985e-06, "loss": 0.6218, "step": 500 }, { "epoch": 0.5218216318785579, "grad_norm": 9.75, "learning_rate": 8.21390937829294e-06, "loss": 0.6033, "step": 550 }, { "epoch": 0.5692599620493358, "grad_norm": 17.75, "learning_rate": 7.950474183350897e-06, "loss": 0.6017, "step": 600 }, { "epoch": 0.5977229601518027, "eval_accuracy": 0.6978814313690582, "eval_auc": 0.7703106885617717, "eval_f1": 0.6362272240085745, "eval_false_negatives": 3529, "eval_false_positives": 1562, "eval_loss": 0.589336097240448, "eval_precision": 0.7402726970402395, "eval_recall": 0.5578248339807041, "eval_runtime": 566.1112, "eval_samples_per_second": 29.766, "eval_specificity": 0.8239007891770012, "eval_steps_per_second": 0.233, "eval_true_negatives": 7308, "eval_true_positives": 4452, "step": 630 }, { "epoch": 0.5977229601518027, "step": 630, "train_accuracy": 0.699, "train_auc": 0.7801548075728242, "train_f1": 0.6403823178016727, "train_false_negatives": 209, "train_false_positives": 92, "train_loss": 0.5804136991500854, "train_precision": 0.7444444444444445, "train_recall": 0.5618448637316562, "train_runtime": 33.5615, "train_samples_per_second": 29.796, "train_specificity": 0.8240917782026769, "train_steps_per_second": 0.238, "train_true_negatives": 431, "train_true_positives": 268 }, { "epoch": 0.6166982922201139, "grad_norm": 65.5, "learning_rate": 7.687038988408853e-06, "loss": 0.5989, "step": 650 }, { "epoch": 0.6641366223908919, "grad_norm": 38.75, "learning_rate": 7.423603793466808e-06, "loss": 0.5827, "step": 700 }, { "epoch": 0.7115749525616698, "grad_norm": 15.25, "learning_rate": 7.1601685985247635e-06, "loss": 0.5679, "step": 750 }, { "epoch": 0.7590132827324478, "grad_norm": 31.875, "learning_rate": 6.896733403582719e-06, "loss": 0.561, "step": 800 }, { "epoch": 0.8064516129032258, "grad_norm": 24.25, "learning_rate": 6.633298208640675e-06, "loss": 0.5675, "step": 850 }, { "epoch": 0.8538899430740038, "grad_norm": 16.0, "learning_rate": 6.36986301369863e-06, "loss": 0.544, "step": 900 }, { "epoch": 0.896584440227704, "eval_accuracy": 0.7435167052400451, "eval_auc": 0.8127202542905241, "eval_f1": 0.7221293557927221, "eval_false_negatives": 2365, "eval_false_positives": 1957, "eval_loss": 0.5301145911216736, "eval_precision": 0.7415819358246402, "eval_recall": 0.7036712191454705, "eval_runtime": 565.3739, "eval_samples_per_second": 29.805, "eval_specificity": 0.7793686583990981, "eval_steps_per_second": 0.233, "eval_true_negatives": 6913, "eval_true_positives": 5616, "step": 945 }, { "epoch": 0.896584440227704, "step": 945, "train_accuracy": 0.769, "train_auc": 0.8395073580294321, "train_f1": 0.7621009268795057, "train_false_negatives": 129, "train_false_positives": 102, "train_loss": 0.498809278011322, "train_precision": 0.7838983050847458, "train_recall": 0.7414829659318637, "train_runtime": 33.5647, "train_samples_per_second": 29.793, "train_specificity": 0.7964071856287425, "train_steps_per_second": 0.238, "train_true_negatives": 399, "train_true_positives": 370 }, { "epoch": 0.9013282732447818, "grad_norm": 21.375, "learning_rate": 6.106427818756586e-06, "loss": 0.5341, "step": 950 }, { "epoch": 0.9487666034155597, "grad_norm": 17.625, "learning_rate": 5.8429926238145414e-06, "loss": 0.5227, "step": 1000 }, { "epoch": 0.9962049335863378, "grad_norm": 17.5, "learning_rate": 5.579557428872497e-06, "loss": 0.5105, "step": 1050 }, { "epoch": 1.0436432637571158, "grad_norm": 15.8125, "learning_rate": 5.316122233930453e-06, "loss": 0.4476, "step": 1100 }, { "epoch": 1.0910815939278937, "grad_norm": 36.5, "learning_rate": 5.05268703898841e-06, "loss": 0.432, "step": 1150 }, { "epoch": 1.1385199240986716, "grad_norm": 20.625, "learning_rate": 4.789251844046365e-06, "loss": 0.4303, "step": 1200 }, { "epoch": 1.1859582542694498, "grad_norm": 15.0, "learning_rate": 4.525816649104321e-06, "loss": 0.431, "step": 1250 }, { "epoch": 1.1954459203036052, "eval_accuracy": 0.7668981069372738, "eval_auc": 0.8454007170637932, "eval_f1": 0.7403146899378553, "eval_false_negatives": 2382, "eval_false_positives": 1546, "eval_loss": 0.49584120512008667, "eval_precision": 0.7836249125262421, "eval_recall": 0.7015411602556071, "eval_runtime": 565.3929, "eval_samples_per_second": 29.804, "eval_specificity": 0.8257046223224351, "eval_steps_per_second": 0.233, "eval_true_negatives": 7324, "eval_true_positives": 5599, "step": 1260 }, { "epoch": 1.1954459203036052, "step": 1260, "train_accuracy": 0.816, "train_auc": 0.8946033653846154, "train_f1": 0.7960088691796009, "train_false_negatives": 121, "train_false_positives": 63, "train_loss": 0.4144395887851715, "train_precision": 0.8507109004739336, "train_recall": 0.7479166666666667, "train_runtime": 33.5611, "train_samples_per_second": 29.796, "train_specificity": 0.8788461538461538, "train_steps_per_second": 0.238, "train_true_negatives": 457, "train_true_positives": 359 }, { "epoch": 1.2333965844402277, "grad_norm": 26.375, "learning_rate": 4.262381454162276e-06, "loss": 0.4144, "step": 1300 }, { "epoch": 1.2808349146110056, "grad_norm": 27.375, "learning_rate": 3.998946259220232e-06, "loss": 0.4234, "step": 1350 }, { "epoch": 1.3282732447817835, "grad_norm": 37.0, "learning_rate": 3.7355110642781876e-06, "loss": 0.4121, "step": 1400 }, { "epoch": 1.3757115749525617, "grad_norm": 22.0, "learning_rate": 3.4720758693361435e-06, "loss": 0.4019, "step": 1450 }, { "epoch": 1.4231499051233396, "grad_norm": 22.75, "learning_rate": 3.2086406743940995e-06, "loss": 0.3934, "step": 1500 }, { "epoch": 1.4705882352941178, "grad_norm": 22.75, "learning_rate": 2.945205479452055e-06, "loss": 0.3826, "step": 1550 }, { "epoch": 1.4943074003795067, "eval_accuracy": 0.7803691175597888, "eval_auc": 0.8628874001345077, "eval_f1": 0.7676272995542162, "eval_false_negatives": 1868, "eval_false_positives": 1833, "eval_loss": 0.4711809456348419, "eval_precision": 0.7693178957966272, "eval_recall": 0.7659441172785365, "eval_runtime": 565.811, "eval_samples_per_second": 29.782, "eval_specificity": 0.793348365276212, "eval_steps_per_second": 0.233, "eval_true_negatives": 7037, "eval_true_positives": 6113, "step": 1575 }, { "epoch": 1.4943074003795067, "step": 1575, "train_accuracy": 0.864, "train_auc": 0.936523558617489, "train_f1": 0.859504132231405, "train_false_negatives": 77, "train_false_positives": 59, "train_loss": 0.32817962765693665, "train_precision": 0.8757894736842106, "train_recall": 0.8438133874239351, "train_runtime": 33.5625, "train_samples_per_second": 29.795, "train_specificity": 0.883629191321499, "train_steps_per_second": 0.238, "train_true_negatives": 448, "train_true_positives": 416 }, { "epoch": 1.5180265654648957, "grad_norm": 26.25, "learning_rate": 2.681770284510011e-06, "loss": 0.3818, "step": 1600 }, { "epoch": 1.5654648956356736, "grad_norm": 23.75, "learning_rate": 2.4183350895679664e-06, "loss": 0.4057, "step": 1650 }, { "epoch": 1.6129032258064515, "grad_norm": 31.375, "learning_rate": 2.1548998946259223e-06, "loss": 0.3949, "step": 1700 }, { "epoch": 1.6603415559772297, "grad_norm": 22.625, "learning_rate": 1.8914646996838779e-06, "loss": 0.3875, "step": 1750 }, { "epoch": 1.7077798861480076, "grad_norm": 26.375, "learning_rate": 1.6280295047418338e-06, "loss": 0.38, "step": 1800 }, { "epoch": 1.7552182163187857, "grad_norm": 26.75, "learning_rate": 1.3645943097997893e-06, "loss": 0.3754, "step": 1850 }, { "epoch": 1.793168880455408, "eval_accuracy": 0.7868375764049611, "eval_auc": 0.8702927979882322, "eval_f1": 0.7751064362634611, "eval_false_negatives": 1791, "eval_false_positives": 1801, "eval_loss": 0.45739424228668213, "eval_precision": 0.7746214491302715, "eval_recall": 0.7755920310738003, "eval_runtime": 565.5123, "eval_samples_per_second": 29.798, "eval_specificity": 0.7969560315670801, "eval_steps_per_second": 0.233, "eval_true_negatives": 7069, "eval_true_positives": 6190, "step": 1890 }, { "epoch": 1.793168880455408, "step": 1890, "train_accuracy": 0.869, "train_auc": 0.9383460241118122, "train_f1": 0.8659160696008188, "train_false_negatives": 68, "train_false_positives": 63, "train_loss": 0.32318422198295593, "train_precision": 0.8703703703703703, "train_recall": 0.8615071283095723, "train_runtime": 33.5363, "train_samples_per_second": 29.818, "train_specificity": 0.8762278978388998, "train_steps_per_second": 0.239, "train_true_negatives": 446, "train_true_positives": 423 }, { "epoch": 1.8026565464895636, "grad_norm": 29.0, "learning_rate": 1.101159114857745e-06, "loss": 0.3655, "step": 1900 }, { "epoch": 1.8500948766603416, "grad_norm": 25.625, "learning_rate": 8.377239199157008e-07, "loss": 0.3777, "step": 1950 }, { "epoch": 1.8975332068311195, "grad_norm": 26.25, "learning_rate": 5.742887249736566e-07, "loss": 0.371, "step": 2000 }, { "epoch": 1.9449715370018974, "grad_norm": 34.25, "learning_rate": 3.108535300316122e-07, "loss": 0.3917, "step": 2050 }, { "epoch": 1.9924098671726755, "grad_norm": 24.75, "learning_rate": 4.741833508956797e-08, "loss": 0.3776, "step": 2100 } ], "logging_steps": 50, "max_steps": 2108, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 315, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1560301981608182e+19, "train_batch_size": 128, "trial_name": null, "trial_params": null }