| { |
| "best_global_step": 1890, |
| "best_metric": 0.7751064362634611, |
| "best_model_checkpoint": "./my_unified_model_classification_final/checkpoint-1890", |
| "epoch": 1.793168880455408, |
| "eval_steps": 315, |
| "global_step": 1890, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04743833017077799, |
| "grad_norm": 60.25, |
| "learning_rate": 2.3333333333333336e-06, |
| "loss": 1.0421, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.09487666034155598, |
| "grad_norm": 59.75, |
| "learning_rate": 4.714285714285715e-06, |
| "loss": 0.8211, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.14231499051233396, |
| "grad_norm": 41.0, |
| "learning_rate": 7.095238095238096e-06, |
| "loss": 0.7358, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.18975332068311196, |
| "grad_norm": 53.0, |
| "learning_rate": 9.476190476190476e-06, |
| "loss": 0.7224, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.23719165085388993, |
| "grad_norm": 109.5, |
| "learning_rate": 9.794520547945206e-06, |
| "loss": 0.6881, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.2846299810246679, |
| "grad_norm": 67.0, |
| "learning_rate": 9.531085353003163e-06, |
| "loss": 0.6706, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.29886148007590135, |
| "eval_accuracy": 0.6481514450180998, |
| "eval_auc": 0.7337608189235229, |
| "eval_f1": 0.6774035584090539, |
| "eval_false_negatives": 1756, |
| "eval_false_positives": 4173, |
| "eval_loss": 0.6601030230522156, |
| "eval_precision": 0.5986728216964801, |
| "eval_recall": 0.7799774464352838, |
| "eval_runtime": 565.4848, |
| "eval_samples_per_second": 29.799, |
| "eval_specificity": 0.5295377677564825, |
| "eval_steps_per_second": 0.233, |
| "eval_true_negatives": 4697, |
| "eval_true_positives": 6225, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.29886148007590135, |
| "step": 315, |
| "train_accuracy": 0.689, |
| "train_auc": 0.7696650342487676, |
| "train_f1": 0.7210762331838565, |
| "train_false_negatives": 106, |
| "train_false_positives": 205, |
| "train_loss": 0.6076448559761047, |
| "train_precision": 0.6622734761120264, |
| "train_recall": 0.7913385826771654, |
| "train_runtime": 33.5457, |
| "train_samples_per_second": 29.81, |
| "train_specificity": 0.5833333333333334, |
| "train_steps_per_second": 0.238, |
| "train_true_negatives": 287, |
| "train_true_positives": 402 |
| }, |
| { |
| "epoch": 0.33206831119544594, |
| "grad_norm": 27.25, |
| "learning_rate": 9.267650158061118e-06, |
| "loss": 0.637, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.3795066413662239, |
| "grad_norm": 32.25, |
| "learning_rate": 9.004214963119073e-06, |
| "loss": 0.6261, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.4269449715370019, |
| "grad_norm": 12.6875, |
| "learning_rate": 8.74077976817703e-06, |
| "loss": 0.6213, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.47438330170777987, |
| "grad_norm": 10.6875, |
| "learning_rate": 8.477344573234985e-06, |
| "loss": 0.6218, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5218216318785579, |
| "grad_norm": 9.75, |
| "learning_rate": 8.21390937829294e-06, |
| "loss": 0.6033, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.5692599620493358, |
| "grad_norm": 17.75, |
| "learning_rate": 7.950474183350897e-06, |
| "loss": 0.6017, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5977229601518027, |
| "eval_accuracy": 0.6978814313690582, |
| "eval_auc": 0.7703106885617717, |
| "eval_f1": 0.6362272240085745, |
| "eval_false_negatives": 3529, |
| "eval_false_positives": 1562, |
| "eval_loss": 0.589336097240448, |
| "eval_precision": 0.7402726970402395, |
| "eval_recall": 0.5578248339807041, |
| "eval_runtime": 566.1112, |
| "eval_samples_per_second": 29.766, |
| "eval_specificity": 0.8239007891770012, |
| "eval_steps_per_second": 0.233, |
| "eval_true_negatives": 7308, |
| "eval_true_positives": 4452, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.5977229601518027, |
| "step": 630, |
| "train_accuracy": 0.699, |
| "train_auc": 0.7801548075728242, |
| "train_f1": 0.6403823178016727, |
| "train_false_negatives": 209, |
| "train_false_positives": 92, |
| "train_loss": 0.5804136991500854, |
| "train_precision": 0.7444444444444445, |
| "train_recall": 0.5618448637316562, |
| "train_runtime": 33.5615, |
| "train_samples_per_second": 29.796, |
| "train_specificity": 0.8240917782026769, |
| "train_steps_per_second": 0.238, |
| "train_true_negatives": 431, |
| "train_true_positives": 268 |
| }, |
| { |
| "epoch": 0.6166982922201139, |
| "grad_norm": 65.5, |
| "learning_rate": 7.687038988408853e-06, |
| "loss": 0.5989, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.6641366223908919, |
| "grad_norm": 38.75, |
| "learning_rate": 7.423603793466808e-06, |
| "loss": 0.5827, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7115749525616698, |
| "grad_norm": 15.25, |
| "learning_rate": 7.1601685985247635e-06, |
| "loss": 0.5679, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.7590132827324478, |
| "grad_norm": 31.875, |
| "learning_rate": 6.896733403582719e-06, |
| "loss": 0.561, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.8064516129032258, |
| "grad_norm": 24.25, |
| "learning_rate": 6.633298208640675e-06, |
| "loss": 0.5675, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.8538899430740038, |
| "grad_norm": 16.0, |
| "learning_rate": 6.36986301369863e-06, |
| "loss": 0.544, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.896584440227704, |
| "eval_accuracy": 0.7435167052400451, |
| "eval_auc": 0.8127202542905241, |
| "eval_f1": 0.7221293557927221, |
| "eval_false_negatives": 2365, |
| "eval_false_positives": 1957, |
| "eval_loss": 0.5301145911216736, |
| "eval_precision": 0.7415819358246402, |
| "eval_recall": 0.7036712191454705, |
| "eval_runtime": 565.3739, |
| "eval_samples_per_second": 29.805, |
| "eval_specificity": 0.7793686583990981, |
| "eval_steps_per_second": 0.233, |
| "eval_true_negatives": 6913, |
| "eval_true_positives": 5616, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.896584440227704, |
| "step": 945, |
| "train_accuracy": 0.769, |
| "train_auc": 0.8395073580294321, |
| "train_f1": 0.7621009268795057, |
| "train_false_negatives": 129, |
| "train_false_positives": 102, |
| "train_loss": 0.498809278011322, |
| "train_precision": 0.7838983050847458, |
| "train_recall": 0.7414829659318637, |
| "train_runtime": 33.5647, |
| "train_samples_per_second": 29.793, |
| "train_specificity": 0.7964071856287425, |
| "train_steps_per_second": 0.238, |
| "train_true_negatives": 399, |
| "train_true_positives": 370 |
| }, |
| { |
| "epoch": 0.9013282732447818, |
| "grad_norm": 21.375, |
| "learning_rate": 6.106427818756586e-06, |
| "loss": 0.5341, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.9487666034155597, |
| "grad_norm": 17.625, |
| "learning_rate": 5.8429926238145414e-06, |
| "loss": 0.5227, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.9962049335863378, |
| "grad_norm": 17.5, |
| "learning_rate": 5.579557428872497e-06, |
| "loss": 0.5105, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.0436432637571158, |
| "grad_norm": 15.8125, |
| "learning_rate": 5.316122233930453e-06, |
| "loss": 0.4476, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.0910815939278937, |
| "grad_norm": 36.5, |
| "learning_rate": 5.05268703898841e-06, |
| "loss": 0.432, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.1385199240986716, |
| "grad_norm": 20.625, |
| "learning_rate": 4.789251844046365e-06, |
| "loss": 0.4303, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.1859582542694498, |
| "grad_norm": 15.0, |
| "learning_rate": 4.525816649104321e-06, |
| "loss": 0.431, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.1954459203036052, |
| "eval_accuracy": 0.7668981069372738, |
| "eval_auc": 0.8454007170637932, |
| "eval_f1": 0.7403146899378553, |
| "eval_false_negatives": 2382, |
| "eval_false_positives": 1546, |
| "eval_loss": 0.49584120512008667, |
| "eval_precision": 0.7836249125262421, |
| "eval_recall": 0.7015411602556071, |
| "eval_runtime": 565.3929, |
| "eval_samples_per_second": 29.804, |
| "eval_specificity": 0.8257046223224351, |
| "eval_steps_per_second": 0.233, |
| "eval_true_negatives": 7324, |
| "eval_true_positives": 5599, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.1954459203036052, |
| "step": 1260, |
| "train_accuracy": 0.816, |
| "train_auc": 0.8946033653846154, |
| "train_f1": 0.7960088691796009, |
| "train_false_negatives": 121, |
| "train_false_positives": 63, |
| "train_loss": 0.4144395887851715, |
| "train_precision": 0.8507109004739336, |
| "train_recall": 0.7479166666666667, |
| "train_runtime": 33.5611, |
| "train_samples_per_second": 29.796, |
| "train_specificity": 0.8788461538461538, |
| "train_steps_per_second": 0.238, |
| "train_true_negatives": 457, |
| "train_true_positives": 359 |
| }, |
| { |
| "epoch": 1.2333965844402277, |
| "grad_norm": 26.375, |
| "learning_rate": 4.262381454162276e-06, |
| "loss": 0.4144, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.2808349146110056, |
| "grad_norm": 27.375, |
| "learning_rate": 3.998946259220232e-06, |
| "loss": 0.4234, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.3282732447817835, |
| "grad_norm": 37.0, |
| "learning_rate": 3.7355110642781876e-06, |
| "loss": 0.4121, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.3757115749525617, |
| "grad_norm": 22.0, |
| "learning_rate": 3.4720758693361435e-06, |
| "loss": 0.4019, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.4231499051233396, |
| "grad_norm": 22.75, |
| "learning_rate": 3.2086406743940995e-06, |
| "loss": 0.3934, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.4705882352941178, |
| "grad_norm": 22.75, |
| "learning_rate": 2.945205479452055e-06, |
| "loss": 0.3826, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.4943074003795067, |
| "eval_accuracy": 0.7803691175597888, |
| "eval_auc": 0.8628874001345077, |
| "eval_f1": 0.7676272995542162, |
| "eval_false_negatives": 1868, |
| "eval_false_positives": 1833, |
| "eval_loss": 0.4711809456348419, |
| "eval_precision": 0.7693178957966272, |
| "eval_recall": 0.7659441172785365, |
| "eval_runtime": 565.811, |
| "eval_samples_per_second": 29.782, |
| "eval_specificity": 0.793348365276212, |
| "eval_steps_per_second": 0.233, |
| "eval_true_negatives": 7037, |
| "eval_true_positives": 6113, |
| "step": 1575 |
| }, |
| { |
| "epoch": 1.4943074003795067, |
| "step": 1575, |
| "train_accuracy": 0.864, |
| "train_auc": 0.936523558617489, |
| "train_f1": 0.859504132231405, |
| "train_false_negatives": 77, |
| "train_false_positives": 59, |
| "train_loss": 0.32817962765693665, |
| "train_precision": 0.8757894736842106, |
| "train_recall": 0.8438133874239351, |
| "train_runtime": 33.5625, |
| "train_samples_per_second": 29.795, |
| "train_specificity": 0.883629191321499, |
| "train_steps_per_second": 0.238, |
| "train_true_negatives": 448, |
| "train_true_positives": 416 |
| }, |
| { |
| "epoch": 1.5180265654648957, |
| "grad_norm": 26.25, |
| "learning_rate": 2.681770284510011e-06, |
| "loss": 0.3818, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.5654648956356736, |
| "grad_norm": 23.75, |
| "learning_rate": 2.4183350895679664e-06, |
| "loss": 0.4057, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.6129032258064515, |
| "grad_norm": 31.375, |
| "learning_rate": 2.1548998946259223e-06, |
| "loss": 0.3949, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.6603415559772297, |
| "grad_norm": 22.625, |
| "learning_rate": 1.8914646996838779e-06, |
| "loss": 0.3875, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.7077798861480076, |
| "grad_norm": 26.375, |
| "learning_rate": 1.6280295047418338e-06, |
| "loss": 0.38, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.7552182163187857, |
| "grad_norm": 26.75, |
| "learning_rate": 1.3645943097997893e-06, |
| "loss": 0.3754, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.793168880455408, |
| "eval_accuracy": 0.7868375764049611, |
| "eval_auc": 0.8702927979882322, |
| "eval_f1": 0.7751064362634611, |
| "eval_false_negatives": 1791, |
| "eval_false_positives": 1801, |
| "eval_loss": 0.45739424228668213, |
| "eval_precision": 0.7746214491302715, |
| "eval_recall": 0.7755920310738003, |
| "eval_runtime": 565.5123, |
| "eval_samples_per_second": 29.798, |
| "eval_specificity": 0.7969560315670801, |
| "eval_steps_per_second": 0.233, |
| "eval_true_negatives": 7069, |
| "eval_true_positives": 6190, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.793168880455408, |
| "step": 1890, |
| "train_accuracy": 0.869, |
| "train_auc": 0.9383460241118122, |
| "train_f1": 0.8659160696008188, |
| "train_false_negatives": 68, |
| "train_false_positives": 63, |
| "train_loss": 0.32318422198295593, |
| "train_precision": 0.8703703703703703, |
| "train_recall": 0.8615071283095723, |
| "train_runtime": 33.5363, |
| "train_samples_per_second": 29.818, |
| "train_specificity": 0.8762278978388998, |
| "train_steps_per_second": 0.239, |
| "train_true_negatives": 446, |
| "train_true_positives": 423 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 2108, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 315, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.036803595299088e+19, |
| "train_batch_size": 128, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|