{ "best_global_step": 1218, "best_metric": 0.9136507936507936, "best_model_checkpoint": "./my_unified_model_classification_latest_only_eng/checkpoint-1218", "epoch": 2.0, "eval_steps": 203, "global_step": 1360, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07352941176470588, "grad_norm": 61.0, "learning_rate": 3.6296296296296302e-06, "loss": 0.9813, "step": 50 }, { "epoch": 0.14705882352941177, "grad_norm": 60.5, "learning_rate": 7.333333333333333e-06, "loss": 0.6174, "step": 100 }, { "epoch": 0.22058823529411764, "grad_norm": 63.0, "learning_rate": 9.885714285714287e-06, "loss": 0.5302, "step": 150 }, { "epoch": 0.29411764705882354, "grad_norm": 53.0, "learning_rate": 9.477551020408164e-06, "loss": 0.4742, "step": 200 }, { "epoch": 0.2985294117647059, "eval_accuracy": 0.7888735632183907, "eval_auc": 0.8929358689736048, "eval_f1": 0.7856609410007468, "eval_false_negatives": 562, "eval_false_positives": 1734, "eval_loss": 0.4537831246852875, "eval_precision": 0.7081790642881185, "eval_recall": 0.8821802935010482, "eval_runtime": 366.4094, "eval_samples_per_second": 29.68, "eval_specificity": 0.715970515970516, "eval_steps_per_second": 0.232, "eval_true_negatives": 4371, "eval_true_positives": 4208, "step": 203 }, { "epoch": 0.2985294117647059, "step": 203, "train_accuracy": 0.811, "train_auc": 0.8978184538533006, "train_f1": 0.8152492668621701, "train_false_negatives": 46, "train_false_positives": 143, "train_loss": 0.44783344864845276, "train_precision": 0.7446428571428572, "train_recall": 0.9006479481641468, "train_runtime": 33.6776, "train_samples_per_second": 29.693, "train_specificity": 0.7337057728119181, "train_steps_per_second": 0.238, "train_true_negatives": 394, "train_true_positives": 417 }, { "epoch": 0.36764705882352944, "grad_norm": 28.5, "learning_rate": 9.069387755102042e-06, "loss": 0.4275, "step": 250 }, { "epoch": 0.4411764705882353, "grad_norm": 45.75, "learning_rate": 8.661224489795919e-06, "loss": 0.4036, "step": 300 }, { "epoch": 0.5147058823529411, "grad_norm": 51.5, "learning_rate": 8.253061224489797e-06, "loss": 0.3776, "step": 350 }, { "epoch": 0.5882352941176471, "grad_norm": 16.375, "learning_rate": 7.844897959183674e-06, "loss": 0.3343, "step": 400 }, { "epoch": 0.5970588235294118, "eval_accuracy": 0.8777931034482759, "eval_auc": 0.9478171481945067, "eval_f1": 0.8573881317737955, "eval_false_negatives": 775, "eval_false_positives": 554, "eval_loss": 0.2918412387371063, "eval_precision": 0.8782149923060013, "eval_recall": 0.8375262054507338, "eval_runtime": 366.4435, "eval_samples_per_second": 29.677, "eval_specificity": 0.9092547092547093, "eval_steps_per_second": 0.232, "eval_true_negatives": 5551, "eval_true_positives": 3995, "step": 406 }, { "epoch": 0.5970588235294118, "step": 406, "train_accuracy": 0.903, "train_auc": 0.9626825785393716, "train_f1": 0.8843861740166865, "train_false_negatives": 59, "train_false_positives": 38, "train_loss": 0.24663816392421722, "train_precision": 0.9070904645476773, "train_recall": 0.8627906976744186, "train_runtime": 33.7261, "train_samples_per_second": 29.651, "train_specificity": 0.9333333333333333, "train_steps_per_second": 0.237, "train_true_negatives": 532, "train_true_positives": 371 }, { "epoch": 0.6617647058823529, "grad_norm": 42.0, "learning_rate": 7.436734693877552e-06, "loss": 0.3004, "step": 450 }, { "epoch": 0.7352941176470589, "grad_norm": 15.875, "learning_rate": 7.028571428571429e-06, "loss": 0.2891, "step": 500 }, { "epoch": 0.8088235294117647, "grad_norm": 52.75, "learning_rate": 6.620408163265306e-06, "loss": 0.279, "step": 550 }, { "epoch": 0.8823529411764706, "grad_norm": 29.5, "learning_rate": 6.2122448979591845e-06, "loss": 0.2745, "step": 600 }, { "epoch": 0.8955882352941177, "eval_accuracy": 0.9005057471264368, "eval_auc": 0.9620825284976229, "eval_f1": 0.8891166222586596, "eval_false_negatives": 432, "eval_false_positives": 650, "eval_loss": 0.2513802945613861, "eval_precision": 0.8696872493985566, "eval_recall": 0.909433962264151, "eval_runtime": 366.5333, "eval_samples_per_second": 29.67, "eval_specificity": 0.8935298935298935, "eval_steps_per_second": 0.232, "eval_true_negatives": 5455, "eval_true_positives": 4338, "step": 609 }, { "epoch": 0.8955882352941177, "step": 609, "train_accuracy": 0.914, "train_auc": 0.9714460029236588, "train_f1": 0.908315565031983, "train_false_negatives": 33, "train_false_positives": 53, "train_loss": 0.21909336745738983, "train_precision": 0.8893528183716075, "train_recall": 0.9281045751633987, "train_runtime": 33.7192, "train_samples_per_second": 29.657, "train_specificity": 0.9020332717190388, "train_steps_per_second": 0.237, "train_true_negatives": 488, "train_true_positives": 426 }, { "epoch": 0.9558823529411765, "grad_norm": 12.5625, "learning_rate": 5.804081632653061e-06, "loss": 0.2479, "step": 650 }, { "epoch": 1.0294117647058822, "grad_norm": 22.375, "learning_rate": 5.395918367346939e-06, "loss": 0.2153, "step": 700 }, { "epoch": 1.1029411764705883, "grad_norm": 31.75, "learning_rate": 4.987755102040817e-06, "loss": 0.1901, "step": 750 }, { "epoch": 1.1764705882352942, "grad_norm": 17.25, "learning_rate": 4.579591836734694e-06, "loss": 0.1702, "step": 800 }, { "epoch": 1.1941176470588235, "eval_accuracy": 0.918528735632184, "eval_auc": 0.9723367106385974, "eval_f1": 0.905179794520548, "eval_false_negatives": 541, "eval_false_positives": 345, "eval_loss": 0.21427848935127258, "eval_precision": 0.9245736773065151, "eval_recall": 0.8865828092243186, "eval_runtime": 366.4767, "eval_samples_per_second": 29.674, "eval_specificity": 0.9434889434889435, "eval_steps_per_second": 0.232, "eval_true_negatives": 5760, "eval_true_positives": 4229, "step": 812 }, { "epoch": 1.1941176470588235, "step": 812, "train_accuracy": 0.947, "train_auc": 0.9857519084432687, "train_f1": 0.9399773499433749, "train_false_negatives": 32, "train_false_positives": 21, "train_loss": 0.15449950098991394, "train_precision": 0.9518348623853211, "train_recall": 0.9284116331096197, "train_runtime": 33.6205, "train_samples_per_second": 29.744, "train_specificity": 0.9620253164556962, "train_steps_per_second": 0.238, "train_true_negatives": 532, "train_true_positives": 415 }, { "epoch": 1.25, "grad_norm": 16.5, "learning_rate": 4.1714285714285715e-06, "loss": 0.1766, "step": 850 }, { "epoch": 1.3235294117647058, "grad_norm": 12.5625, "learning_rate": 3.7632653061224494e-06, "loss": 0.1553, "step": 900 }, { "epoch": 1.3970588235294117, "grad_norm": 19.625, "learning_rate": 3.3551020408163272e-06, "loss": 0.1473, "step": 950 }, { "epoch": 1.4705882352941178, "grad_norm": 24.125, "learning_rate": 2.946938775510204e-06, "loss": 0.153, "step": 1000 }, { "epoch": 1.4926470588235294, "eval_accuracy": 0.9233103448275862, "eval_auc": 0.9760859143878011, "eval_f1": 0.9123028391167193, "eval_false_negatives": 432, "eval_false_positives": 402, "eval_loss": 0.1964733898639679, "eval_precision": 0.9151898734177215, "eval_recall": 0.909433962264151, "eval_runtime": 366.6051, "eval_samples_per_second": 29.664, "eval_specificity": 0.9341523341523341, "eval_steps_per_second": 0.232, "eval_true_negatives": 5703, "eval_true_positives": 4338, "step": 1015 }, { "epoch": 1.4926470588235294, "step": 1015, "train_accuracy": 0.961, "train_auc": 0.9915747155931913, "train_f1": 0.9545983701979045, "train_false_negatives": 23, "train_false_positives": 16, "train_loss": 0.1129259318113327, "train_precision": 0.9624413145539906, "train_recall": 0.9468822170900693, "train_runtime": 33.7414, "train_samples_per_second": 29.637, "train_specificity": 0.9717813051146384, "train_steps_per_second": 0.237, "train_true_negatives": 551, "train_true_positives": 410 }, { "epoch": 1.5441176470588234, "grad_norm": 14.875, "learning_rate": 2.5387755102040816e-06, "loss": 0.1513, "step": 1050 }, { "epoch": 1.6176470588235294, "grad_norm": 20.375, "learning_rate": 2.1306122448979595e-06, "loss": 0.1593, "step": 1100 }, { "epoch": 1.6911764705882353, "grad_norm": 16.5, "learning_rate": 1.722448979591837e-06, "loss": 0.1478, "step": 1150 }, { "epoch": 1.7647058823529411, "grad_norm": 13.25, "learning_rate": 1.3142857142857143e-06, "loss": 0.1399, "step": 1200 }, { "epoch": 1.7911764705882351, "eval_accuracy": 0.9249655172413793, "eval_auc": 0.9771551482872238, "eval_f1": 0.9136507936507936, "eval_false_negatives": 453, "eval_false_positives": 363, "eval_loss": 0.19318822026252747, "eval_precision": 0.9224358974358975, "eval_recall": 0.9050314465408805, "eval_runtime": 366.7824, "eval_samples_per_second": 29.65, "eval_specificity": 0.9405405405405406, "eval_steps_per_second": 0.232, "eval_true_negatives": 5742, "eval_true_positives": 4317, "step": 1218 }, { "epoch": 1.7911764705882351, "step": 1218, "train_accuracy": 0.957, "train_auc": 0.9929256882262804, "train_f1": 0.9502890173410404, "train_false_negatives": 26, "train_false_positives": 17, "train_loss": 0.11246936023235321, "train_precision": 0.9602803738317757, "train_recall": 0.9405034324942791, "train_runtime": 33.6494, "train_samples_per_second": 29.718, "train_specificity": 0.9698046181172292, "train_steps_per_second": 0.238, "train_true_negatives": 546, "train_true_positives": 411 }, { "epoch": 1.8382352941176472, "grad_norm": 22.25, "learning_rate": 9.061224489795919e-07, "loss": 0.1532, "step": 1250 }, { "epoch": 1.9117647058823528, "grad_norm": 13.375, "learning_rate": 4.979591836734694e-07, "loss": 0.1489, "step": 1300 }, { "epoch": 1.9852941176470589, "grad_norm": 18.5, "learning_rate": 8.979591836734695e-08, "loss": 0.1443, "step": 1350 } ], "logging_steps": 50, "max_steps": 1360, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 203, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.460630510863122e+18, "train_batch_size": 128, "trial_name": null, "trial_params": null }