| { |
| "best_global_step": 1218, |
| "best_metric": 0.9136507936507936, |
| "best_model_checkpoint": "./my_unified_model_classification_latest_only_eng/checkpoint-1218", |
| "epoch": 2.0, |
| "eval_steps": 203, |
| "global_step": 1360, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.07352941176470588, |
| "grad_norm": 61.0, |
| "learning_rate": 3.6296296296296302e-06, |
| "loss": 0.9813, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.14705882352941177, |
| "grad_norm": 60.5, |
| "learning_rate": 7.333333333333333e-06, |
| "loss": 0.6174, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.22058823529411764, |
| "grad_norm": 63.0, |
| "learning_rate": 9.885714285714287e-06, |
| "loss": 0.5302, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.29411764705882354, |
| "grad_norm": 53.0, |
| "learning_rate": 9.477551020408164e-06, |
| "loss": 0.4742, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2985294117647059, |
| "eval_accuracy": 0.7888735632183907, |
| "eval_auc": 0.8929358689736048, |
| "eval_f1": 0.7856609410007468, |
| "eval_false_negatives": 562, |
| "eval_false_positives": 1734, |
| "eval_loss": 0.4537831246852875, |
| "eval_precision": 0.7081790642881185, |
| "eval_recall": 0.8821802935010482, |
| "eval_runtime": 366.4094, |
| "eval_samples_per_second": 29.68, |
| "eval_specificity": 0.715970515970516, |
| "eval_steps_per_second": 0.232, |
| "eval_true_negatives": 4371, |
| "eval_true_positives": 4208, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.2985294117647059, |
| "step": 203, |
| "train_accuracy": 0.811, |
| "train_auc": 0.8978184538533006, |
| "train_f1": 0.8152492668621701, |
| "train_false_negatives": 46, |
| "train_false_positives": 143, |
| "train_loss": 0.44783344864845276, |
| "train_precision": 0.7446428571428572, |
| "train_recall": 0.9006479481641468, |
| "train_runtime": 33.6776, |
| "train_samples_per_second": 29.693, |
| "train_specificity": 0.7337057728119181, |
| "train_steps_per_second": 0.238, |
| "train_true_negatives": 394, |
| "train_true_positives": 417 |
| }, |
| { |
| "epoch": 0.36764705882352944, |
| "grad_norm": 28.5, |
| "learning_rate": 9.069387755102042e-06, |
| "loss": 0.4275, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.4411764705882353, |
| "grad_norm": 45.75, |
| "learning_rate": 8.661224489795919e-06, |
| "loss": 0.4036, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5147058823529411, |
| "grad_norm": 51.5, |
| "learning_rate": 8.253061224489797e-06, |
| "loss": 0.3776, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 16.375, |
| "learning_rate": 7.844897959183674e-06, |
| "loss": 0.3343, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5970588235294118, |
| "eval_accuracy": 0.8777931034482759, |
| "eval_auc": 0.9478171481945067, |
| "eval_f1": 0.8573881317737955, |
| "eval_false_negatives": 775, |
| "eval_false_positives": 554, |
| "eval_loss": 0.2918412387371063, |
| "eval_precision": 0.8782149923060013, |
| "eval_recall": 0.8375262054507338, |
| "eval_runtime": 366.4435, |
| "eval_samples_per_second": 29.677, |
| "eval_specificity": 0.9092547092547093, |
| "eval_steps_per_second": 0.232, |
| "eval_true_negatives": 5551, |
| "eval_true_positives": 3995, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.5970588235294118, |
| "step": 406, |
| "train_accuracy": 0.903, |
| "train_auc": 0.9626825785393716, |
| "train_f1": 0.8843861740166865, |
| "train_false_negatives": 59, |
| "train_false_positives": 38, |
| "train_loss": 0.24663816392421722, |
| "train_precision": 0.9070904645476773, |
| "train_recall": 0.8627906976744186, |
| "train_runtime": 33.7261, |
| "train_samples_per_second": 29.651, |
| "train_specificity": 0.9333333333333333, |
| "train_steps_per_second": 0.237, |
| "train_true_negatives": 532, |
| "train_true_positives": 371 |
| }, |
| { |
| "epoch": 0.6617647058823529, |
| "grad_norm": 42.0, |
| "learning_rate": 7.436734693877552e-06, |
| "loss": 0.3004, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.7352941176470589, |
| "grad_norm": 15.875, |
| "learning_rate": 7.028571428571429e-06, |
| "loss": 0.2891, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8088235294117647, |
| "grad_norm": 52.75, |
| "learning_rate": 6.620408163265306e-06, |
| "loss": 0.279, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.8823529411764706, |
| "grad_norm": 29.5, |
| "learning_rate": 6.2122448979591845e-06, |
| "loss": 0.2745, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.8955882352941177, |
| "eval_accuracy": 0.9005057471264368, |
| "eval_auc": 0.9620825284976229, |
| "eval_f1": 0.8891166222586596, |
| "eval_false_negatives": 432, |
| "eval_false_positives": 650, |
| "eval_loss": 0.2513802945613861, |
| "eval_precision": 0.8696872493985566, |
| "eval_recall": 0.909433962264151, |
| "eval_runtime": 366.5333, |
| "eval_samples_per_second": 29.67, |
| "eval_specificity": 0.8935298935298935, |
| "eval_steps_per_second": 0.232, |
| "eval_true_negatives": 5455, |
| "eval_true_positives": 4338, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.8955882352941177, |
| "step": 609, |
| "train_accuracy": 0.914, |
| "train_auc": 0.9714460029236588, |
| "train_f1": 0.908315565031983, |
| "train_false_negatives": 33, |
| "train_false_positives": 53, |
| "train_loss": 0.21909336745738983, |
| "train_precision": 0.8893528183716075, |
| "train_recall": 0.9281045751633987, |
| "train_runtime": 33.7192, |
| "train_samples_per_second": 29.657, |
| "train_specificity": 0.9020332717190388, |
| "train_steps_per_second": 0.237, |
| "train_true_negatives": 488, |
| "train_true_positives": 426 |
| }, |
| { |
| "epoch": 0.9558823529411765, |
| "grad_norm": 12.5625, |
| "learning_rate": 5.804081632653061e-06, |
| "loss": 0.2479, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.0294117647058822, |
| "grad_norm": 22.375, |
| "learning_rate": 5.395918367346939e-06, |
| "loss": 0.2153, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.1029411764705883, |
| "grad_norm": 31.75, |
| "learning_rate": 4.987755102040817e-06, |
| "loss": 0.1901, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.1764705882352942, |
| "grad_norm": 17.25, |
| "learning_rate": 4.579591836734694e-06, |
| "loss": 0.1702, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.1941176470588235, |
| "eval_accuracy": 0.918528735632184, |
| "eval_auc": 0.9723367106385974, |
| "eval_f1": 0.905179794520548, |
| "eval_false_negatives": 541, |
| "eval_false_positives": 345, |
| "eval_loss": 0.21427848935127258, |
| "eval_precision": 0.9245736773065151, |
| "eval_recall": 0.8865828092243186, |
| "eval_runtime": 366.4767, |
| "eval_samples_per_second": 29.674, |
| "eval_specificity": 0.9434889434889435, |
| "eval_steps_per_second": 0.232, |
| "eval_true_negatives": 5760, |
| "eval_true_positives": 4229, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.1941176470588235, |
| "step": 812, |
| "train_accuracy": 0.947, |
| "train_auc": 0.9857519084432687, |
| "train_f1": 0.9399773499433749, |
| "train_false_negatives": 32, |
| "train_false_positives": 21, |
| "train_loss": 0.15449950098991394, |
| "train_precision": 0.9518348623853211, |
| "train_recall": 0.9284116331096197, |
| "train_runtime": 33.6205, |
| "train_samples_per_second": 29.744, |
| "train_specificity": 0.9620253164556962, |
| "train_steps_per_second": 0.238, |
| "train_true_negatives": 532, |
| "train_true_positives": 415 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 16.5, |
| "learning_rate": 4.1714285714285715e-06, |
| "loss": 0.1766, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.3235294117647058, |
| "grad_norm": 12.5625, |
| "learning_rate": 3.7632653061224494e-06, |
| "loss": 0.1553, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.3970588235294117, |
| "grad_norm": 19.625, |
| "learning_rate": 3.3551020408163272e-06, |
| "loss": 0.1473, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.4705882352941178, |
| "grad_norm": 24.125, |
| "learning_rate": 2.946938775510204e-06, |
| "loss": 0.153, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.4926470588235294, |
| "eval_accuracy": 0.9233103448275862, |
| "eval_auc": 0.9760859143878011, |
| "eval_f1": 0.9123028391167193, |
| "eval_false_negatives": 432, |
| "eval_false_positives": 402, |
| "eval_loss": 0.1964733898639679, |
| "eval_precision": 0.9151898734177215, |
| "eval_recall": 0.909433962264151, |
| "eval_runtime": 366.6051, |
| "eval_samples_per_second": 29.664, |
| "eval_specificity": 0.9341523341523341, |
| "eval_steps_per_second": 0.232, |
| "eval_true_negatives": 5703, |
| "eval_true_positives": 4338, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.4926470588235294, |
| "step": 1015, |
| "train_accuracy": 0.961, |
| "train_auc": 0.9915747155931913, |
| "train_f1": 0.9545983701979045, |
| "train_false_negatives": 23, |
| "train_false_positives": 16, |
| "train_loss": 0.1129259318113327, |
| "train_precision": 0.9624413145539906, |
| "train_recall": 0.9468822170900693, |
| "train_runtime": 33.7414, |
| "train_samples_per_second": 29.637, |
| "train_specificity": 0.9717813051146384, |
| "train_steps_per_second": 0.237, |
| "train_true_negatives": 551, |
| "train_true_positives": 410 |
| }, |
| { |
| "epoch": 1.5441176470588234, |
| "grad_norm": 14.875, |
| "learning_rate": 2.5387755102040816e-06, |
| "loss": 0.1513, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.6176470588235294, |
| "grad_norm": 20.375, |
| "learning_rate": 2.1306122448979595e-06, |
| "loss": 0.1593, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.6911764705882353, |
| "grad_norm": 16.5, |
| "learning_rate": 1.722448979591837e-06, |
| "loss": 0.1478, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.7647058823529411, |
| "grad_norm": 13.25, |
| "learning_rate": 1.3142857142857143e-06, |
| "loss": 0.1399, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.7911764705882351, |
| "eval_accuracy": 0.9249655172413793, |
| "eval_auc": 0.9771551482872238, |
| "eval_f1": 0.9136507936507936, |
| "eval_false_negatives": 453, |
| "eval_false_positives": 363, |
| "eval_loss": 0.19318822026252747, |
| "eval_precision": 0.9224358974358975, |
| "eval_recall": 0.9050314465408805, |
| "eval_runtime": 366.7824, |
| "eval_samples_per_second": 29.65, |
| "eval_specificity": 0.9405405405405406, |
| "eval_steps_per_second": 0.232, |
| "eval_true_negatives": 5742, |
| "eval_true_positives": 4317, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.7911764705882351, |
| "step": 1218, |
| "train_accuracy": 0.957, |
| "train_auc": 0.9929256882262804, |
| "train_f1": 0.9502890173410404, |
| "train_false_negatives": 26, |
| "train_false_positives": 17, |
| "train_loss": 0.11246936023235321, |
| "train_precision": 0.9602803738317757, |
| "train_recall": 0.9405034324942791, |
| "train_runtime": 33.6494, |
| "train_samples_per_second": 29.718, |
| "train_specificity": 0.9698046181172292, |
| "train_steps_per_second": 0.238, |
| "train_true_negatives": 546, |
| "train_true_positives": 411 |
| }, |
| { |
| "epoch": 1.8382352941176472, |
| "grad_norm": 22.25, |
| "learning_rate": 9.061224489795919e-07, |
| "loss": 0.1532, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.9117647058823528, |
| "grad_norm": 13.375, |
| "learning_rate": 4.979591836734694e-07, |
| "loss": 0.1489, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.9852941176470589, |
| "grad_norm": 18.5, |
| "learning_rate": 8.979591836734695e-08, |
| "loss": 0.1443, |
| "step": 1350 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 1360, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 203, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.460630510863122e+18, |
| "train_batch_size": 128, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|