{ "best_metric": 0.9359891558099255, "best_model_checkpoint": "./my_unified_model_classification/checkpoint-1974", "epoch": 2.0, "eval_steps": 329, "global_step": 2200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.045454545454545456, "grad_norm": 72.5, "learning_rate": 2.2831050228310503e-06, "loss": 1.0312, "step": 50 }, { "epoch": 0.09090909090909091, "grad_norm": 64.5, "learning_rate": 4.566210045662101e-06, "loss": 0.6885, "step": 100 }, { "epoch": 0.13636363636363635, "grad_norm": 65.0, "learning_rate": 6.849315068493151e-06, "loss": 0.6293, "step": 150 }, { "epoch": 0.18181818181818182, "grad_norm": 28.75, "learning_rate": 9.132420091324201e-06, "loss": 0.6143, "step": 200 }, { "epoch": 0.22727272727272727, "grad_norm": 11.125, "learning_rate": 9.843513377082282e-06, "loss": 0.5555, "step": 250 }, { "epoch": 0.2727272727272727, "grad_norm": 12.75, "learning_rate": 9.591115598182736e-06, "loss": 0.5559, "step": 300 }, { "epoch": 0.2990909090909091, "eval_accuracy": 0.7053038485589221, "eval_auc": 0.6867176541550628, "eval_f1": 0.7980049875311721, "eval_false_negatives": 2798, "eval_false_positives": 2386, "eval_loss": 0.5816290378570557, "eval_precision": 0.8110248693172818, "eval_recall": 0.7853965332106151, "eval_runtime": 619.0057, "eval_samples_per_second": 28.418, "eval_specificity": 0.47594992312760814, "eval_steps_per_second": 0.223, "eval_true_negatives": 2167, "eval_true_positives": 10240, "step": 329 }, { "epoch": 0.2990909090909091, "step": 329, "train_accuracy": 0.716, "train_auc": 0.7171233056981181, "train_f1": 0.8075880758807588, "train_false_negatives": 149, "train_false_positives": 135, "train_loss": 0.5534917116165161, "train_precision": 0.8153214774281806, "train_recall": 0.8, "train_runtime": 35.2538, "train_samples_per_second": 28.366, "train_specificity": 0.47058823529411764, "train_steps_per_second": 0.227, "train_true_negatives": 120, "train_true_positives": 596 }, { "epoch": 0.3181818181818182, "grad_norm": 40.75, "learning_rate": 9.338717819283191e-06, "loss": 0.5417, "step": 350 }, { "epoch": 0.36363636363636365, "grad_norm": 33.5, "learning_rate": 9.086320040383645e-06, "loss": 0.5218, "step": 400 }, { "epoch": 0.4090909090909091, "grad_norm": 28.625, "learning_rate": 8.8339222614841e-06, "loss": 0.4994, "step": 450 }, { "epoch": 0.45454545454545453, "grad_norm": 24.125, "learning_rate": 8.581524482584555e-06, "loss": 0.4921, "step": 500 }, { "epoch": 0.5, "grad_norm": 74.5, "learning_rate": 8.329126703685009e-06, "loss": 0.4891, "step": 550 }, { "epoch": 0.5454545454545454, "grad_norm": 33.25, "learning_rate": 8.076728924785463e-06, "loss": 0.4794, "step": 600 }, { "epoch": 0.5909090909090909, "grad_norm": 30.625, "learning_rate": 7.824331145885916e-06, "loss": 0.4549, "step": 650 }, { "epoch": 0.5981818181818181, "eval_accuracy": 0.7972827013813882, "eval_auc": 0.8168990273813824, "eval_f1": 0.864852573334344, "eval_false_negatives": 1628, "eval_false_positives": 1938, "eval_loss": 0.4511999785900116, "eval_precision": 0.8548097093197483, "eval_recall": 0.8751342230403436, "eval_runtime": 619.1094, "eval_samples_per_second": 28.413, "eval_specificity": 0.5743465846694488, "eval_steps_per_second": 0.223, "eval_true_negatives": 2615, "eval_true_positives": 11410, "step": 658 }, { "epoch": 0.5981818181818181, "step": 658, "train_accuracy": 0.799, "train_auc": 0.8304007002911323, "train_f1": 0.8620452985586822, "train_false_negatives": 88, "train_false_positives": 113, "train_loss": 0.44616425037384033, "train_precision": 0.8475033738191633, "train_recall": 0.8770949720670391, "train_runtime": 35.294, "train_samples_per_second": 28.333, "train_specificity": 0.602112676056338, "train_steps_per_second": 0.227, "train_true_negatives": 171, "train_true_positives": 628 }, { "epoch": 0.6363636363636364, "grad_norm": 17.375, "learning_rate": 7.571933366986371e-06, "loss": 0.4336, "step": 700 }, { "epoch": 0.6818181818181818, "grad_norm": 23.25, "learning_rate": 7.319535588086826e-06, "loss": 0.4089, "step": 750 }, { "epoch": 0.7272727272727273, "grad_norm": 26.25, "learning_rate": 7.06713780918728e-06, "loss": 0.3864, "step": 800 }, { "epoch": 0.7727272727272727, "grad_norm": 22.5, "learning_rate": 6.8147400302877344e-06, "loss": 0.37, "step": 850 }, { "epoch": 0.8181818181818182, "grad_norm": 37.0, "learning_rate": 6.562342251388188e-06, "loss": 0.3612, "step": 900 }, { "epoch": 0.8636363636363636, "grad_norm": 20.5, "learning_rate": 6.309944472488643e-06, "loss": 0.3456, "step": 950 }, { "epoch": 0.8972727272727272, "eval_accuracy": 0.8711841282473992, "eval_auc": 0.9114312226670747, "eval_f1": 0.9161113579150008, "eval_false_negatives": 665, "eval_false_positives": 1601, "eval_loss": 0.3193175494670868, "eval_precision": 0.8854300844425361, "eval_recall": 0.9489952446694279, "eval_runtime": 618.9502, "eval_samples_per_second": 28.421, "eval_specificity": 0.6483637162310565, "eval_steps_per_second": 0.223, "eval_true_negatives": 2952, "eval_true_positives": 12373, "step": 987 }, { "epoch": 0.8972727272727272, "step": 987, "train_accuracy": 0.893, "train_auc": 0.945685678230828, "train_f1": 0.92909211398277, "train_false_negatives": 37, "train_false_positives": 70, "train_loss": 0.256592333316803, "train_precision": 0.9092088197146563, "train_recall": 0.9498644986449865, "train_runtime": 35.2309, "train_samples_per_second": 28.384, "train_specificity": 0.732824427480916, "train_steps_per_second": 0.227, "train_true_negatives": 192, "train_true_positives": 701 }, { "epoch": 0.9090909090909091, "grad_norm": 28.75, "learning_rate": 6.057546693589097e-06, "loss": 0.3191, "step": 1000 }, { "epoch": 0.9545454545454546, "grad_norm": 30.125, "learning_rate": 5.805148914689552e-06, "loss": 0.3099, "step": 1050 }, { "epoch": 1.0, "grad_norm": 47.75, "learning_rate": 5.5527511357900055e-06, "loss": 0.2908, "step": 1100 }, { "epoch": 1.0454545454545454, "grad_norm": 29.875, "learning_rate": 5.300353356890459e-06, "loss": 0.2196, "step": 1150 }, { "epoch": 1.0909090909090908, "grad_norm": 24.875, "learning_rate": 5.047955577990914e-06, "loss": 0.2147, "step": 1200 }, { "epoch": 1.1363636363636362, "grad_norm": 19.875, "learning_rate": 4.795557799091368e-06, "loss": 0.2107, "step": 1250 }, { "epoch": 1.1818181818181819, "grad_norm": 20.5, "learning_rate": 4.543160020191823e-06, "loss": 0.212, "step": 1300 }, { "epoch": 1.1963636363636363, "eval_accuracy": 0.8913649025069638, "eval_auc": 0.9333465842314582, "eval_f1": 0.9275834628064724, "eval_false_negatives": 799, "eval_false_positives": 1112, "eval_loss": 0.28013044595718384, "eval_precision": 0.9167103587746236, "eval_recall": 0.938717594723117, "eval_runtime": 618.7723, "eval_samples_per_second": 28.429, "eval_specificity": 0.7557654293872172, "eval_steps_per_second": 0.223, "eval_true_negatives": 3441, "eval_true_positives": 12239, "step": 1316 }, { "epoch": 1.1963636363636363, "step": 1316, "train_accuracy": 0.924, "train_auc": 0.966124817361522, "train_f1": 0.9491978609625669, "train_false_negatives": 33, "train_false_positives": 43, "train_loss": 0.1971082091331482, "train_precision": 0.9428950863213812, "train_recall": 0.955585464333782, "train_runtime": 35.2125, "train_samples_per_second": 28.399, "train_specificity": 0.8326848249027238, "train_steps_per_second": 0.227, "train_true_negatives": 214, "train_true_positives": 710 }, { "epoch": 1.2272727272727273, "grad_norm": 21.5, "learning_rate": 4.290762241292277e-06, "loss": 0.2041, "step": 1350 }, { "epoch": 1.2727272727272727, "grad_norm": 24.875, "learning_rate": 4.038364462392731e-06, "loss": 0.2047, "step": 1400 }, { "epoch": 1.3181818181818181, "grad_norm": 14.1875, "learning_rate": 3.7859666834931856e-06, "loss": 0.1941, "step": 1450 }, { "epoch": 1.3636363636363638, "grad_norm": 21.0, "learning_rate": 3.53356890459364e-06, "loss": 0.1948, "step": 1500 }, { "epoch": 1.4090909090909092, "grad_norm": 24.625, "learning_rate": 3.281171125694094e-06, "loss": 0.1905, "step": 1550 }, { "epoch": 1.4545454545454546, "grad_norm": 24.125, "learning_rate": 3.0287733467945485e-06, "loss": 0.1943, "step": 1600 }, { "epoch": 1.4954545454545456, "eval_accuracy": 0.9007446989938036, "eval_auc": 0.9430535156708126, "eval_f1": 0.9344643795510847, "eval_false_negatives": 590, "eval_false_positives": 1156, "eval_loss": 0.2589464485645294, "eval_precision": 0.9150249926492208, "eval_recall": 0.9547476606841541, "eval_runtime": 619.3355, "eval_samples_per_second": 28.403, "eval_specificity": 0.7461014715572151, "eval_steps_per_second": 0.223, "eval_true_negatives": 3397, "eval_true_positives": 12448, "step": 1645 }, { "epoch": 1.4954545454545456, "step": 1645, "train_accuracy": 0.953, "train_auc": 0.9871008686077808, "train_f1": 0.9681787406905891, "train_false_negatives": 17, "train_false_positives": 30, "train_loss": 0.13334013521671295, "train_precision": 0.959731543624161, "train_recall": 0.976775956284153, "train_runtime": 35.2541, "train_samples_per_second": 28.365, "train_specificity": 0.8880597014925373, "train_steps_per_second": 0.227, "train_true_negatives": 238, "train_true_positives": 715 }, { "epoch": 1.5, "grad_norm": 23.75, "learning_rate": 2.7763755678950027e-06, "loss": 0.191, "step": 1650 }, { "epoch": 1.5454545454545454, "grad_norm": 20.625, "learning_rate": 2.523977788995457e-06, "loss": 0.1878, "step": 1700 }, { "epoch": 1.5909090909090908, "grad_norm": 19.75, "learning_rate": 2.2715800100959113e-06, "loss": 0.1813, "step": 1750 }, { "epoch": 1.6363636363636362, "grad_norm": 17.75, "learning_rate": 2.0191822311963656e-06, "loss": 0.1894, "step": 1800 }, { "epoch": 1.6818181818181817, "grad_norm": 23.75, "learning_rate": 1.76678445229682e-06, "loss": 0.1794, "step": 1850 }, { "epoch": 1.7272727272727273, "grad_norm": 24.125, "learning_rate": 1.5143866733972742e-06, "loss": 0.1862, "step": 1900 }, { "epoch": 1.7727272727272727, "grad_norm": 18.375, "learning_rate": 1.2619888944977285e-06, "loss": 0.1736, "step": 1950 }, { "epoch": 1.7945454545454544, "eval_accuracy": 0.9033596725598317, "eval_auc": 0.9469119511342725, "eval_f1": 0.9359891558099255, "eval_false_negatives": 609, "eval_false_positives": 1091, "eval_loss": 0.2513716220855713, "eval_precision": 0.9193047337278106, "eval_recall": 0.9532903819604234, "eval_runtime": 618.7166, "eval_samples_per_second": 28.431, "eval_specificity": 0.760377772896991, "eval_steps_per_second": 0.223, "eval_true_negatives": 3462, "eval_true_positives": 12429, "step": 1974 }, { "epoch": 1.7945454545454544, "step": 1974, "train_accuracy": 0.959, "train_auc": 0.9890839364523575, "train_f1": 0.9726848767488341, "train_false_negatives": 11, "train_false_positives": 30, "train_loss": 0.11946262419223785, "train_precision": 0.9605263157894737, "train_recall": 0.9851551956815114, "train_runtime": 35.2738, "train_samples_per_second": 28.35, "train_specificity": 0.8841698841698842, "train_steps_per_second": 0.227, "train_true_negatives": 229, "train_true_positives": 730 }, { "epoch": 1.8181818181818183, "grad_norm": 33.75, "learning_rate": 1.0095911155981828e-06, "loss": 0.1764, "step": 2000 }, { "epoch": 1.8636363636363638, "grad_norm": 24.25, "learning_rate": 7.571933366986371e-07, "loss": 0.1744, "step": 2050 }, { "epoch": 1.9090909090909092, "grad_norm": 31.875, "learning_rate": 5.047955577990914e-07, "loss": 0.1796, "step": 2100 }, { "epoch": 1.9545454545454546, "grad_norm": 23.25, "learning_rate": 2.523977788995457e-07, "loss": 0.1867, "step": 2150 }, { "epoch": 2.0, "grad_norm": 36.0, "learning_rate": 0.0, "loss": 0.179, "step": 2200 } ], "logging_steps": 50, "max_steps": 2200, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 329, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2065282585130959e+19, "train_batch_size": 128, "trial_name": null, "trial_params": null }