| { | |
| "best_metric": 0.9359891558099255, | |
| "best_model_checkpoint": "./my_unified_model_classification/checkpoint-1974", | |
| "epoch": 2.0, | |
| "eval_steps": 329, | |
| "global_step": 2200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.045454545454545456, | |
| "grad_norm": 72.5, | |
| "learning_rate": 2.2831050228310503e-06, | |
| "loss": 1.0312, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.09090909090909091, | |
| "grad_norm": 64.5, | |
| "learning_rate": 4.566210045662101e-06, | |
| "loss": 0.6885, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.13636363636363635, | |
| "grad_norm": 65.0, | |
| "learning_rate": 6.849315068493151e-06, | |
| "loss": 0.6293, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.18181818181818182, | |
| "grad_norm": 28.75, | |
| "learning_rate": 9.132420091324201e-06, | |
| "loss": 0.6143, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.22727272727272727, | |
| "grad_norm": 11.125, | |
| "learning_rate": 9.843513377082282e-06, | |
| "loss": 0.5555, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.2727272727272727, | |
| "grad_norm": 12.75, | |
| "learning_rate": 9.591115598182736e-06, | |
| "loss": 0.5559, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.2990909090909091, | |
| "eval_accuracy": 0.7053038485589221, | |
| "eval_auc": 0.6867176541550628, | |
| "eval_f1": 0.7980049875311721, | |
| "eval_false_negatives": 2798, | |
| "eval_false_positives": 2386, | |
| "eval_loss": 0.5816290378570557, | |
| "eval_precision": 0.8110248693172818, | |
| "eval_recall": 0.7853965332106151, | |
| "eval_runtime": 619.0057, | |
| "eval_samples_per_second": 28.418, | |
| "eval_specificity": 0.47594992312760814, | |
| "eval_steps_per_second": 0.223, | |
| "eval_true_negatives": 2167, | |
| "eval_true_positives": 10240, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.2990909090909091, | |
| "step": 329, | |
| "train_accuracy": 0.716, | |
| "train_auc": 0.7171233056981181, | |
| "train_f1": 0.8075880758807588, | |
| "train_false_negatives": 149, | |
| "train_false_positives": 135, | |
| "train_loss": 0.5534917116165161, | |
| "train_precision": 0.8153214774281806, | |
| "train_recall": 0.8, | |
| "train_runtime": 35.2538, | |
| "train_samples_per_second": 28.366, | |
| "train_specificity": 0.47058823529411764, | |
| "train_steps_per_second": 0.227, | |
| "train_true_negatives": 120, | |
| "train_true_positives": 596 | |
| }, | |
| { | |
| "epoch": 0.3181818181818182, | |
| "grad_norm": 40.75, | |
| "learning_rate": 9.338717819283191e-06, | |
| "loss": 0.5417, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.36363636363636365, | |
| "grad_norm": 33.5, | |
| "learning_rate": 9.086320040383645e-06, | |
| "loss": 0.5218, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4090909090909091, | |
| "grad_norm": 28.625, | |
| "learning_rate": 8.8339222614841e-06, | |
| "loss": 0.4994, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.45454545454545453, | |
| "grad_norm": 24.125, | |
| "learning_rate": 8.581524482584555e-06, | |
| "loss": 0.4921, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 74.5, | |
| "learning_rate": 8.329126703685009e-06, | |
| "loss": 0.4891, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.5454545454545454, | |
| "grad_norm": 33.25, | |
| "learning_rate": 8.076728924785463e-06, | |
| "loss": 0.4794, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.5909090909090909, | |
| "grad_norm": 30.625, | |
| "learning_rate": 7.824331145885916e-06, | |
| "loss": 0.4549, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.5981818181818181, | |
| "eval_accuracy": 0.7972827013813882, | |
| "eval_auc": 0.8168990273813824, | |
| "eval_f1": 0.864852573334344, | |
| "eval_false_negatives": 1628, | |
| "eval_false_positives": 1938, | |
| "eval_loss": 0.4511999785900116, | |
| "eval_precision": 0.8548097093197483, | |
| "eval_recall": 0.8751342230403436, | |
| "eval_runtime": 619.1094, | |
| "eval_samples_per_second": 28.413, | |
| "eval_specificity": 0.5743465846694488, | |
| "eval_steps_per_second": 0.223, | |
| "eval_true_negatives": 2615, | |
| "eval_true_positives": 11410, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.5981818181818181, | |
| "step": 658, | |
| "train_accuracy": 0.799, | |
| "train_auc": 0.8304007002911323, | |
| "train_f1": 0.8620452985586822, | |
| "train_false_negatives": 88, | |
| "train_false_positives": 113, | |
| "train_loss": 0.44616425037384033, | |
| "train_precision": 0.8475033738191633, | |
| "train_recall": 0.8770949720670391, | |
| "train_runtime": 35.294, | |
| "train_samples_per_second": 28.333, | |
| "train_specificity": 0.602112676056338, | |
| "train_steps_per_second": 0.227, | |
| "train_true_negatives": 171, | |
| "train_true_positives": 628 | |
| }, | |
| { | |
| "epoch": 0.6363636363636364, | |
| "grad_norm": 17.375, | |
| "learning_rate": 7.571933366986371e-06, | |
| "loss": 0.4336, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.6818181818181818, | |
| "grad_norm": 23.25, | |
| "learning_rate": 7.319535588086826e-06, | |
| "loss": 0.4089, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 26.25, | |
| "learning_rate": 7.06713780918728e-06, | |
| "loss": 0.3864, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.7727272727272727, | |
| "grad_norm": 22.5, | |
| "learning_rate": 6.8147400302877344e-06, | |
| "loss": 0.37, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.8181818181818182, | |
| "grad_norm": 37.0, | |
| "learning_rate": 6.562342251388188e-06, | |
| "loss": 0.3612, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.8636363636363636, | |
| "grad_norm": 20.5, | |
| "learning_rate": 6.309944472488643e-06, | |
| "loss": 0.3456, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.8972727272727272, | |
| "eval_accuracy": 0.8711841282473992, | |
| "eval_auc": 0.9114312226670747, | |
| "eval_f1": 0.9161113579150008, | |
| "eval_false_negatives": 665, | |
| "eval_false_positives": 1601, | |
| "eval_loss": 0.3193175494670868, | |
| "eval_precision": 0.8854300844425361, | |
| "eval_recall": 0.9489952446694279, | |
| "eval_runtime": 618.9502, | |
| "eval_samples_per_second": 28.421, | |
| "eval_specificity": 0.6483637162310565, | |
| "eval_steps_per_second": 0.223, | |
| "eval_true_negatives": 2952, | |
| "eval_true_positives": 12373, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.8972727272727272, | |
| "step": 987, | |
| "train_accuracy": 0.893, | |
| "train_auc": 0.945685678230828, | |
| "train_f1": 0.92909211398277, | |
| "train_false_negatives": 37, | |
| "train_false_positives": 70, | |
| "train_loss": 0.256592333316803, | |
| "train_precision": 0.9092088197146563, | |
| "train_recall": 0.9498644986449865, | |
| "train_runtime": 35.2309, | |
| "train_samples_per_second": 28.384, | |
| "train_specificity": 0.732824427480916, | |
| "train_steps_per_second": 0.227, | |
| "train_true_negatives": 192, | |
| "train_true_positives": 701 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 28.75, | |
| "learning_rate": 6.057546693589097e-06, | |
| "loss": 0.3191, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.9545454545454546, | |
| "grad_norm": 30.125, | |
| "learning_rate": 5.805148914689552e-06, | |
| "loss": 0.3099, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 47.75, | |
| "learning_rate": 5.5527511357900055e-06, | |
| "loss": 0.2908, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.0454545454545454, | |
| "grad_norm": 29.875, | |
| "learning_rate": 5.300353356890459e-06, | |
| "loss": 0.2196, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.0909090909090908, | |
| "grad_norm": 24.875, | |
| "learning_rate": 5.047955577990914e-06, | |
| "loss": 0.2147, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.1363636363636362, | |
| "grad_norm": 19.875, | |
| "learning_rate": 4.795557799091368e-06, | |
| "loss": 0.2107, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.1818181818181819, | |
| "grad_norm": 20.5, | |
| "learning_rate": 4.543160020191823e-06, | |
| "loss": 0.212, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.1963636363636363, | |
| "eval_accuracy": 0.8913649025069638, | |
| "eval_auc": 0.9333465842314582, | |
| "eval_f1": 0.9275834628064724, | |
| "eval_false_negatives": 799, | |
| "eval_false_positives": 1112, | |
| "eval_loss": 0.28013044595718384, | |
| "eval_precision": 0.9167103587746236, | |
| "eval_recall": 0.938717594723117, | |
| "eval_runtime": 618.7723, | |
| "eval_samples_per_second": 28.429, | |
| "eval_specificity": 0.7557654293872172, | |
| "eval_steps_per_second": 0.223, | |
| "eval_true_negatives": 3441, | |
| "eval_true_positives": 12239, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 1.1963636363636363, | |
| "step": 1316, | |
| "train_accuracy": 0.924, | |
| "train_auc": 0.966124817361522, | |
| "train_f1": 0.9491978609625669, | |
| "train_false_negatives": 33, | |
| "train_false_positives": 43, | |
| "train_loss": 0.1971082091331482, | |
| "train_precision": 0.9428950863213812, | |
| "train_recall": 0.955585464333782, | |
| "train_runtime": 35.2125, | |
| "train_samples_per_second": 28.399, | |
| "train_specificity": 0.8326848249027238, | |
| "train_steps_per_second": 0.227, | |
| "train_true_negatives": 214, | |
| "train_true_positives": 710 | |
| }, | |
| { | |
| "epoch": 1.2272727272727273, | |
| "grad_norm": 21.5, | |
| "learning_rate": 4.290762241292277e-06, | |
| "loss": 0.2041, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.2727272727272727, | |
| "grad_norm": 24.875, | |
| "learning_rate": 4.038364462392731e-06, | |
| "loss": 0.2047, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.3181818181818181, | |
| "grad_norm": 14.1875, | |
| "learning_rate": 3.7859666834931856e-06, | |
| "loss": 0.1941, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.3636363636363638, | |
| "grad_norm": 21.0, | |
| "learning_rate": 3.53356890459364e-06, | |
| "loss": 0.1948, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.4090909090909092, | |
| "grad_norm": 24.625, | |
| "learning_rate": 3.281171125694094e-06, | |
| "loss": 0.1905, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.4545454545454546, | |
| "grad_norm": 24.125, | |
| "learning_rate": 3.0287733467945485e-06, | |
| "loss": 0.1943, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.4954545454545456, | |
| "eval_accuracy": 0.9007446989938036, | |
| "eval_auc": 0.9430535156708126, | |
| "eval_f1": 0.9344643795510847, | |
| "eval_false_negatives": 590, | |
| "eval_false_positives": 1156, | |
| "eval_loss": 0.2589464485645294, | |
| "eval_precision": 0.9150249926492208, | |
| "eval_recall": 0.9547476606841541, | |
| "eval_runtime": 619.3355, | |
| "eval_samples_per_second": 28.403, | |
| "eval_specificity": 0.7461014715572151, | |
| "eval_steps_per_second": 0.223, | |
| "eval_true_negatives": 3397, | |
| "eval_true_positives": 12448, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 1.4954545454545456, | |
| "step": 1645, | |
| "train_accuracy": 0.953, | |
| "train_auc": 0.9871008686077808, | |
| "train_f1": 0.9681787406905891, | |
| "train_false_negatives": 17, | |
| "train_false_positives": 30, | |
| "train_loss": 0.13334013521671295, | |
| "train_precision": 0.959731543624161, | |
| "train_recall": 0.976775956284153, | |
| "train_runtime": 35.2541, | |
| "train_samples_per_second": 28.365, | |
| "train_specificity": 0.8880597014925373, | |
| "train_steps_per_second": 0.227, | |
| "train_true_negatives": 238, | |
| "train_true_positives": 715 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 23.75, | |
| "learning_rate": 2.7763755678950027e-06, | |
| "loss": 0.191, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.5454545454545454, | |
| "grad_norm": 20.625, | |
| "learning_rate": 2.523977788995457e-06, | |
| "loss": 0.1878, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.5909090909090908, | |
| "grad_norm": 19.75, | |
| "learning_rate": 2.2715800100959113e-06, | |
| "loss": 0.1813, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.6363636363636362, | |
| "grad_norm": 17.75, | |
| "learning_rate": 2.0191822311963656e-06, | |
| "loss": 0.1894, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.6818181818181817, | |
| "grad_norm": 23.75, | |
| "learning_rate": 1.76678445229682e-06, | |
| "loss": 0.1794, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.7272727272727273, | |
| "grad_norm": 24.125, | |
| "learning_rate": 1.5143866733972742e-06, | |
| "loss": 0.1862, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.7727272727272727, | |
| "grad_norm": 18.375, | |
| "learning_rate": 1.2619888944977285e-06, | |
| "loss": 0.1736, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.7945454545454544, | |
| "eval_accuracy": 0.9033596725598317, | |
| "eval_auc": 0.9469119511342725, | |
| "eval_f1": 0.9359891558099255, | |
| "eval_false_negatives": 609, | |
| "eval_false_positives": 1091, | |
| "eval_loss": 0.2513716220855713, | |
| "eval_precision": 0.9193047337278106, | |
| "eval_recall": 0.9532903819604234, | |
| "eval_runtime": 618.7166, | |
| "eval_samples_per_second": 28.431, | |
| "eval_specificity": 0.760377772896991, | |
| "eval_steps_per_second": 0.223, | |
| "eval_true_negatives": 3462, | |
| "eval_true_positives": 12429, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 1.7945454545454544, | |
| "step": 1974, | |
| "train_accuracy": 0.959, | |
| "train_auc": 0.9890839364523575, | |
| "train_f1": 0.9726848767488341, | |
| "train_false_negatives": 11, | |
| "train_false_positives": 30, | |
| "train_loss": 0.11946262419223785, | |
| "train_precision": 0.9605263157894737, | |
| "train_recall": 0.9851551956815114, | |
| "train_runtime": 35.2738, | |
| "train_samples_per_second": 28.35, | |
| "train_specificity": 0.8841698841698842, | |
| "train_steps_per_second": 0.227, | |
| "train_true_negatives": 229, | |
| "train_true_positives": 730 | |
| }, | |
| { | |
| "epoch": 1.8181818181818183, | |
| "grad_norm": 33.75, | |
| "learning_rate": 1.0095911155981828e-06, | |
| "loss": 0.1764, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.8636363636363638, | |
| "grad_norm": 24.25, | |
| "learning_rate": 7.571933366986371e-07, | |
| "loss": 0.1744, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.9090909090909092, | |
| "grad_norm": 31.875, | |
| "learning_rate": 5.047955577990914e-07, | |
| "loss": 0.1796, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.9545454545454546, | |
| "grad_norm": 23.25, | |
| "learning_rate": 2.523977788995457e-07, | |
| "loss": 0.1867, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 36.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.179, | |
| "step": 2200 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 2200, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 329, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.2065282585130959e+19, | |
| "train_batch_size": 128, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |