{ "best_global_step": 1728, "best_metric": 0.9399001064439532, "best_model_checkpoint": "./my_unified_model_classification_4_6_10/checkpoint-1728", "epoch": 1.7962577962577964, "eval_steps": 288, "global_step": 1728, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05197505197505198, "grad_norm": 42.5, "learning_rate": 2.5520833333333334e-06, "loss": 0.955, "step": 50 }, { "epoch": 0.10395010395010396, "grad_norm": 37.5, "learning_rate": 5.156250000000001e-06, "loss": 0.6218, "step": 100 }, { "epoch": 0.15592515592515593, "grad_norm": 154.0, "learning_rate": 7.760416666666666e-06, "loss": 0.5542, "step": 150 }, { "epoch": 0.2079002079002079, "grad_norm": 107.0, "learning_rate": 9.95958429561201e-06, "loss": 0.581, "step": 200 }, { "epoch": 0.2598752598752599, "grad_norm": 14.75, "learning_rate": 9.670900692840648e-06, "loss": 0.5363, "step": 250 }, { "epoch": 0.2993762993762994, "eval_accuracy": 0.7968526466380543, "eval_auc": 0.723546321417172, "eval_f1": 0.882882207392967, "eval_false_negatives": 275, "eval_false_positives": 2849, "eval_loss": 0.479750394821167, "eval_precision": 0.8051832603938731, "eval_recall": 0.9771784232365145, "eval_runtime": 515.9489, "eval_samples_per_second": 29.805, "eval_specificity": 0.14393028846153846, "eval_steps_per_second": 0.235, "eval_true_negatives": 479, "eval_true_positives": 11775, "step": 288 }, { "epoch": 0.2993762993762994, "step": 288, "train_accuracy": 0.803, "train_auc": 0.7153718677156178, "train_f1": 0.8873642081189251, "train_false_negatives": 16, "train_false_positives": 181, "train_loss": 0.4671786427497864, "train_precision": 0.8108672936259144, "train_recall": 0.9797979797979798, "train_runtime": 33.5789, "train_samples_per_second": 29.781, "train_specificity": 0.12980769230769232, "train_steps_per_second": 0.238, "train_true_negatives": 27, "train_true_positives": 776 }, { "epoch": 0.31185031185031187, "grad_norm": 37.75, "learning_rate": 9.382217090069284e-06, "loss": 0.4916, "step": 300 }, { "epoch": 0.36382536382536385, "grad_norm": 8.75, "learning_rate": 9.093533487297921e-06, "loss": 0.4755, "step": 350 }, { "epoch": 0.4158004158004158, "grad_norm": 48.0, "learning_rate": 8.804849884526559e-06, "loss": 0.4985, "step": 400 }, { "epoch": 0.4677754677754678, "grad_norm": 18.625, "learning_rate": 8.516166281755197e-06, "loss": 0.4602, "step": 450 }, { "epoch": 0.5197505197505198, "grad_norm": 33.0, "learning_rate": 8.227482678983834e-06, "loss": 0.4592, "step": 500 }, { "epoch": 0.5717255717255717, "grad_norm": 45.5, "learning_rate": 7.938799076212472e-06, "loss": 0.4701, "step": 550 }, { "epoch": 0.5987525987525988, "eval_accuracy": 0.807192092599818, "eval_auc": 0.7876185465209066, "eval_f1": 0.8887387894480093, "eval_false_negatives": 208, "eval_false_positives": 2757, "eval_loss": 0.4651535153388977, "eval_precision": 0.8111514487293651, "eval_recall": 0.9827385892116183, "eval_runtime": 516.3506, "eval_samples_per_second": 29.782, "eval_specificity": 0.17157451923076922, "eval_steps_per_second": 0.234, "eval_true_negatives": 571, "eval_true_positives": 11842, "step": 576 }, { "epoch": 0.5987525987525988, "step": 576, "train_accuracy": 0.824, "train_auc": 0.8292374924653406, "train_f1": 0.8983833718244804, "train_false_negatives": 12, "train_false_positives": 164, "train_loss": 0.4171445071697235, "train_precision": 0.8259023354564756, "train_recall": 0.9848101265822785, "train_runtime": 33.5723, "train_samples_per_second": 29.786, "train_specificity": 0.21904761904761905, "train_steps_per_second": 0.238, "train_true_negatives": 46, "train_true_positives": 778 }, { "epoch": 0.6237006237006237, "grad_norm": 9.5625, "learning_rate": 7.650115473441108e-06, "loss": 0.4696, "step": 600 }, { "epoch": 0.6756756756756757, "grad_norm": 28.125, "learning_rate": 7.3614318706697466e-06, "loss": 0.4301, "step": 650 }, { "epoch": 0.7276507276507277, "grad_norm": 20.5, "learning_rate": 7.072748267898384e-06, "loss": 0.4081, "step": 700 }, { "epoch": 0.7796257796257796, "grad_norm": 23.375, "learning_rate": 6.784064665127021e-06, "loss": 0.3879, "step": 750 }, { "epoch": 0.8316008316008316, "grad_norm": 21.5, "learning_rate": 6.495381062355659e-06, "loss": 0.3572, "step": 800 }, { "epoch": 0.8835758835758836, "grad_norm": 62.25, "learning_rate": 6.2066974595842965e-06, "loss": 0.3478, "step": 850 }, { "epoch": 0.8981288981288982, "eval_accuracy": 0.862205748471843, "eval_auc": 0.8848185644749442, "eval_f1": 0.915668404505114, "eval_false_negatives": 546, "eval_false_positives": 1573, "eval_loss": 0.3326202929019928, "eval_precision": 0.8797124722795748, "eval_recall": 0.9546887966804979, "eval_runtime": 516.3368, "eval_samples_per_second": 29.783, "eval_specificity": 0.52734375, "eval_steps_per_second": 0.234, "eval_true_negatives": 1755, "eval_true_positives": 11504, "step": 864 }, { "epoch": 0.8981288981288982, "step": 864, "train_accuracy": 0.902, "train_auc": 0.9281183226495727, "train_f1": 0.939877300613497, "train_false_negatives": 26, "train_false_positives": 72, "train_loss": 0.2601640224456787, "train_precision": 0.9140811455847255, "train_recall": 0.9671717171717171, "train_runtime": 33.5805, "train_samples_per_second": 29.779, "train_specificity": 0.6538461538461539, "train_steps_per_second": 0.238, "train_true_negatives": 136, "train_true_positives": 766 }, { "epoch": 0.9355509355509356, "grad_norm": 16.25, "learning_rate": 5.918013856812933e-06, "loss": 0.3391, "step": 900 }, { "epoch": 0.9875259875259875, "grad_norm": 20.75, "learning_rate": 5.629330254041571e-06, "loss": 0.3058, "step": 950 }, { "epoch": 1.0395010395010396, "grad_norm": 18.5, "learning_rate": 5.340646651270208e-06, "loss": 0.2592, "step": 1000 }, { "epoch": 1.0914760914760915, "grad_norm": 21.875, "learning_rate": 5.0519630484988455e-06, "loss": 0.2312, "step": 1050 }, { "epoch": 1.1434511434511434, "grad_norm": 17.375, "learning_rate": 4.763279445727483e-06, "loss": 0.2124, "step": 1100 }, { "epoch": 1.1954261954261955, "grad_norm": 13.5625, "learning_rate": 4.47459584295612e-06, "loss": 0.2181, "step": 1150 }, { "epoch": 1.1975051975051976, "eval_accuracy": 0.889192352711666, "eval_auc": 0.9218037947853494, "eval_f1": 0.9310902620511161, "eval_false_negatives": 538, "eval_false_positives": 1166, "eval_loss": 0.283740758895874, "eval_precision": 0.9080296576747121, "eval_recall": 0.9553526970954357, "eval_runtime": 516.4754, "eval_samples_per_second": 29.775, "eval_specificity": 0.6496394230769231, "eval_steps_per_second": 0.234, "eval_true_negatives": 2162, "eval_true_positives": 11512, "step": 1152 }, { "epoch": 1.1975051975051976, "step": 1152, "train_accuracy": 0.935, "train_auc": 0.9640254927047857, "train_f1": 0.9596523898199876, "train_false_negatives": 16, "train_false_positives": 49, "train_loss": 0.1742788702249527, "train_precision": 0.940389294403893, "train_recall": 0.9797211660329531, "train_runtime": 33.4133, "train_samples_per_second": 29.928, "train_specificity": 0.7677725118483413, "train_steps_per_second": 0.239, "train_true_negatives": 162, "train_true_positives": 773 }, { "epoch": 1.2474012474012475, "grad_norm": 21.5, "learning_rate": 4.185912240184758e-06, "loss": 0.2036, "step": 1200 }, { "epoch": 1.2993762993762994, "grad_norm": 20.125, "learning_rate": 3.897228637413395e-06, "loss": 0.2052, "step": 1250 }, { "epoch": 1.3513513513513513, "grad_norm": 13.9375, "learning_rate": 3.6085450346420327e-06, "loss": 0.2054, "step": 1300 }, { "epoch": 1.4033264033264032, "grad_norm": 25.125, "learning_rate": 3.31986143187067e-06, "loss": 0.196, "step": 1350 }, { "epoch": 1.4553014553014554, "grad_norm": 16.75, "learning_rate": 3.0311778290993072e-06, "loss": 0.1987, "step": 1400 }, { "epoch": 1.496881496881497, "eval_accuracy": 0.9001820782936663, "eval_auc": 0.9344650070818704, "eval_f1": 0.9372829417773239, "eval_false_negatives": 580, "eval_false_positives": 955, "eval_loss": 0.256587952375412, "eval_precision": 0.9231388329979879, "eval_recall": 0.9518672199170124, "eval_runtime": 516.6281, "eval_samples_per_second": 29.766, "eval_specificity": 0.7130408653846154, "eval_steps_per_second": 0.234, "eval_true_negatives": 2373, "eval_true_positives": 11470, "step": 1440 }, { "epoch": 1.496881496881497, "step": 1440, "train_accuracy": 0.931, "train_auc": 0.9717205013621509, "train_f1": 0.9559105431309904, "train_false_negatives": 23, "train_false_positives": 46, "train_loss": 0.17544881999492645, "train_precision": 0.9420654911838791, "train_recall": 0.9701686121919585, "train_runtime": 33.4556, "train_samples_per_second": 29.89, "train_specificity": 0.7991266375545851, "train_steps_per_second": 0.239, "train_true_negatives": 183, "train_true_positives": 748 }, { "epoch": 1.5072765072765073, "grad_norm": 21.625, "learning_rate": 2.742494226327945e-06, "loss": 0.1915, "step": 1450 }, { "epoch": 1.5592515592515592, "grad_norm": 27.625, "learning_rate": 2.453810623556582e-06, "loss": 0.1904, "step": 1500 }, { "epoch": 1.6112266112266114, "grad_norm": 19.625, "learning_rate": 2.1651270207852194e-06, "loss": 0.1888, "step": 1550 }, { "epoch": 1.6632016632016633, "grad_norm": 18.5, "learning_rate": 1.876443418013857e-06, "loss": 0.1863, "step": 1600 }, { "epoch": 1.7151767151767152, "grad_norm": 23.25, "learning_rate": 1.5877598152424944e-06, "loss": 0.1854, "step": 1650 }, { "epoch": 1.7671517671517671, "grad_norm": 12.5625, "learning_rate": 1.2990762124711317e-06, "loss": 0.1911, "step": 1700 }, { "epoch": 1.7962577962577964, "eval_accuracy": 0.9045389517492521, "eval_auc": 0.9388098093879668, "eval_f1": 0.9399001064439532, "eval_false_negatives": 571, "eval_false_positives": 897, "eval_loss": 0.24762538075447083, "eval_precision": 0.9275210084033614, "eval_recall": 0.9526141078838174, "eval_runtime": 516.4828, "eval_samples_per_second": 29.774, "eval_specificity": 0.73046875, "eval_steps_per_second": 0.234, "eval_true_negatives": 2431, "eval_true_positives": 11479, "step": 1728 }, { "epoch": 1.7962577962577964, "step": 1728, "train_accuracy": 0.952, "train_auc": 0.9798224148344634, "train_f1": 0.9702970297029703, "train_false_negatives": 15, "train_false_positives": 33, "train_loss": 0.13925302028656006, "train_precision": 0.9596083231334149, "train_recall": 0.981226533166458, "train_runtime": 33.5792, "train_samples_per_second": 29.78, "train_specificity": 0.835820895522388, "train_steps_per_second": 0.238, "train_true_negatives": 168, "train_true_positives": 784 } ], "logging_steps": 50, "max_steps": 1924, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 288, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.478290865715675e+18, "train_batch_size": 128, "trial_name": null, "trial_params": null }