| { | |
| "best_global_step": 1728, | |
| "best_metric": 0.9399001064439532, | |
| "best_model_checkpoint": "./my_unified_model_classification_4_6_10/checkpoint-1728", | |
| "epoch": 1.7962577962577964, | |
| "eval_steps": 288, | |
| "global_step": 1728, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05197505197505198, | |
| "grad_norm": 42.5, | |
| "learning_rate": 2.5520833333333334e-06, | |
| "loss": 0.955, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.10395010395010396, | |
| "grad_norm": 37.5, | |
| "learning_rate": 5.156250000000001e-06, | |
| "loss": 0.6218, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.15592515592515593, | |
| "grad_norm": 154.0, | |
| "learning_rate": 7.760416666666666e-06, | |
| "loss": 0.5542, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.2079002079002079, | |
| "grad_norm": 107.0, | |
| "learning_rate": 9.95958429561201e-06, | |
| "loss": 0.581, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2598752598752599, | |
| "grad_norm": 14.75, | |
| "learning_rate": 9.670900692840648e-06, | |
| "loss": 0.5363, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.2993762993762994, | |
| "eval_accuracy": 0.7968526466380543, | |
| "eval_auc": 0.723546321417172, | |
| "eval_f1": 0.882882207392967, | |
| "eval_false_negatives": 275, | |
| "eval_false_positives": 2849, | |
| "eval_loss": 0.479750394821167, | |
| "eval_precision": 0.8051832603938731, | |
| "eval_recall": 0.9771784232365145, | |
| "eval_runtime": 515.9489, | |
| "eval_samples_per_second": 29.805, | |
| "eval_specificity": 0.14393028846153846, | |
| "eval_steps_per_second": 0.235, | |
| "eval_true_negatives": 479, | |
| "eval_true_positives": 11775, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.2993762993762994, | |
| "step": 288, | |
| "train_accuracy": 0.803, | |
| "train_auc": 0.7153718677156178, | |
| "train_f1": 0.8873642081189251, | |
| "train_false_negatives": 16, | |
| "train_false_positives": 181, | |
| "train_loss": 0.4671786427497864, | |
| "train_precision": 0.8108672936259144, | |
| "train_recall": 0.9797979797979798, | |
| "train_runtime": 33.5789, | |
| "train_samples_per_second": 29.781, | |
| "train_specificity": 0.12980769230769232, | |
| "train_steps_per_second": 0.238, | |
| "train_true_negatives": 27, | |
| "train_true_positives": 776 | |
| }, | |
| { | |
| "epoch": 0.31185031185031187, | |
| "grad_norm": 37.75, | |
| "learning_rate": 9.382217090069284e-06, | |
| "loss": 0.4916, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.36382536382536385, | |
| "grad_norm": 8.75, | |
| "learning_rate": 9.093533487297921e-06, | |
| "loss": 0.4755, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.4158004158004158, | |
| "grad_norm": 48.0, | |
| "learning_rate": 8.804849884526559e-06, | |
| "loss": 0.4985, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4677754677754678, | |
| "grad_norm": 18.625, | |
| "learning_rate": 8.516166281755197e-06, | |
| "loss": 0.4602, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.5197505197505198, | |
| "grad_norm": 33.0, | |
| "learning_rate": 8.227482678983834e-06, | |
| "loss": 0.4592, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5717255717255717, | |
| "grad_norm": 45.5, | |
| "learning_rate": 7.938799076212472e-06, | |
| "loss": 0.4701, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.5987525987525988, | |
| "eval_accuracy": 0.807192092599818, | |
| "eval_auc": 0.7876185465209066, | |
| "eval_f1": 0.8887387894480093, | |
| "eval_false_negatives": 208, | |
| "eval_false_positives": 2757, | |
| "eval_loss": 0.4651535153388977, | |
| "eval_precision": 0.8111514487293651, | |
| "eval_recall": 0.9827385892116183, | |
| "eval_runtime": 516.3506, | |
| "eval_samples_per_second": 29.782, | |
| "eval_specificity": 0.17157451923076922, | |
| "eval_steps_per_second": 0.234, | |
| "eval_true_negatives": 571, | |
| "eval_true_positives": 11842, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.5987525987525988, | |
| "step": 576, | |
| "train_accuracy": 0.824, | |
| "train_auc": 0.8292374924653406, | |
| "train_f1": 0.8983833718244804, | |
| "train_false_negatives": 12, | |
| "train_false_positives": 164, | |
| "train_loss": 0.4171445071697235, | |
| "train_precision": 0.8259023354564756, | |
| "train_recall": 0.9848101265822785, | |
| "train_runtime": 33.5723, | |
| "train_samples_per_second": 29.786, | |
| "train_specificity": 0.21904761904761905, | |
| "train_steps_per_second": 0.238, | |
| "train_true_negatives": 46, | |
| "train_true_positives": 778 | |
| }, | |
| { | |
| "epoch": 0.6237006237006237, | |
| "grad_norm": 9.5625, | |
| "learning_rate": 7.650115473441108e-06, | |
| "loss": 0.4696, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6756756756756757, | |
| "grad_norm": 28.125, | |
| "learning_rate": 7.3614318706697466e-06, | |
| "loss": 0.4301, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.7276507276507277, | |
| "grad_norm": 20.5, | |
| "learning_rate": 7.072748267898384e-06, | |
| "loss": 0.4081, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.7796257796257796, | |
| "grad_norm": 23.375, | |
| "learning_rate": 6.784064665127021e-06, | |
| "loss": 0.3879, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.8316008316008316, | |
| "grad_norm": 21.5, | |
| "learning_rate": 6.495381062355659e-06, | |
| "loss": 0.3572, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.8835758835758836, | |
| "grad_norm": 62.25, | |
| "learning_rate": 6.2066974595842965e-06, | |
| "loss": 0.3478, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.8981288981288982, | |
| "eval_accuracy": 0.862205748471843, | |
| "eval_auc": 0.8848185644749442, | |
| "eval_f1": 0.915668404505114, | |
| "eval_false_negatives": 546, | |
| "eval_false_positives": 1573, | |
| "eval_loss": 0.3326202929019928, | |
| "eval_precision": 0.8797124722795748, | |
| "eval_recall": 0.9546887966804979, | |
| "eval_runtime": 516.3368, | |
| "eval_samples_per_second": 29.783, | |
| "eval_specificity": 0.52734375, | |
| "eval_steps_per_second": 0.234, | |
| "eval_true_negatives": 1755, | |
| "eval_true_positives": 11504, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.8981288981288982, | |
| "step": 864, | |
| "train_accuracy": 0.902, | |
| "train_auc": 0.9281183226495727, | |
| "train_f1": 0.939877300613497, | |
| "train_false_negatives": 26, | |
| "train_false_positives": 72, | |
| "train_loss": 0.2601640224456787, | |
| "train_precision": 0.9140811455847255, | |
| "train_recall": 0.9671717171717171, | |
| "train_runtime": 33.5805, | |
| "train_samples_per_second": 29.779, | |
| "train_specificity": 0.6538461538461539, | |
| "train_steps_per_second": 0.238, | |
| "train_true_negatives": 136, | |
| "train_true_positives": 766 | |
| }, | |
| { | |
| "epoch": 0.9355509355509356, | |
| "grad_norm": 16.25, | |
| "learning_rate": 5.918013856812933e-06, | |
| "loss": 0.3391, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.9875259875259875, | |
| "grad_norm": 20.75, | |
| "learning_rate": 5.629330254041571e-06, | |
| "loss": 0.3058, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.0395010395010396, | |
| "grad_norm": 18.5, | |
| "learning_rate": 5.340646651270208e-06, | |
| "loss": 0.2592, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.0914760914760915, | |
| "grad_norm": 21.875, | |
| "learning_rate": 5.0519630484988455e-06, | |
| "loss": 0.2312, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.1434511434511434, | |
| "grad_norm": 17.375, | |
| "learning_rate": 4.763279445727483e-06, | |
| "loss": 0.2124, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.1954261954261955, | |
| "grad_norm": 13.5625, | |
| "learning_rate": 4.47459584295612e-06, | |
| "loss": 0.2181, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.1975051975051976, | |
| "eval_accuracy": 0.889192352711666, | |
| "eval_auc": 0.9218037947853494, | |
| "eval_f1": 0.9310902620511161, | |
| "eval_false_negatives": 538, | |
| "eval_false_positives": 1166, | |
| "eval_loss": 0.283740758895874, | |
| "eval_precision": 0.9080296576747121, | |
| "eval_recall": 0.9553526970954357, | |
| "eval_runtime": 516.4754, | |
| "eval_samples_per_second": 29.775, | |
| "eval_specificity": 0.6496394230769231, | |
| "eval_steps_per_second": 0.234, | |
| "eval_true_negatives": 2162, | |
| "eval_true_positives": 11512, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.1975051975051976, | |
| "step": 1152, | |
| "train_accuracy": 0.935, | |
| "train_auc": 0.9640254927047857, | |
| "train_f1": 0.9596523898199876, | |
| "train_false_negatives": 16, | |
| "train_false_positives": 49, | |
| "train_loss": 0.1742788702249527, | |
| "train_precision": 0.940389294403893, | |
| "train_recall": 0.9797211660329531, | |
| "train_runtime": 33.4133, | |
| "train_samples_per_second": 29.928, | |
| "train_specificity": 0.7677725118483413, | |
| "train_steps_per_second": 0.239, | |
| "train_true_negatives": 162, | |
| "train_true_positives": 773 | |
| }, | |
| { | |
| "epoch": 1.2474012474012475, | |
| "grad_norm": 21.5, | |
| "learning_rate": 4.185912240184758e-06, | |
| "loss": 0.2036, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.2993762993762994, | |
| "grad_norm": 20.125, | |
| "learning_rate": 3.897228637413395e-06, | |
| "loss": 0.2052, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.3513513513513513, | |
| "grad_norm": 13.9375, | |
| "learning_rate": 3.6085450346420327e-06, | |
| "loss": 0.2054, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.4033264033264032, | |
| "grad_norm": 25.125, | |
| "learning_rate": 3.31986143187067e-06, | |
| "loss": 0.196, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.4553014553014554, | |
| "grad_norm": 16.75, | |
| "learning_rate": 3.0311778290993072e-06, | |
| "loss": 0.1987, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.496881496881497, | |
| "eval_accuracy": 0.9001820782936663, | |
| "eval_auc": 0.9344650070818704, | |
| "eval_f1": 0.9372829417773239, | |
| "eval_false_negatives": 580, | |
| "eval_false_positives": 955, | |
| "eval_loss": 0.256587952375412, | |
| "eval_precision": 0.9231388329979879, | |
| "eval_recall": 0.9518672199170124, | |
| "eval_runtime": 516.6281, | |
| "eval_samples_per_second": 29.766, | |
| "eval_specificity": 0.7130408653846154, | |
| "eval_steps_per_second": 0.234, | |
| "eval_true_negatives": 2373, | |
| "eval_true_positives": 11470, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.496881496881497, | |
| "step": 1440, | |
| "train_accuracy": 0.931, | |
| "train_auc": 0.9717205013621509, | |
| "train_f1": 0.9559105431309904, | |
| "train_false_negatives": 23, | |
| "train_false_positives": 46, | |
| "train_loss": 0.17544881999492645, | |
| "train_precision": 0.9420654911838791, | |
| "train_recall": 0.9701686121919585, | |
| "train_runtime": 33.4556, | |
| "train_samples_per_second": 29.89, | |
| "train_specificity": 0.7991266375545851, | |
| "train_steps_per_second": 0.239, | |
| "train_true_negatives": 183, | |
| "train_true_positives": 748 | |
| }, | |
| { | |
| "epoch": 1.5072765072765073, | |
| "grad_norm": 21.625, | |
| "learning_rate": 2.742494226327945e-06, | |
| "loss": 0.1915, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.5592515592515592, | |
| "grad_norm": 27.625, | |
| "learning_rate": 2.453810623556582e-06, | |
| "loss": 0.1904, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.6112266112266114, | |
| "grad_norm": 19.625, | |
| "learning_rate": 2.1651270207852194e-06, | |
| "loss": 0.1888, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.6632016632016633, | |
| "grad_norm": 18.5, | |
| "learning_rate": 1.876443418013857e-06, | |
| "loss": 0.1863, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.7151767151767152, | |
| "grad_norm": 23.25, | |
| "learning_rate": 1.5877598152424944e-06, | |
| "loss": 0.1854, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.7671517671517671, | |
| "grad_norm": 12.5625, | |
| "learning_rate": 1.2990762124711317e-06, | |
| "loss": 0.1911, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.7962577962577964, | |
| "eval_accuracy": 0.9045389517492521, | |
| "eval_auc": 0.9388098093879668, | |
| "eval_f1": 0.9399001064439532, | |
| "eval_false_negatives": 571, | |
| "eval_false_positives": 897, | |
| "eval_loss": 0.24762538075447083, | |
| "eval_precision": 0.9275210084033614, | |
| "eval_recall": 0.9526141078838174, | |
| "eval_runtime": 516.4828, | |
| "eval_samples_per_second": 29.774, | |
| "eval_specificity": 0.73046875, | |
| "eval_steps_per_second": 0.234, | |
| "eval_true_negatives": 2431, | |
| "eval_true_positives": 11479, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 1.7962577962577964, | |
| "step": 1728, | |
| "train_accuracy": 0.952, | |
| "train_auc": 0.9798224148344634, | |
| "train_f1": 0.9702970297029703, | |
| "train_false_negatives": 15, | |
| "train_false_positives": 33, | |
| "train_loss": 0.13925302028656006, | |
| "train_precision": 0.9596083231334149, | |
| "train_recall": 0.981226533166458, | |
| "train_runtime": 33.5792, | |
| "train_samples_per_second": 29.78, | |
| "train_specificity": 0.835820895522388, | |
| "train_steps_per_second": 0.238, | |
| "train_true_negatives": 168, | |
| "train_true_positives": 784 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 1924, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 288, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.478290865715675e+18, | |
| "train_batch_size": 128, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |