| { | |
| "best_metric": 0.8728268768516966, | |
| "best_model_checkpoint": "stool-condition-classification/checkpoint-500", | |
| "epoch": 10.0, | |
| "eval_steps": 100, | |
| "global_step": 1020, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 7.584359169006348, | |
| "learning_rate": 0.00019803921568627454, | |
| "loss": 0.6059, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 3.984663486480713, | |
| "learning_rate": 0.000196078431372549, | |
| "loss": 0.69, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 5.750972270965576, | |
| "learning_rate": 0.00019411764705882354, | |
| "loss": 0.652, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 5.540163993835449, | |
| "learning_rate": 0.00019215686274509807, | |
| "loss": 0.567, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 12.890495300292969, | |
| "learning_rate": 0.00019019607843137254, | |
| "loss": 0.5255, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 11.914591789245605, | |
| "learning_rate": 0.00018823529411764707, | |
| "loss": 0.5776, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 8.995306015014648, | |
| "learning_rate": 0.00018627450980392157, | |
| "loss": 0.4868, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 7.732036590576172, | |
| "learning_rate": 0.00018431372549019607, | |
| "loss": 0.6819, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 9.876835823059082, | |
| "learning_rate": 0.0001823529411764706, | |
| "loss": 0.5809, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 6.603707790374756, | |
| "learning_rate": 0.0001803921568627451, | |
| "loss": 0.5076, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_accuracy": 0.7730870712401056, | |
| "eval_auroc": 0.8537928335848846, | |
| "eval_f1": 0.6906474820143884, | |
| "eval_loss": 0.5360854268074036, | |
| "eval_model_selection": 0.559226340209067, | |
| "eval_npv": 0.7060931899641577, | |
| "eval_ppv": 0.96, | |
| "eval_runtime": 21.3071, | |
| "eval_samples_per_second": 17.787, | |
| "eval_sensitivity": 0.5393258426966292, | |
| "eval_specificty": 0.9800995024875622, | |
| "eval_steps_per_second": 2.253, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 2.871717929840088, | |
| "learning_rate": 0.00017843137254901963, | |
| "loss": 0.6088, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 7.064380645751953, | |
| "learning_rate": 0.00017647058823529413, | |
| "loss": 0.5249, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 4.083934307098389, | |
| "learning_rate": 0.00017450980392156863, | |
| "loss": 0.5367, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "grad_norm": 1.9436535835266113, | |
| "learning_rate": 0.00017254901960784316, | |
| "loss": 0.4674, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 2.449876308441162, | |
| "learning_rate": 0.00017058823529411766, | |
| "loss": 0.5506, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "grad_norm": 2.2175521850585938, | |
| "learning_rate": 0.00016862745098039216, | |
| "loss": 0.4266, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "grad_norm": 1.2798587083816528, | |
| "learning_rate": 0.0001666666666666667, | |
| "loss": 0.483, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 4.6635847091674805, | |
| "learning_rate": 0.0001647058823529412, | |
| "loss": 0.5022, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "grad_norm": 10.006440162658691, | |
| "learning_rate": 0.0001627450980392157, | |
| "loss": 0.5173, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 43.053401947021484, | |
| "learning_rate": 0.00016078431372549022, | |
| "loss": 0.4086, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "eval_accuracy": 0.783641160949868, | |
| "eval_auroc": 0.8727989267147409, | |
| "eval_f1": 0.722972972972973, | |
| "eval_loss": 0.48566874861717224, | |
| "eval_model_selection": 0.655849963664822, | |
| "eval_npv": 0.7279693486590039, | |
| "eval_ppv": 0.9067796610169492, | |
| "eval_runtime": 20.6546, | |
| "eval_samples_per_second": 18.349, | |
| "eval_sensitivity": 0.601123595505618, | |
| "eval_specificty": 0.945273631840796, | |
| "eval_steps_per_second": 2.324, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "grad_norm": 6.759799957275391, | |
| "learning_rate": 0.0001588235294117647, | |
| "loss": 0.4347, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 1.868139386177063, | |
| "learning_rate": 0.00015686274509803922, | |
| "loss": 0.4856, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": 6.924500465393066, | |
| "learning_rate": 0.00015490196078431375, | |
| "loss": 0.6559, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "grad_norm": 3.674975872039795, | |
| "learning_rate": 0.00015294117647058822, | |
| "loss": 0.4086, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "grad_norm": 3.160930633544922, | |
| "learning_rate": 0.00015098039215686275, | |
| "loss": 0.4172, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "grad_norm": 5.0961785316467285, | |
| "learning_rate": 0.00014901960784313728, | |
| "loss": 0.3948, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 3.1396780014038086, | |
| "learning_rate": 0.00014705882352941178, | |
| "loss": 0.3086, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 8.720426559448242, | |
| "learning_rate": 0.00014509803921568628, | |
| "loss": 0.5149, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 2.7797322273254395, | |
| "learning_rate": 0.00014313725490196078, | |
| "loss": 0.6524, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "grad_norm": 8.662543296813965, | |
| "learning_rate": 0.0001411764705882353, | |
| "loss": 0.5208, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "eval_accuracy": 0.7598944591029023, | |
| "eval_auroc": 0.8058862988428643, | |
| "eval_f1": 0.7055016181229774, | |
| "eval_loss": 0.5108699798583984, | |
| "eval_model_selection": 0.7218122868802057, | |
| "eval_npv": 0.7217741935483871, | |
| "eval_ppv": 0.8320610687022901, | |
| "eval_runtime": 20.396, | |
| "eval_samples_per_second": 18.582, | |
| "eval_sensitivity": 0.6123595505617978, | |
| "eval_specificty": 0.8905472636815921, | |
| "eval_steps_per_second": 2.353, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "grad_norm": 5.801065444946289, | |
| "learning_rate": 0.0001392156862745098, | |
| "loss": 0.4872, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "grad_norm": 4.54748010635376, | |
| "learning_rate": 0.0001372549019607843, | |
| "loss": 0.4804, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "grad_norm": 6.328420639038086, | |
| "learning_rate": 0.00013529411764705884, | |
| "loss": 0.5523, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "grad_norm": 2.860734462738037, | |
| "learning_rate": 0.00013333333333333334, | |
| "loss": 0.4061, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "grad_norm": 5.824751377105713, | |
| "learning_rate": 0.00013137254901960784, | |
| "loss": 0.4895, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "grad_norm": 1.802435278892517, | |
| "learning_rate": 0.00012941176470588237, | |
| "loss": 0.4011, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "grad_norm": 2.1423304080963135, | |
| "learning_rate": 0.00012745098039215687, | |
| "loss": 0.3881, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "grad_norm": 2.6464078426361084, | |
| "learning_rate": 0.00012549019607843137, | |
| "loss": 0.4371, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "grad_norm": 1.5666183233261108, | |
| "learning_rate": 0.0001235294117647059, | |
| "loss": 0.4256, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "grad_norm": 7.859738349914551, | |
| "learning_rate": 0.00012156862745098039, | |
| "loss": 0.474, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "eval_accuracy": 0.7994722955145118, | |
| "eval_auroc": 0.8600536642629548, | |
| "eval_f1": 0.7432432432432432, | |
| "eval_loss": 0.5211557149887085, | |
| "eval_model_selection": 0.6577785231147633, | |
| "eval_npv": 0.7394636015325671, | |
| "eval_ppv": 0.9322033898305084, | |
| "eval_runtime": 20.1111, | |
| "eval_samples_per_second": 18.845, | |
| "eval_sensitivity": 0.6179775280898876, | |
| "eval_specificty": 0.9601990049751243, | |
| "eval_steps_per_second": 2.387, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "grad_norm": 5.231843948364258, | |
| "learning_rate": 0.0001196078431372549, | |
| "loss": 0.3969, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "grad_norm": 1.5469104051589966, | |
| "learning_rate": 0.00011764705882352942, | |
| "loss": 0.4781, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "grad_norm": 5.2077765464782715, | |
| "learning_rate": 0.00011568627450980394, | |
| "loss": 0.4261, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "grad_norm": 0.9990509748458862, | |
| "learning_rate": 0.00011372549019607843, | |
| "loss": 0.3773, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "grad_norm": 5.277149677276611, | |
| "learning_rate": 0.00011176470588235294, | |
| "loss": 0.4551, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "grad_norm": 1.3224270343780518, | |
| "learning_rate": 0.00010980392156862746, | |
| "loss": 0.3533, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "grad_norm": 2.360970973968506, | |
| "learning_rate": 0.00010784313725490196, | |
| "loss": 0.4317, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "grad_norm": 2.176283836364746, | |
| "learning_rate": 0.00010588235294117647, | |
| "loss": 0.4718, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 1.7329970598220825, | |
| "learning_rate": 0.00010392156862745099, | |
| "loss": 0.426, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "grad_norm": 7.07577657699585, | |
| "learning_rate": 0.00010196078431372549, | |
| "loss": 0.4285, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "eval_accuracy": 0.7757255936675461, | |
| "eval_auroc": 0.8728268768516966, | |
| "eval_f1": 0.7578347578347578, | |
| "eval_loss": 0.4510786235332489, | |
| "eval_model_selection": 0.9461959863603332, | |
| "eval_npv": 0.7815533980582524, | |
| "eval_ppv": 0.7687861271676301, | |
| "eval_runtime": 20.6194, | |
| "eval_samples_per_second": 18.381, | |
| "eval_sensitivity": 0.7471910112359551, | |
| "eval_specificty": 0.8009950248756219, | |
| "eval_steps_per_second": 2.328, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 6.841150760650635, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3839, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "grad_norm": 2.587709903717041, | |
| "learning_rate": 9.80392156862745e-05, | |
| "loss": 0.396, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "grad_norm": 2.148972272872925, | |
| "learning_rate": 9.607843137254903e-05, | |
| "loss": 0.3475, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "grad_norm": 6.0918803215026855, | |
| "learning_rate": 9.411764705882353e-05, | |
| "loss": 0.3433, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 5.39, | |
| "grad_norm": 1.9835032224655151, | |
| "learning_rate": 9.215686274509804e-05, | |
| "loss": 0.3289, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "grad_norm": 1.912084698677063, | |
| "learning_rate": 9.019607843137255e-05, | |
| "loss": 0.3887, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "grad_norm": 2.5347495079040527, | |
| "learning_rate": 8.823529411764706e-05, | |
| "loss": 0.4516, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "grad_norm": 3.123342990875244, | |
| "learning_rate": 8.627450980392158e-05, | |
| "loss": 0.3973, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "grad_norm": 6.096330165863037, | |
| "learning_rate": 8.431372549019608e-05, | |
| "loss": 0.3895, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "grad_norm": 1.6143405437469482, | |
| "learning_rate": 8.23529411764706e-05, | |
| "loss": 0.3506, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "eval_accuracy": 0.8047493403693932, | |
| "eval_auroc": 0.8691095086365922, | |
| "eval_f1": 0.7658227848101267, | |
| "eval_loss": 0.47164368629455566, | |
| "eval_model_selection": 0.764352395326737, | |
| "eval_npv": 0.7634854771784232, | |
| "eval_ppv": 0.8768115942028986, | |
| "eval_runtime": 20.5548, | |
| "eval_samples_per_second": 18.438, | |
| "eval_sensitivity": 0.6797752808988764, | |
| "eval_specificty": 0.9154228855721394, | |
| "eval_steps_per_second": 2.335, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "grad_norm": 2.3068525791168213, | |
| "learning_rate": 8.039215686274511e-05, | |
| "loss": 0.3566, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "grad_norm": 3.027449369430542, | |
| "learning_rate": 7.843137254901961e-05, | |
| "loss": 0.3662, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "grad_norm": 3.0861737728118896, | |
| "learning_rate": 7.647058823529411e-05, | |
| "loss": 0.4272, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "grad_norm": 2.3498377799987793, | |
| "learning_rate": 7.450980392156864e-05, | |
| "loss": 0.2825, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "grad_norm": 2.3852789402008057, | |
| "learning_rate": 7.254901960784314e-05, | |
| "loss": 0.361, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 6.47, | |
| "grad_norm": 2.655799627304077, | |
| "learning_rate": 7.058823529411765e-05, | |
| "loss": 0.291, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 6.57, | |
| "grad_norm": 4.058769226074219, | |
| "learning_rate": 6.862745098039216e-05, | |
| "loss": 0.3045, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "grad_norm": 4.20621919631958, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 0.3931, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "grad_norm": 4.316690444946289, | |
| "learning_rate": 6.470588235294118e-05, | |
| "loss": 0.2858, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "grad_norm": 3.3390917778015137, | |
| "learning_rate": 6.274509803921569e-05, | |
| "loss": 0.4239, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "eval_accuracy": 0.8100263852242744, | |
| "eval_auroc": 0.8517245234501649, | |
| "eval_f1": 0.7677419354838709, | |
| "eval_loss": 0.504310131072998, | |
| "eval_model_selection": 0.7332159427581195, | |
| "eval_npv": 0.7611336032388664, | |
| "eval_ppv": 0.9015151515151515, | |
| "eval_runtime": 20.9343, | |
| "eval_samples_per_second": 18.104, | |
| "eval_sensitivity": 0.6685393258426966, | |
| "eval_specificty": 0.9353233830845771, | |
| "eval_steps_per_second": 2.293, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "grad_norm": 3.034562587738037, | |
| "learning_rate": 6.078431372549019e-05, | |
| "loss": 0.3033, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "grad_norm": 0.7752771973609924, | |
| "learning_rate": 5.882352941176471e-05, | |
| "loss": 0.2963, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 7.16, | |
| "grad_norm": 1.2533752918243408, | |
| "learning_rate": 5.6862745098039215e-05, | |
| "loss": 0.2974, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "grad_norm": 1.748043179512024, | |
| "learning_rate": 5.490196078431373e-05, | |
| "loss": 0.3549, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 7.35, | |
| "grad_norm": 2.4874391555786133, | |
| "learning_rate": 5.294117647058824e-05, | |
| "loss": 0.413, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "grad_norm": 3.017951726913452, | |
| "learning_rate": 5.0980392156862745e-05, | |
| "loss": 0.2809, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 7.55, | |
| "grad_norm": 5.023625373840332, | |
| "learning_rate": 4.901960784313725e-05, | |
| "loss": 0.3603, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 7.65, | |
| "grad_norm": 2.754984140396118, | |
| "learning_rate": 4.705882352941177e-05, | |
| "loss": 0.2837, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 7.75, | |
| "grad_norm": 1.0377743244171143, | |
| "learning_rate": 4.5098039215686275e-05, | |
| "loss": 0.3169, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "grad_norm": 4.163206100463867, | |
| "learning_rate": 4.313725490196079e-05, | |
| "loss": 0.2447, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "eval_accuracy": 0.8073878627968337, | |
| "eval_auroc": 0.8591872100173291, | |
| "eval_f1": 0.7711598746081505, | |
| "eval_loss": 0.580413818359375, | |
| "eval_model_selection": 0.7805634747610264, | |
| "eval_npv": 0.7689075630252101, | |
| "eval_ppv": 0.8723404255319149, | |
| "eval_runtime": 20.1671, | |
| "eval_samples_per_second": 18.793, | |
| "eval_sensitivity": 0.6910112359550562, | |
| "eval_specificty": 0.9104477611940298, | |
| "eval_steps_per_second": 2.38, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "grad_norm": 3.30311918258667, | |
| "learning_rate": 4.11764705882353e-05, | |
| "loss": 0.3555, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "grad_norm": 4.967993259429932, | |
| "learning_rate": 3.9215686274509805e-05, | |
| "loss": 0.2679, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 8.14, | |
| "grad_norm": 4.838258266448975, | |
| "learning_rate": 3.725490196078432e-05, | |
| "loss": 0.2416, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 8.24, | |
| "grad_norm": 4.202385425567627, | |
| "learning_rate": 3.529411764705883e-05, | |
| "loss": 0.1758, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 8.33, | |
| "grad_norm": 3.0037460327148438, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.3192, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 8.43, | |
| "grad_norm": 4.337649822235107, | |
| "learning_rate": 3.137254901960784e-05, | |
| "loss": 0.2669, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 8.53, | |
| "grad_norm": 6.011264801025391, | |
| "learning_rate": 2.9411764705882354e-05, | |
| "loss": 0.228, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 8.63, | |
| "grad_norm": 0.5027784109115601, | |
| "learning_rate": 2.7450980392156865e-05, | |
| "loss": 0.3023, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 8.73, | |
| "grad_norm": 1.8822238445281982, | |
| "learning_rate": 2.5490196078431373e-05, | |
| "loss": 0.3109, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 8.82, | |
| "grad_norm": 3.6861536502838135, | |
| "learning_rate": 2.3529411764705884e-05, | |
| "loss": 0.1739, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 8.82, | |
| "eval_accuracy": 0.8073878627968337, | |
| "eval_auroc": 0.856224495500028, | |
| "eval_f1": 0.7767584097859327, | |
| "eval_loss": 0.6224877238273621, | |
| "eval_model_selection": 0.8229358823858236, | |
| "eval_npv": 0.7782608695652173, | |
| "eval_ppv": 0.8523489932885906, | |
| "eval_runtime": 20.4294, | |
| "eval_samples_per_second": 18.552, | |
| "eval_sensitivity": 0.7134831460674157, | |
| "eval_specificty": 0.8905472636815921, | |
| "eval_steps_per_second": 2.35, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 8.92, | |
| "grad_norm": 5.0469770431518555, | |
| "learning_rate": 2.1568627450980395e-05, | |
| "loss": 0.4354, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "grad_norm": 4.443832874298096, | |
| "learning_rate": 1.9607843137254903e-05, | |
| "loss": 0.334, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "grad_norm": 2.0471107959747314, | |
| "learning_rate": 1.7647058823529414e-05, | |
| "loss": 0.2051, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 9.22, | |
| "grad_norm": 3.442962169647217, | |
| "learning_rate": 1.568627450980392e-05, | |
| "loss": 0.2112, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 9.31, | |
| "grad_norm": 2.31436824798584, | |
| "learning_rate": 1.3725490196078432e-05, | |
| "loss": 0.3268, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 9.41, | |
| "grad_norm": 2.2342071533203125, | |
| "learning_rate": 1.1764705882352942e-05, | |
| "loss": 0.3107, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 9.51, | |
| "grad_norm": 3.2926645278930664, | |
| "learning_rate": 9.803921568627451e-06, | |
| "loss": 0.2284, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 9.61, | |
| "grad_norm": 2.639843702316284, | |
| "learning_rate": 7.84313725490196e-06, | |
| "loss": 0.2269, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 9.71, | |
| "grad_norm": 2.7015938758850098, | |
| "learning_rate": 5.882352941176471e-06, | |
| "loss": 0.1987, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 9.8, | |
| "grad_norm": 8.358205795288086, | |
| "learning_rate": 3.92156862745098e-06, | |
| "loss": 0.2888, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 9.8, | |
| "eval_accuracy": 0.8047493403693932, | |
| "eval_auroc": 0.856979149197831, | |
| "eval_f1": 0.783625730994152, | |
| "eval_loss": 0.5807113647460938, | |
| "eval_model_selection": 0.9020627201073285, | |
| "eval_npv": 0.7953488372093023, | |
| "eval_ppv": 0.8170731707317073, | |
| "eval_runtime": 20.5189, | |
| "eval_samples_per_second": 18.471, | |
| "eval_sensitivity": 0.7528089887640449, | |
| "eval_specificty": 0.8507462686567164, | |
| "eval_steps_per_second": 2.339, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 9.9, | |
| "grad_norm": 3.527721881866455, | |
| "learning_rate": 1.96078431372549e-06, | |
| "loss": 0.3236, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 6.274048805236816, | |
| "learning_rate": 0.0, | |
| "loss": 0.26, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 1020, | |
| "total_flos": 1.25924483123712e+18, | |
| "train_loss": 0.40091259911948557, | |
| "train_runtime": 1906.0636, | |
| "train_samples_per_second": 8.525, | |
| "train_steps_per_second": 0.535 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1020, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 100, | |
| "total_flos": 1.25924483123712e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |