| { | |
| "best_metric": 0.8400702987697716, | |
| "best_model_checkpoint": "results/qa-suffix-roberta-large-2e-5-32/checkpoint-330", | |
| "epoch": 3.0, | |
| "global_step": 363, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 2.702702702702703e-06, | |
| "loss": 0.656, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_accuracy": 0.6743119266055045, | |
| "eval_f1": 0.0, | |
| "eval_loss": 0.638568639755249, | |
| "eval_precision": 0.0, | |
| "eval_recall": 0.0, | |
| "eval_runtime": 7.0752, | |
| "eval_samples_per_second": 123.248, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 5.405405405405406e-06, | |
| "loss": 0.6251, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_accuracy": 0.6743119266055045, | |
| "eval_f1": 0.0, | |
| "eval_loss": 0.6357823610305786, | |
| "eval_precision": 0.0, | |
| "eval_recall": 0.0, | |
| "eval_runtime": 6.4162, | |
| "eval_samples_per_second": 135.907, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 8.108108108108109e-06, | |
| "loss": 0.6534, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_accuracy": 0.6743119266055045, | |
| "eval_f1": 0.0, | |
| "eval_loss": 0.6329530477523804, | |
| "eval_precision": 0.0, | |
| "eval_recall": 0.0, | |
| "eval_runtime": 5.2772, | |
| "eval_samples_per_second": 165.241, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.0810810810810812e-05, | |
| "loss": 0.6362, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_accuracy": 0.6743119266055045, | |
| "eval_f1": 0.0, | |
| "eval_loss": 0.62663733959198, | |
| "eval_precision": 0.0, | |
| "eval_recall": 0.0, | |
| "eval_runtime": 7.1397, | |
| "eval_samples_per_second": 122.133, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.3513513513513515e-05, | |
| "loss": 0.6653, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_accuracy": 0.6743119266055045, | |
| "eval_f1": 0.0, | |
| "eval_loss": 0.614637017250061, | |
| "eval_precision": 0.0, | |
| "eval_recall": 0.0, | |
| "eval_runtime": 5.5244, | |
| "eval_samples_per_second": 157.845, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.6216216216216218e-05, | |
| "loss": 0.5946, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_accuracy": 0.6926605504587156, | |
| "eval_f1": 0.1518987341772152, | |
| "eval_loss": 0.5719833374023438, | |
| "eval_precision": 0.75, | |
| "eval_recall": 0.08450704225352113, | |
| "eval_runtime": 6.0074, | |
| "eval_samples_per_second": 145.154, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.891891891891892e-05, | |
| "loss": 0.5597, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_accuracy": 0.783256880733945, | |
| "eval_f1": 0.5900216919739696, | |
| "eval_loss": 0.4864228665828705, | |
| "eval_precision": 0.768361581920904, | |
| "eval_recall": 0.4788732394366197, | |
| "eval_runtime": 7.545, | |
| "eval_samples_per_second": 115.574, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.98159509202454e-05, | |
| "loss": 0.5029, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 0.7259174311926605, | |
| "eval_f1": 0.31123919308357345, | |
| "eval_loss": 0.5254184603691101, | |
| "eval_precision": 0.8571428571428571, | |
| "eval_recall": 0.19014084507042253, | |
| "eval_runtime": 7.5551, | |
| "eval_samples_per_second": 115.418, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.9509202453987733e-05, | |
| "loss": 0.5271, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_accuracy": 0.8360091743119266, | |
| "eval_f1": 0.7265774378585086, | |
| "eval_loss": 0.41283223032951355, | |
| "eval_precision": 0.7949790794979079, | |
| "eval_recall": 0.6690140845070423, | |
| "eval_runtime": 6.2573, | |
| "eval_samples_per_second": 139.357, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.9202453987730062e-05, | |
| "loss": 0.4492, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_accuracy": 0.8532110091743119, | |
| "eval_f1": 0.7714285714285714, | |
| "eval_loss": 0.3917687237262726, | |
| "eval_precision": 0.782608695652174, | |
| "eval_recall": 0.7605633802816901, | |
| "eval_runtime": 5.3023, | |
| "eval_samples_per_second": 164.456, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.8895705521472395e-05, | |
| "loss": 0.4193, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_accuracy": 0.8394495412844036, | |
| "eval_f1": 0.7188755020080323, | |
| "eval_loss": 0.35428276658058167, | |
| "eval_precision": 0.8364485981308412, | |
| "eval_recall": 0.6302816901408451, | |
| "eval_runtime": 7.0485, | |
| "eval_samples_per_second": 123.714, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.8588957055214724e-05, | |
| "loss": 0.369, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_accuracy": 0.8428899082568807, | |
| "eval_f1": 0.7674023769100169, | |
| "eval_loss": 0.37413832545280457, | |
| "eval_precision": 0.740983606557377, | |
| "eval_recall": 0.795774647887324, | |
| "eval_runtime": 7.2795, | |
| "eval_samples_per_second": 119.788, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 1.828220858895706e-05, | |
| "loss": 0.4063, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_accuracy": 0.8096330275229358, | |
| "eval_f1": 0.7558823529411764, | |
| "eval_loss": 0.4283739924430847, | |
| "eval_precision": 0.648989898989899, | |
| "eval_recall": 0.9049295774647887, | |
| "eval_runtime": 6.9732, | |
| "eval_samples_per_second": 125.05, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 1.797546012269939e-05, | |
| "loss": 0.4608, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_accuracy": 0.8348623853211009, | |
| "eval_f1": 0.6869565217391304, | |
| "eval_loss": 0.37825068831443787, | |
| "eval_precision": 0.8977272727272727, | |
| "eval_recall": 0.5563380281690141, | |
| "eval_runtime": 5.0, | |
| "eval_samples_per_second": 174.401, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.766871165644172e-05, | |
| "loss": 0.4506, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_accuracy": 0.8681192660550459, | |
| "eval_f1": 0.8067226890756302, | |
| "eval_loss": 0.3225836753845215, | |
| "eval_precision": 0.7717041800643086, | |
| "eval_recall": 0.8450704225352113, | |
| "eval_runtime": 6.6965, | |
| "eval_samples_per_second": 130.217, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.736196319018405e-05, | |
| "loss": 0.3557, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_accuracy": 0.8497706422018348, | |
| "eval_f1": 0.7883683360258481, | |
| "eval_loss": 0.34664297103881836, | |
| "eval_precision": 0.7283582089552239, | |
| "eval_recall": 0.8591549295774648, | |
| "eval_runtime": 7.1969, | |
| "eval_samples_per_second": 121.162, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.7055214723926382e-05, | |
| "loss": 0.4147, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_accuracy": 0.8600917431192661, | |
| "eval_f1": 0.7680608365019012, | |
| "eval_loss": 0.3466174304485321, | |
| "eval_precision": 0.8347107438016529, | |
| "eval_recall": 0.7112676056338029, | |
| "eval_runtime": 7.3038, | |
| "eval_samples_per_second": 119.39, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.6748466257668714e-05, | |
| "loss": 0.4009, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_accuracy": 0.8394495412844036, | |
| "eval_f1": 0.779874213836478, | |
| "eval_loss": 0.35522547364234924, | |
| "eval_precision": 0.7045454545454546, | |
| "eval_recall": 0.8732394366197183, | |
| "eval_runtime": 5.9612, | |
| "eval_samples_per_second": 146.278, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.6441717791411043e-05, | |
| "loss": 0.355, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_accuracy": 0.856651376146789, | |
| "eval_f1": 0.7731397459165154, | |
| "eval_loss": 0.33357954025268555, | |
| "eval_precision": 0.797752808988764, | |
| "eval_recall": 0.75, | |
| "eval_runtime": 5.1139, | |
| "eval_samples_per_second": 170.515, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.6134969325153376e-05, | |
| "loss": 0.3783, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_accuracy": 0.8543577981651376, | |
| "eval_f1": 0.7814113597246126, | |
| "eval_loss": 0.3445069193840027, | |
| "eval_precision": 0.7643097643097643, | |
| "eval_recall": 0.7992957746478874, | |
| "eval_runtime": 7.3177, | |
| "eval_samples_per_second": 119.163, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.5828220858895708e-05, | |
| "loss": 0.3407, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_accuracy": 0.8612385321100917, | |
| "eval_f1": 0.7858407079646017, | |
| "eval_loss": 0.3221452236175537, | |
| "eval_precision": 0.7900355871886121, | |
| "eval_recall": 0.7816901408450704, | |
| "eval_runtime": 7.2195, | |
| "eval_samples_per_second": 120.784, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 1.5521472392638037e-05, | |
| "loss": 0.3886, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_accuracy": 0.8532110091743119, | |
| "eval_f1": 0.7942122186495175, | |
| "eval_loss": 0.3414439558982849, | |
| "eval_precision": 0.7307692307692307, | |
| "eval_recall": 0.8697183098591549, | |
| "eval_runtime": 6.7246, | |
| "eval_samples_per_second": 129.673, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.5214723926380371e-05, | |
| "loss": 0.3325, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_accuracy": 0.8646788990825688, | |
| "eval_f1": 0.7999999999999999, | |
| "eval_loss": 0.3119550347328186, | |
| "eval_precision": 0.7712418300653595, | |
| "eval_recall": 0.8309859154929577, | |
| "eval_runtime": 5.1226, | |
| "eval_samples_per_second": 170.227, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.4907975460122702e-05, | |
| "loss": 0.3489, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_accuracy": 0.8669724770642202, | |
| "eval_f1": 0.802721088435374, | |
| "eval_loss": 0.3048677444458008, | |
| "eval_precision": 0.7763157894736842, | |
| "eval_recall": 0.8309859154929577, | |
| "eval_runtime": 5.8542, | |
| "eval_samples_per_second": 148.952, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 1.4601226993865032e-05, | |
| "loss": 0.308, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "eval_accuracy": 0.8772935779816514, | |
| "eval_f1": 0.8112874779541447, | |
| "eval_loss": 0.2938537299633026, | |
| "eval_precision": 0.8127208480565371, | |
| "eval_recall": 0.8098591549295775, | |
| "eval_runtime": 7.2512, | |
| "eval_samples_per_second": 120.256, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 1.4294478527607363e-05, | |
| "loss": 0.3356, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "eval_accuracy": 0.8704128440366973, | |
| "eval_f1": 0.8027923211169284, | |
| "eval_loss": 0.3023342192173004, | |
| "eval_precision": 0.7958477508650519, | |
| "eval_recall": 0.8098591549295775, | |
| "eval_runtime": 7.3245, | |
| "eval_samples_per_second": 119.053, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 1.3987730061349694e-05, | |
| "loss": 0.3, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "eval_accuracy": 0.8727064220183486, | |
| "eval_f1": 0.8035398230088495, | |
| "eval_loss": 0.3078989088535309, | |
| "eval_precision": 0.8078291814946619, | |
| "eval_recall": 0.7992957746478874, | |
| "eval_runtime": 6.5413, | |
| "eval_samples_per_second": 133.306, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 1.3680981595092026e-05, | |
| "loss": 0.3106, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "eval_accuracy": 0.8658256880733946, | |
| "eval_f1": 0.8040201005025125, | |
| "eval_loss": 0.3164271116256714, | |
| "eval_precision": 0.7667731629392971, | |
| "eval_recall": 0.8450704225352113, | |
| "eval_runtime": 5.1365, | |
| "eval_samples_per_second": 169.766, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 1.3374233128834357e-05, | |
| "loss": 0.2401, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_accuracy": 0.8761467889908257, | |
| "eval_f1": 0.8131487889273357, | |
| "eval_loss": 0.3013850152492523, | |
| "eval_precision": 0.7993197278911565, | |
| "eval_recall": 0.8274647887323944, | |
| "eval_runtime": 5.3284, | |
| "eval_samples_per_second": 163.653, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 1.3067484662576687e-05, | |
| "loss": 0.3167, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "eval_accuracy": 0.856651376146789, | |
| "eval_f1": 0.7980613893376413, | |
| "eval_loss": 0.32322707772254944, | |
| "eval_precision": 0.7373134328358208, | |
| "eval_recall": 0.8697183098591549, | |
| "eval_runtime": 7.1641, | |
| "eval_samples_per_second": 121.717, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 1.276073619631902e-05, | |
| "loss": 0.3227, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "eval_accuracy": 0.8761467889908257, | |
| "eval_f1": 0.7954545454545454, | |
| "eval_loss": 0.3056000769138336, | |
| "eval_precision": 0.860655737704918, | |
| "eval_recall": 0.7394366197183099, | |
| "eval_runtime": 7.0886, | |
| "eval_samples_per_second": 123.014, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 1.245398773006135e-05, | |
| "loss": 0.3401, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "eval_accuracy": 0.8646788990825688, | |
| "eval_f1": 0.805921052631579, | |
| "eval_loss": 0.30931735038757324, | |
| "eval_precision": 0.7561728395061729, | |
| "eval_recall": 0.8626760563380281, | |
| "eval_runtime": 7.2974, | |
| "eval_samples_per_second": 119.494, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 1.2147239263803683e-05, | |
| "loss": 0.272, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "eval_accuracy": 0.8772935779816514, | |
| "eval_f1": 0.8158347676419967, | |
| "eval_loss": 0.2903553247451782, | |
| "eval_precision": 0.797979797979798, | |
| "eval_recall": 0.8345070422535211, | |
| "eval_runtime": 6.7437, | |
| "eval_samples_per_second": 129.307, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 1.1840490797546013e-05, | |
| "loss": 0.2701, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_accuracy": 0.8795871559633027, | |
| "eval_f1": 0.8205128205128205, | |
| "eval_loss": 0.29034143686294556, | |
| "eval_precision": 0.7973421926910299, | |
| "eval_recall": 0.8450704225352113, | |
| "eval_runtime": 5.3852, | |
| "eval_samples_per_second": 161.925, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 1.1533742331288344e-05, | |
| "loss": 0.2579, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "eval_accuracy": 0.8864678899082569, | |
| "eval_f1": 0.8241563055062167, | |
| "eval_loss": 0.28575021028518677, | |
| "eval_precision": 0.8315412186379928, | |
| "eval_recall": 0.8169014084507042, | |
| "eval_runtime": 5.1529, | |
| "eval_samples_per_second": 169.224, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 1.1226993865030675e-05, | |
| "loss": 0.2523, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "eval_accuracy": 0.8864678899082569, | |
| "eval_f1": 0.8241563055062167, | |
| "eval_loss": 0.2819433808326721, | |
| "eval_precision": 0.8315412186379928, | |
| "eval_recall": 0.8169014084507042, | |
| "eval_runtime": 6.7646, | |
| "eval_samples_per_second": 128.907, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 1.0920245398773005e-05, | |
| "loss": 0.3262, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "eval_accuracy": 0.8841743119266054, | |
| "eval_f1": 0.8206039076376554, | |
| "eval_loss": 0.27894383668899536, | |
| "eval_precision": 0.8279569892473119, | |
| "eval_recall": 0.8133802816901409, | |
| "eval_runtime": 7.2116, | |
| "eval_samples_per_second": 120.915, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 1.061349693251534e-05, | |
| "loss": 0.2973, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "eval_accuracy": 0.8795871559633027, | |
| "eval_f1": 0.8154657293497363, | |
| "eval_loss": 0.2783905565738678, | |
| "eval_precision": 0.8140350877192982, | |
| "eval_recall": 0.8169014084507042, | |
| "eval_runtime": 7.1139, | |
| "eval_samples_per_second": 122.577, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 1.030674846625767e-05, | |
| "loss": 0.3122, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "eval_accuracy": 0.8772935779816514, | |
| "eval_f1": 0.8112874779541447, | |
| "eval_loss": 0.28692659735679626, | |
| "eval_precision": 0.8127208480565371, | |
| "eval_recall": 0.8098591549295775, | |
| "eval_runtime": 7.2602, | |
| "eval_samples_per_second": 120.107, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 1e-05, | |
| "loss": 0.3195, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "eval_accuracy": 0.8772935779816514, | |
| "eval_f1": 0.8078994614003591, | |
| "eval_loss": 0.2904263436794281, | |
| "eval_precision": 0.8241758241758241, | |
| "eval_recall": 0.7922535211267606, | |
| "eval_runtime": 6.5577, | |
| "eval_samples_per_second": 132.972, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 9.693251533742331e-06, | |
| "loss": 0.2864, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "eval_accuracy": 0.8669724770642202, | |
| "eval_f1": 0.8020477815699658, | |
| "eval_loss": 0.29111164808273315, | |
| "eval_precision": 0.7781456953642384, | |
| "eval_recall": 0.8274647887323944, | |
| "eval_runtime": 5.8177, | |
| "eval_samples_per_second": 149.888, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 9.386503067484664e-06, | |
| "loss": 0.3185, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "eval_accuracy": 0.8635321100917431, | |
| "eval_f1": 0.8006700167504189, | |
| "eval_loss": 0.29158464074134827, | |
| "eval_precision": 0.7635782747603834, | |
| "eval_recall": 0.8415492957746479, | |
| "eval_runtime": 5.1013, | |
| "eval_samples_per_second": 170.936, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 9.079754601226994e-06, | |
| "loss": 0.2743, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "eval_accuracy": 0.8704128440366973, | |
| "eval_f1": 0.804159445407279, | |
| "eval_loss": 0.2849805951118469, | |
| "eval_precision": 0.7918088737201365, | |
| "eval_recall": 0.8169014084507042, | |
| "eval_runtime": 6.2932, | |
| "eval_samples_per_second": 138.562, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 8.773006134969327e-06, | |
| "loss": 0.3158, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "eval_accuracy": 0.8669724770642202, | |
| "eval_f1": 0.8110749185667752, | |
| "eval_loss": 0.301179438829422, | |
| "eval_precision": 0.7545454545454545, | |
| "eval_recall": 0.8767605633802817, | |
| "eval_runtime": 7.2876, | |
| "eval_samples_per_second": 119.656, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 8.466257668711658e-06, | |
| "loss": 0.2714, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "eval_accuracy": 0.8853211009174312, | |
| "eval_f1": 0.8161764705882353, | |
| "eval_loss": 0.28371983766555786, | |
| "eval_precision": 0.8538461538461538, | |
| "eval_recall": 0.7816901408450704, | |
| "eval_runtime": 7.0861, | |
| "eval_samples_per_second": 123.058, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 8.159509202453988e-06, | |
| "loss": 0.2978, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "eval_accuracy": 0.8738532110091743, | |
| "eval_f1": 0.8141891891891891, | |
| "eval_loss": 0.2848930060863495, | |
| "eval_precision": 0.7824675324675324, | |
| "eval_recall": 0.8485915492957746, | |
| "eval_runtime": 7.1621, | |
| "eval_samples_per_second": 121.752, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 7.85276073619632e-06, | |
| "loss": 0.2857, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "eval_accuracy": 0.8772935779816514, | |
| "eval_f1": 0.8189509306260575, | |
| "eval_loss": 0.28239545226097107, | |
| "eval_precision": 0.7882736156351792, | |
| "eval_recall": 0.852112676056338, | |
| "eval_runtime": 6.4537, | |
| "eval_samples_per_second": 135.115, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 7.54601226993865e-06, | |
| "loss": 0.2669, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "eval_accuracy": 0.8876146788990825, | |
| "eval_f1": 0.8150943396226414, | |
| "eval_loss": 0.29343676567077637, | |
| "eval_precision": 0.8780487804878049, | |
| "eval_recall": 0.7605633802816901, | |
| "eval_runtime": 5.6057, | |
| "eval_samples_per_second": 155.557, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 7.239263803680983e-06, | |
| "loss": 0.2786, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "eval_accuracy": 0.8772935779816514, | |
| "eval_f1": 0.8189509306260575, | |
| "eval_loss": 0.28877732157707214, | |
| "eval_precision": 0.7882736156351792, | |
| "eval_recall": 0.852112676056338, | |
| "eval_runtime": 5.106, | |
| "eval_samples_per_second": 170.781, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 6.932515337423313e-06, | |
| "loss": 0.2484, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "eval_accuracy": 0.8807339449541285, | |
| "eval_f1": 0.8237288135593219, | |
| "eval_loss": 0.285278856754303, | |
| "eval_precision": 0.7941176470588235, | |
| "eval_recall": 0.8556338028169014, | |
| "eval_runtime": 5.1204, | |
| "eval_samples_per_second": 170.3, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 6.625766871165644e-06, | |
| "loss": 0.2006, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "eval_accuracy": 0.8899082568807339, | |
| "eval_f1": 0.8241758241758241, | |
| "eval_loss": 0.2904672622680664, | |
| "eval_precision": 0.8587786259541985, | |
| "eval_recall": 0.7922535211267606, | |
| "eval_runtime": 7.1075, | |
| "eval_samples_per_second": 122.688, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 6.319018404907976e-06, | |
| "loss": 0.2564, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "eval_accuracy": 0.8807339449541285, | |
| "eval_f1": 0.8272425249169436, | |
| "eval_loss": 0.31366848945617676, | |
| "eval_precision": 0.7830188679245284, | |
| "eval_recall": 0.8767605633802817, | |
| "eval_runtime": 7.0515, | |
| "eval_samples_per_second": 123.661, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 6.012269938650307e-06, | |
| "loss": 0.2652, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "eval_accuracy": 0.8795871559633027, | |
| "eval_f1": 0.8247078464106845, | |
| "eval_loss": 0.29686439037323, | |
| "eval_precision": 0.7841269841269841, | |
| "eval_recall": 0.8697183098591549, | |
| "eval_runtime": 7.3289, | |
| "eval_samples_per_second": 118.981, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 5.7055214723926385e-06, | |
| "loss": 0.2267, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "eval_accuracy": 0.8876146788990825, | |
| "eval_f1": 0.8256227758007119, | |
| "eval_loss": 0.2745262682437897, | |
| "eval_precision": 0.8345323741007195, | |
| "eval_recall": 0.8169014084507042, | |
| "eval_runtime": 6.8361, | |
| "eval_samples_per_second": 127.558, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 5.39877300613497e-06, | |
| "loss": 0.2089, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "eval_accuracy": 0.8772935779816514, | |
| "eval_f1": 0.81195079086116, | |
| "eval_loss": 0.28398409485816956, | |
| "eval_precision": 0.8105263157894737, | |
| "eval_recall": 0.8133802816901409, | |
| "eval_runtime": 5.1533, | |
| "eval_samples_per_second": 169.213, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 5.092024539877301e-06, | |
| "loss": 0.2417, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "eval_accuracy": 0.8784403669724771, | |
| "eval_f1": 0.8239202657807309, | |
| "eval_loss": 0.30549222230911255, | |
| "eval_precision": 0.779874213836478, | |
| "eval_recall": 0.8732394366197183, | |
| "eval_runtime": 5.0137, | |
| "eval_samples_per_second": 173.924, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 4.785276073619632e-06, | |
| "loss": 0.1859, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "eval_accuracy": 0.875, | |
| "eval_f1": 0.8174204355108877, | |
| "eval_loss": 0.2901022434234619, | |
| "eval_precision": 0.7795527156549521, | |
| "eval_recall": 0.8591549295774648, | |
| "eval_runtime": 5.0546, | |
| "eval_samples_per_second": 172.515, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 4.478527607361964e-06, | |
| "loss": 0.162, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "eval_accuracy": 0.8841743119266054, | |
| "eval_f1": 0.8186714542190306, | |
| "eval_loss": 0.2908870279788971, | |
| "eval_precision": 0.8351648351648352, | |
| "eval_recall": 0.8028169014084507, | |
| "eval_runtime": 5.0477, | |
| "eval_samples_per_second": 172.752, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 4.171779141104294e-06, | |
| "loss": 0.2655, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "eval_accuracy": 0.8853211009174312, | |
| "eval_f1": 0.8220640569395018, | |
| "eval_loss": 0.2885466516017914, | |
| "eval_precision": 0.8309352517985612, | |
| "eval_recall": 0.8133802816901409, | |
| "eval_runtime": 5.0462, | |
| "eval_samples_per_second": 172.803, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 3.865030674846626e-06, | |
| "loss": 0.2198, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "eval_accuracy": 0.8772935779816514, | |
| "eval_f1": 0.8183361629881155, | |
| "eval_loss": 0.2900920510292053, | |
| "eval_precision": 0.7901639344262295, | |
| "eval_recall": 0.8485915492957746, | |
| "eval_runtime": 5.0183, | |
| "eval_samples_per_second": 173.764, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 3.5582822085889574e-06, | |
| "loss": 0.1997, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "eval_accuracy": 0.875, | |
| "eval_f1": 0.8174204355108877, | |
| "eval_loss": 0.29444408416748047, | |
| "eval_precision": 0.7795527156549521, | |
| "eval_recall": 0.8591549295774648, | |
| "eval_runtime": 4.9821, | |
| "eval_samples_per_second": 175.028, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 3.251533742331289e-06, | |
| "loss": 0.2164, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "eval_accuracy": 0.8876146788990825, | |
| "eval_f1": 0.8286713286713286, | |
| "eval_loss": 0.2741115391254425, | |
| "eval_precision": 0.8229166666666666, | |
| "eval_recall": 0.8345070422535211, | |
| "eval_runtime": 5.0084, | |
| "eval_samples_per_second": 174.109, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 2.94478527607362e-06, | |
| "loss": 0.2352, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "eval_accuracy": 0.8922018348623854, | |
| "eval_f1": 0.8362369337979093, | |
| "eval_loss": 0.2667850852012634, | |
| "eval_precision": 0.8275862068965517, | |
| "eval_recall": 0.8450704225352113, | |
| "eval_runtime": 5.0123, | |
| "eval_samples_per_second": 173.972, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 2.638036809815951e-06, | |
| "loss": 0.1848, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "eval_accuracy": 0.8853211009174312, | |
| "eval_f1": 0.8310810810810811, | |
| "eval_loss": 0.2768068015575409, | |
| "eval_precision": 0.7987012987012987, | |
| "eval_recall": 0.8661971830985915, | |
| "eval_runtime": 4.9976, | |
| "eval_samples_per_second": 174.484, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 2.331288343558282e-06, | |
| "loss": 0.2405, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "eval_accuracy": 0.8887614678899083, | |
| "eval_f1": 0.8324697754749567, | |
| "eval_loss": 0.2706994116306305, | |
| "eval_precision": 0.8169491525423729, | |
| "eval_recall": 0.8485915492957746, | |
| "eval_runtime": 4.9789, | |
| "eval_samples_per_second": 175.138, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 2.0245398773006137e-06, | |
| "loss": 0.2175, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "eval_accuracy": 0.8956422018348624, | |
| "eval_f1": 0.8400702987697716, | |
| "eval_loss": 0.2676824629306793, | |
| "eval_precision": 0.8385964912280702, | |
| "eval_recall": 0.8415492957746479, | |
| "eval_runtime": 4.9748, | |
| "eval_samples_per_second": 175.283, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 1.717791411042945e-06, | |
| "loss": 0.2392, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "eval_accuracy": 0.8864678899082569, | |
| "eval_f1": 0.82960413080895, | |
| "eval_loss": 0.2721306383609772, | |
| "eval_precision": 0.8114478114478114, | |
| "eval_recall": 0.8485915492957746, | |
| "eval_runtime": 4.9711, | |
| "eval_samples_per_second": 175.412, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 1.411042944785276e-06, | |
| "loss": 0.2155, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "eval_accuracy": 0.8818807339449541, | |
| "eval_f1": 0.8263069139966273, | |
| "eval_loss": 0.2795032560825348, | |
| "eval_precision": 0.7928802588996764, | |
| "eval_recall": 0.8626760563380281, | |
| "eval_runtime": 4.9888, | |
| "eval_samples_per_second": 174.791, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 1.1042944785276075e-06, | |
| "loss": 0.2833, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "eval_accuracy": 0.8818807339449541, | |
| "eval_f1": 0.826890756302521, | |
| "eval_loss": 0.28040066361427307, | |
| "eval_precision": 0.7909967845659164, | |
| "eval_recall": 0.8661971830985915, | |
| "eval_runtime": 4.9996, | |
| "eval_samples_per_second": 174.413, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 7.975460122699387e-07, | |
| "loss": 0.2018, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "eval_accuracy": 0.8864678899082569, | |
| "eval_f1": 0.82960413080895, | |
| "eval_loss": 0.27195218205451965, | |
| "eval_precision": 0.8114478114478114, | |
| "eval_recall": 0.8485915492957746, | |
| "eval_runtime": 4.9745, | |
| "eval_samples_per_second": 175.295, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 4.9079754601227e-07, | |
| "loss": 0.2184, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "eval_accuracy": 0.8899082568807339, | |
| "eval_f1": 0.832752613240418, | |
| "eval_loss": 0.26814883947372437, | |
| "eval_precision": 0.8241379310344827, | |
| "eval_recall": 0.8415492957746479, | |
| "eval_runtime": 5.1102, | |
| "eval_samples_per_second": 170.638, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 1.8404907975460125e-07, | |
| "loss": 0.2232, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "eval_accuracy": 0.8910550458715596, | |
| "eval_f1": 0.8342059336823734, | |
| "eval_loss": 0.26773521304130554, | |
| "eval_precision": 0.8269896193771626, | |
| "eval_recall": 0.8415492957746479, | |
| "eval_runtime": 4.9742, | |
| "eval_samples_per_second": 175.303, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 363, | |
| "total_flos": 1.259652372638976e+16, | |
| "train_runtime": 1963.3463, | |
| "train_samples_per_second": 0.185 | |
| } | |
| ], | |
| "max_steps": 363, | |
| "num_train_epochs": 3, | |
| "total_flos": 1.259652372638976e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |