diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,49 +1,49 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 100.0, + "epoch": 5.0, "eval_steps": 500, - "global_step": 9600, + "global_step": 480, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, - "grad_norm": 2.2828588485717773, - "learning_rate": 4.9500000000000004e-05, - "loss": 0.2474, + "grad_norm": 2.027179718017578, + "learning_rate": 4e-05, + "loss": 0.241, "step": 96 }, { "epoch": 1.0, - "eval_LOCATION_f1": 0.8877005347593583, + "eval_LOCATION_f1": 0.8723404255319148, "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8217821782178217, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.8514285714285714, + "eval_LOCATION_precision": 0.803921568627451, + "eval_LOCATION_recall": 0.9534883720930233, + "eval_ORGANIZATION_f1": 0.9008498583569405, "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.8662790697674418, - "eval_ORGANIZATION_recall": 0.8370786516853933, - "eval_PERSON_f1": 0.96875, + "eval_ORGANIZATION_precision": 0.9085714285714286, + "eval_ORGANIZATION_recall": 0.8932584269662921, + "eval_PERSON_f1": 0.9612403100775193, "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.96875, + "eval_PERSON_precision": 0.9538461538461539, "eval_PERSON_recall": 0.96875, - "eval_loss": 0.061649635434150696, - "eval_overall_accuracy": 0.9811066126855601, - "eval_overall_f1": 0.8978562421185372, - "eval_overall_precision": 0.8877805486284289, - "eval_overall_recall": 0.9081632653061225, - "eval_runtime": 0.6066, - "eval_samples_per_second": 280.25, - "eval_steps_per_second": 4.946, + "eval_loss": 0.05585619434714317, + "eval_overall_accuracy": 0.9832658569500675, + "eval_overall_f1": 0.9136420525657072, + "eval_overall_precision": 0.8968058968058968, + "eval_overall_recall": 0.9311224489795918, + "eval_runtime": 1.3404, + "eval_samples_per_second": 126.825, + "eval_steps_per_second": 2.238, "step": 96 }, { "epoch": 2.0, - "grad_norm": 2.451382875442505, - "learning_rate": 4.9e-05, - "loss": 0.0576, + "grad_norm": 1.5001760721206665, + "learning_rate": 3e-05, + "loss": 0.0545, "step": 192 }, { @@ -52,3078 +52,133 @@ "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.845360824742268, "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.892128279883382, + "eval_ORGANIZATION_f1": 0.9111111111111112, "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9272727272727272, - "eval_ORGANIZATION_recall": 0.8595505617977528, - "eval_PERSON_f1": 0.9571984435797667, + "eval_ORGANIZATION_precision": 0.9010989010989011, + "eval_ORGANIZATION_recall": 0.9213483146067416, + "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9534883720930233, - "eval_PERSON_recall": 0.9609375, - "eval_loss": 0.06888098269701004, - "eval_overall_accuracy": 0.9819163292847504, - "eval_overall_f1": 0.9144316730523626, - "eval_overall_precision": 0.9156010230179028, - "eval_overall_recall": 0.9132653061224489, - "eval_runtime": 0.6409, - "eval_samples_per_second": 265.252, - "eval_steps_per_second": 4.681, + "eval_PERSON_precision": 0.9841269841269841, + "eval_PERSON_recall": 0.96875, + "eval_loss": 0.05511007830500603, + "eval_overall_accuracy": 0.9827260458839406, + "eval_overall_f1": 0.9284818067754077, + "eval_overall_precision": 0.9135802469135802, + "eval_overall_recall": 0.9438775510204082, + "eval_runtime": 1.2378, + "eval_samples_per_second": 137.339, + "eval_steps_per_second": 2.424, "step": 192 }, { "epoch": 3.0, - "grad_norm": 0.8167328238487244, - "learning_rate": 4.85e-05, - "loss": 0.0325, + "grad_norm": 3.457658290863037, + "learning_rate": 2e-05, + "loss": 0.0286, "step": 288 }, { "epoch": 3.0, - "eval_LOCATION_f1": 0.9010989010989011, + "eval_LOCATION_f1": 0.9189189189189189, "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8541666666666666, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.8882521489971347, + "eval_LOCATION_precision": 0.8585858585858586, + "eval_LOCATION_recall": 0.9883720930232558, + "eval_ORGANIZATION_f1": 0.9344729344729344, "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9064327485380117, - "eval_ORGANIZATION_recall": 0.8707865168539326, - "eval_PERSON_f1": 0.9609375, + "eval_ORGANIZATION_precision": 0.9479768786127167, + "eval_ORGANIZATION_recall": 0.9213483146067416, + "eval_PERSON_f1": 0.9763779527559054, "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9609375, - "eval_PERSON_recall": 0.9609375, - "eval_loss": 0.05915472283959389, - "eval_overall_accuracy": 0.9838056680161943, - "eval_overall_f1": 0.914866581956798, - "eval_overall_precision": 0.9113924050632911, - "eval_overall_recall": 0.9183673469387755, - "eval_runtime": 0.6242, - "eval_samples_per_second": 272.329, - "eval_steps_per_second": 4.806, + "eval_PERSON_precision": 0.9841269841269841, + "eval_PERSON_recall": 0.96875, + "eval_loss": 0.04845225811004639, + "eval_overall_accuracy": 0.9870445344129555, + "eval_overall_f1": 0.9443037974683544, + "eval_overall_precision": 0.9371859296482412, + "eval_overall_recall": 0.951530612244898, + "eval_runtime": 1.2338, + "eval_samples_per_second": 137.782, + "eval_steps_per_second": 2.431, "step": 288 }, { "epoch": 4.0, - "grad_norm": 1.6919182538986206, - "learning_rate": 4.8e-05, - "loss": 0.0196, + "grad_norm": 2.5664279460906982, + "learning_rate": 1e-05, + "loss": 0.0151, "step": 384 }, { "epoch": 4.0, - "eval_LOCATION_f1": 0.9111111111111112, + "eval_LOCATION_f1": 0.9378531073446328, "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8723404255319149, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9169054441260744, + "eval_LOCATION_precision": 0.9120879120879121, + "eval_LOCATION_recall": 0.9651162790697675, + "eval_ORGANIZATION_f1": 0.9322033898305084, "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.935672514619883, - "eval_ORGANIZATION_recall": 0.898876404494382, - "eval_PERSON_f1": 0.96875, + "eval_ORGANIZATION_precision": 0.9375, + "eval_ORGANIZATION_recall": 0.9269662921348315, + "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.96875, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.05859009176492691, - "eval_overall_accuracy": 0.9865047233468286, - "eval_overall_f1": 0.9324840764331209, - "eval_overall_precision": 0.9312977099236641, - "eval_overall_recall": 0.9336734693877551, - "eval_runtime": 0.595, - "eval_samples_per_second": 285.698, - "eval_steps_per_second": 5.042, + "eval_PERSON_precision": 0.9765625, + "eval_PERSON_recall": 0.9765625, + "eval_loss": 0.057044416666030884, + "eval_overall_accuracy": 0.9873144399460189, + "eval_overall_f1": 0.9479034307496824, + "eval_overall_precision": 0.9443037974683545, + "eval_overall_recall": 0.951530612244898, + "eval_runtime": 1.1835, + "eval_samples_per_second": 143.642, + "eval_steps_per_second": 2.535, "step": 384 }, { "epoch": 5.0, - "grad_norm": 0.15864279866218567, - "learning_rate": 4.75e-05, - "loss": 0.0138, + "grad_norm": 0.14279630780220032, + "learning_rate": 0.0, + "loss": 0.0088, "step": 480 }, { "epoch": 5.0, - "eval_LOCATION_f1": 0.9213483146067417, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8913043478260869, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9142857142857143, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9302325581395349, - "eval_ORGANIZATION_recall": 0.898876404494382, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.0675107017159462, - "eval_overall_accuracy": 0.9859649122807017, - "eval_overall_f1": 0.9348659003831418, - "eval_overall_precision": 0.9360613810741688, - "eval_overall_recall": 0.9336734693877551, - "eval_runtime": 0.612, - "eval_samples_per_second": 277.787, - "eval_steps_per_second": 4.902, - "step": 480 - }, - { - "epoch": 6.0, - "grad_norm": 0.16338849067687988, - "learning_rate": 4.7e-05, - "loss": 0.0126, - "step": 576 - }, - { - "epoch": 6.0, - "eval_LOCATION_f1": 0.8977272727272728, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8777777777777778, - "eval_LOCATION_recall": 0.9186046511627907, - "eval_ORGANIZATION_f1": 0.9281767955801105, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9130434782608695, - "eval_ORGANIZATION_recall": 0.9438202247191011, - "eval_PERSON_f1": 0.9571984435797667, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9534883720930233, - "eval_PERSON_recall": 0.9609375, - "eval_loss": 0.04975934699177742, - "eval_overall_accuracy": 0.9865047233468286, - "eval_overall_f1": 0.9308176100628932, - "eval_overall_precision": 0.9181141439205955, - "eval_overall_recall": 0.9438775510204082, - "eval_runtime": 0.6474, - "eval_samples_per_second": 262.602, - "eval_steps_per_second": 4.634, - "step": 576 - }, - { - "epoch": 7.0, - "grad_norm": 0.2481822818517685, - "learning_rate": 4.6500000000000005e-05, - "loss": 0.007, - "step": 672 - }, - { - "epoch": 7.0, - "eval_LOCATION_f1": 0.9101123595505618, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8804347826086957, - "eval_LOCATION_recall": 0.9418604651162791, - "eval_ORGANIZATION_f1": 0.9371428571428573, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9534883720930233, - "eval_ORGANIZATION_recall": 0.9213483146067416, - "eval_PERSON_f1": 0.9689922480620154, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9615384615384616, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.0726550817489624, - "eval_overall_accuracy": 0.9859649122807017, - "eval_overall_f1": 0.94147582697201, - "eval_overall_precision": 0.9390862944162437, - "eval_overall_recall": 0.9438775510204082, - "eval_runtime": 0.6915, - "eval_samples_per_second": 245.852, - "eval_steps_per_second": 4.339, - "step": 672 - }, - { - "epoch": 8.0, - "grad_norm": 0.0069261714816093445, - "learning_rate": 4.600000000000001e-05, - "loss": 0.008, - "step": 768 - }, - { - "epoch": 8.0, - "eval_LOCATION_f1": 0.9152542372881357, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8901098901098901, - "eval_LOCATION_recall": 0.9418604651162791, - "eval_ORGANIZATION_f1": 0.9435028248587571, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9488636363636364, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9727626459143969, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9689922480620154, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.06536342948675156, - "eval_overall_accuracy": 0.9889338731443995, - "eval_overall_f1": 0.9467005076142132, - "eval_overall_precision": 0.9419191919191919, - "eval_overall_recall": 0.951530612244898, - "eval_runtime": 0.6108, - "eval_samples_per_second": 278.303, - "eval_steps_per_second": 4.911, - "step": 768 - }, - { - "epoch": 9.0, - "grad_norm": 0.02850804105401039, - "learning_rate": 4.55e-05, - "loss": 0.0052, - "step": 864 - }, - { - "epoch": 9.0, - "eval_LOCATION_f1": 0.9325842696629213, + "eval_LOCATION_f1": 0.9431818181818181, "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9021739130434783, + "eval_LOCATION_precision": 0.9222222222222223, "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.951841359773371, + "eval_ORGANIZATION_f1": 0.9385474860335196, "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.96, + "eval_ORGANIZATION_precision": 0.9333333333333333, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.0606556162238121, - "eval_overall_accuracy": 0.9894736842105263, - "eval_overall_f1": 0.9567430025445292, - "eval_overall_precision": 0.9543147208121827, - "eval_overall_recall": 0.9591836734693877, - "eval_runtime": 0.6305, - "eval_samples_per_second": 269.613, - "eval_steps_per_second": 4.758, - "step": 864 - }, - { - "epoch": 10.0, - "grad_norm": 0.05026896297931671, - "learning_rate": 4.5e-05, - "loss": 0.0077, - "step": 960 - }, - { - "epoch": 10.0, - "eval_LOCATION_f1": 0.8999999999999999, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8617021276595744, - "eval_LOCATION_recall": 0.9418604651162791, - "eval_ORGANIZATION_f1": 0.9337175792507205, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9585798816568047, - "eval_ORGANIZATION_recall": 0.9101123595505618, - "eval_PERSON_f1": 0.9609375, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9609375, - "eval_PERSON_recall": 0.9609375, - "eval_loss": 0.06473550200462341, - "eval_overall_accuracy": 0.9873144399460189, - "eval_overall_f1": 0.9348659003831418, - "eval_overall_precision": 0.9360613810741688, - "eval_overall_recall": 0.9336734693877551, - "eval_runtime": 0.6137, - "eval_samples_per_second": 277.003, - "eval_steps_per_second": 4.888, - "step": 960 - }, - { - "epoch": 11.0, - "grad_norm": 1.136472463607788, - "learning_rate": 4.4500000000000004e-05, - "loss": 0.0058, - "step": 1056 - }, - { - "epoch": 11.0, - "eval_LOCATION_f1": 0.907103825136612, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8556701030927835, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9491525423728814, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9545454545454546, - "eval_ORGANIZATION_recall": 0.9438202247191011, - "eval_PERSON_f1": 0.9647058823529412, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.968503937007874, - "eval_PERSON_recall": 0.9609375, - "eval_loss": 0.06800268590450287, + "eval_loss": 0.05175752565264702, "eval_overall_accuracy": 0.9889338731443995, - "eval_overall_f1": 0.9444444444444445, - "eval_overall_precision": 0.935, - "eval_overall_recall": 0.9540816326530612, - "eval_runtime": 0.6063, - "eval_samples_per_second": 280.411, - "eval_steps_per_second": 4.948, - "step": 1056 - }, - { - "epoch": 12.0, - "grad_norm": 0.022789908573031425, - "learning_rate": 4.4000000000000006e-05, - "loss": 0.0039, - "step": 1152 - }, - { - "epoch": 12.0, - "eval_LOCATION_f1": 0.9213483146067417, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8913043478260869, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9239436619718311, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9265536723163842, - "eval_ORGANIZATION_recall": 0.9213483146067416, - "eval_PERSON_f1": 0.9647058823529412, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.968503937007874, - "eval_PERSON_recall": 0.9609375, - "eval_loss": 0.07052551954984665, - "eval_overall_accuracy": 0.9865047233468286, - "eval_overall_f1": 0.9365482233502538, - "eval_overall_precision": 0.9318181818181818, - "eval_overall_recall": 0.9413265306122449, - "eval_runtime": 0.6185, - "eval_samples_per_second": 274.877, - "eval_steps_per_second": 4.851, - "step": 1152 - }, - { - "epoch": 13.0, - "grad_norm": 0.0842265710234642, - "learning_rate": 4.35e-05, - "loss": 0.0034, - "step": 1248 - }, - { - "epoch": 13.0, - "eval_LOCATION_f1": 0.9340659340659341, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8854166666666666, - "eval_LOCATION_recall": 0.9883720930232558, - "eval_ORGANIZATION_f1": 0.9488636363636365, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9597701149425287, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.0649752989411354, - "eval_overall_accuracy": 0.988663967611336, "eval_overall_f1": 0.9531051964512041, "eval_overall_precision": 0.947103274559194, "eval_overall_recall": 0.9591836734693877, - "eval_runtime": 0.6172, - "eval_samples_per_second": 275.437, - "eval_steps_per_second": 4.861, - "step": 1248 - }, - { - "epoch": 14.0, - "grad_norm": 0.020714962854981422, - "learning_rate": 4.3e-05, - "loss": 0.0033, - "step": 1344 - }, - { - "epoch": 14.0, - "eval_LOCATION_f1": 0.945054945054945, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8958333333333334, - "eval_LOCATION_recall": 1.0, - "eval_ORGANIZATION_f1": 0.9435028248587571, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9488636363636364, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9765625, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9765625, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.0574415884912014, - "eval_overall_accuracy": 0.9894736842105263, - "eval_overall_f1": 0.9545454545454546, - "eval_overall_precision": 0.945, - "eval_overall_recall": 0.9642857142857143, - "eval_runtime": 0.6171, - "eval_samples_per_second": 275.468, - "eval_steps_per_second": 4.861, - "step": 1344 - }, - { - "epoch": 15.0, - "grad_norm": 0.009655606932938099, - "learning_rate": 4.25e-05, - "loss": 0.0022, - "step": 1440 - }, - { - "epoch": 15.0, - "eval_LOCATION_f1": 0.9545454545454545, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9333333333333333, - "eval_LOCATION_recall": 0.9767441860465116, - "eval_ORGANIZATION_f1": 0.9461756373937678, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9542857142857143, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.96875, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.96875, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.06475672125816345, - "eval_overall_accuracy": 0.9897435897435898, - "eval_overall_f1": 0.9554140127388535, - "eval_overall_precision": 0.9541984732824428, - "eval_overall_recall": 0.9566326530612245, - "eval_runtime": 0.6138, - "eval_samples_per_second": 276.961, - "eval_steps_per_second": 4.888, - "step": 1440 - }, - { - "epoch": 16.0, - "grad_norm": 0.012555771507322788, - "learning_rate": 4.2e-05, - "loss": 0.003, - "step": 1536 - }, - { - "epoch": 16.0, - "eval_LOCATION_f1": 0.9265536723163842, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9010989010989011, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9279538904899135, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9526627218934911, - "eval_ORGANIZATION_recall": 0.9044943820224719, - "eval_PERSON_f1": 0.96875, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.96875, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.07581795752048492, - "eval_overall_accuracy": 0.9873144399460189, - "eval_overall_f1": 0.9410256410256409, - "eval_overall_precision": 0.9458762886597938, - "eval_overall_recall": 0.9362244897959183, - "eval_runtime": 0.6106, - "eval_samples_per_second": 278.404, - "eval_steps_per_second": 4.913, - "step": 1536 - }, - { - "epoch": 17.0, - "grad_norm": 0.38364461064338684, - "learning_rate": 4.15e-05, - "loss": 0.0036, - "step": 1632 - }, - { - "epoch": 17.0, - "eval_LOCATION_f1": 0.9273743016759777, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8924731182795699, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9515669515669515, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9653179190751445, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9727626459143969, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9689922480620154, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.05730000510811806, - "eval_overall_accuracy": 0.9900134952766532, - "eval_overall_f1": 0.9529860228716647, - "eval_overall_precision": 0.9493670886075949, - "eval_overall_recall": 0.9566326530612245, - "eval_runtime": 0.6284, - "eval_samples_per_second": 270.525, - "eval_steps_per_second": 4.774, - "step": 1632 - }, - { - "epoch": 18.0, - "grad_norm": 0.002933623967692256, - "learning_rate": 4.1e-05, - "loss": 0.0027, - "step": 1728 - }, - { - "epoch": 18.0, - "eval_LOCATION_f1": 0.9171270718232045, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8736842105263158, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9485714285714285, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9651162790697675, - "eval_ORGANIZATION_recall": 0.9325842696629213, - "eval_PERSON_f1": 0.9727626459143969, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9689922480620154, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.06547566503286362, - "eval_overall_accuracy": 0.9894736842105263, - "eval_overall_f1": 0.949238578680203, - "eval_overall_precision": 0.9444444444444444, - "eval_overall_recall": 0.9540816326530612, - "eval_runtime": 0.6157, - "eval_samples_per_second": 276.125, - "eval_steps_per_second": 4.873, - "step": 1728 - }, - { - "epoch": 19.0, - "grad_norm": 0.007115746848285198, - "learning_rate": 4.05e-05, - "loss": 0.0019, - "step": 1824 - }, - { - "epoch": 19.0, - "eval_LOCATION_f1": 0.923076923076923, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.875, - "eval_LOCATION_recall": 0.9767441860465116, - "eval_ORGANIZATION_f1": 0.9075144508670521, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9345238095238095, - "eval_ORGANIZATION_recall": 0.8820224719101124, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.08035985380411148, - "eval_overall_accuracy": 0.9878542510121457, - "eval_overall_f1": 0.9348659003831418, - "eval_overall_precision": 0.9360613810741688, - "eval_overall_recall": 0.9336734693877551, - "eval_runtime": 0.6198, - "eval_samples_per_second": 274.27, - "eval_steps_per_second": 4.84, - "step": 1824 - }, - { - "epoch": 20.0, - "grad_norm": 0.27215883135795593, - "learning_rate": 4e-05, - "loss": 0.0018, - "step": 1920 - }, - { - "epoch": 20.0, - "eval_LOCATION_f1": 0.9318181818181819, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9111111111111111, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9461756373937678, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9542857142857143, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.07430987805128098, - "eval_overall_accuracy": 0.9892037786774629, - "eval_overall_f1": 0.9540816326530612, - "eval_overall_precision": 0.9540816326530612, - "eval_overall_recall": 0.9540816326530612, - "eval_runtime": 0.6166, - "eval_samples_per_second": 275.684, - "eval_steps_per_second": 4.865, - "step": 1920 - }, - { - "epoch": 21.0, - "grad_norm": 0.01730382815003395, - "learning_rate": 3.9500000000000005e-05, - "loss": 0.0014, - "step": 2016 - }, - { - "epoch": 21.0, - "eval_LOCATION_f1": 0.9371428571428573, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9213483146067416, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9438202247191011, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9438202247191011, - "eval_ORGANIZATION_recall": 0.9438202247191011, - "eval_PERSON_f1": 0.96875, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.96875, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.07114380598068237, - "eval_overall_accuracy": 0.9897435897435898, - "eval_overall_f1": 0.9504447268106736, - "eval_overall_precision": 0.9468354430379747, - "eval_overall_recall": 0.9540816326530612, - "eval_runtime": 0.6139, - "eval_samples_per_second": 276.91, - "eval_steps_per_second": 4.887, - "step": 2016 - }, - { - "epoch": 22.0, - "grad_norm": 0.02176562510430813, - "learning_rate": 3.9000000000000006e-05, - "loss": 0.0018, - "step": 2112 - }, - { - "epoch": 22.0, - "eval_LOCATION_f1": 0.9180327868852459, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.865979381443299, - "eval_LOCATION_recall": 0.9767441860465116, - "eval_ORGANIZATION_f1": 0.9455587392550143, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9649122807017544, - "eval_ORGANIZATION_recall": 0.9269662921348315, - "eval_PERSON_f1": 0.9727626459143969, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9689922480620154, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.07050614058971405, - "eval_overall_accuracy": 0.9881241565452091, - "eval_overall_f1": 0.9480354879594423, - "eval_overall_precision": 0.9420654911838791, - "eval_overall_recall": 0.9540816326530612, - "eval_runtime": 0.6221, - "eval_samples_per_second": 273.255, - "eval_steps_per_second": 4.822, - "step": 2112 - }, - { - "epoch": 23.0, - "grad_norm": 0.014040918089449406, - "learning_rate": 3.85e-05, - "loss": 0.0035, - "step": 2208 - }, - { - "epoch": 23.0, - "eval_LOCATION_f1": 0.9333333333333332, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8936170212765957, - "eval_LOCATION_recall": 0.9767441860465116, - "eval_ORGANIZATION_f1": 0.9714285714285714, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9883720930232558, - "eval_ORGANIZATION_recall": 0.9550561797752809, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.060489047318696976, - "eval_overall_accuracy": 0.992442645074224, - "eval_overall_f1": 0.9656050955414013, - "eval_overall_precision": 0.9643765903307888, - "eval_overall_recall": 0.9668367346938775, - "eval_runtime": 0.637, - "eval_samples_per_second": 266.886, - "eval_steps_per_second": 4.71, - "step": 2208 - }, - { - "epoch": 24.0, - "grad_norm": 0.1437457650899887, - "learning_rate": 3.8e-05, - "loss": 0.0022, - "step": 2304 - }, - { - "epoch": 24.0, - "eval_LOCATION_f1": 0.8852459016393444, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8350515463917526, - "eval_LOCATION_recall": 0.9418604651162791, - "eval_ORGANIZATION_f1": 0.9565217391304348, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9880239520958084, - "eval_ORGANIZATION_recall": 0.9269662921348315, - "eval_PERSON_f1": 0.9763779527559054, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9841269841269841, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.08504272252321243, - "eval_overall_accuracy": 0.9883940620782726, - "eval_overall_f1": 0.9462915601023018, - "eval_overall_precision": 0.9487179487179487, - "eval_overall_recall": 0.9438775510204082, - "eval_runtime": 0.6193, - "eval_samples_per_second": 274.501, - "eval_steps_per_second": 4.844, - "step": 2304 - }, - { - "epoch": 25.0, - "grad_norm": 0.002086610533297062, - "learning_rate": 3.7500000000000003e-05, - "loss": 0.0021, - "step": 2400 - }, - { - "epoch": 25.0, - "eval_LOCATION_f1": 0.9318181818181819, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9111111111111111, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.96045197740113, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9659090909090909, - "eval_ORGANIZATION_recall": 0.9550561797752809, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.05982187017798424, - "eval_overall_accuracy": 0.9913630229419703, - "eval_overall_f1": 0.9605095541401274, - "eval_overall_precision": 0.9592875318066157, - "eval_overall_recall": 0.9617346938775511, - "eval_runtime": 0.6295, - "eval_samples_per_second": 270.063, - "eval_steps_per_second": 4.766, - "step": 2400 - }, - { - "epoch": 26.0, - "grad_norm": 0.003268366912379861, - "learning_rate": 3.7e-05, - "loss": 0.0019, - "step": 2496 - }, - { - "epoch": 26.0, - "eval_LOCATION_f1": 0.9222222222222223, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8829787234042553, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9455587392550143, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9649122807017544, - "eval_ORGANIZATION_recall": 0.9269662921348315, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.061995964497327805, - "eval_overall_accuracy": 0.9910931174089069, - "eval_overall_f1": 0.951530612244898, - "eval_overall_precision": 0.951530612244898, - "eval_overall_recall": 0.951530612244898, - "eval_runtime": 0.6212, - "eval_samples_per_second": 273.665, - "eval_steps_per_second": 4.829, - "step": 2496 - }, - { - "epoch": 27.0, - "grad_norm": 0.004727003164589405, - "learning_rate": 3.65e-05, - "loss": 0.002, - "step": 2592 - }, - { - "epoch": 27.0, - "eval_LOCATION_f1": 0.9273743016759777, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8924731182795699, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9542857142857143, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9709302325581395, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.07399417459964752, - "eval_overall_accuracy": 0.9897435897435898, - "eval_overall_f1": 0.9540816326530612, - "eval_overall_precision": 0.9540816326530612, - "eval_overall_recall": 0.9540816326530612, - "eval_runtime": 0.6235, - "eval_samples_per_second": 272.645, - "eval_steps_per_second": 4.811, - "step": 2592 - }, - { - "epoch": 28.0, - "grad_norm": 0.2153054177761078, - "learning_rate": 3.6e-05, - "loss": 0.0035, - "step": 2688 + "eval_runtime": 1.2661, + "eval_samples_per_second": 134.27, + "eval_steps_per_second": 2.369, + "step": 480 }, { - "epoch": 28.0, - "eval_LOCATION_f1": 0.9325842696629213, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9021739130434783, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9515669515669515, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9653179190751445, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9765625, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9765625, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.06874507665634155, - "eval_overall_accuracy": 0.9908232118758434, - "eval_overall_f1": 0.9554140127388535, - "eval_overall_precision": 0.9541984732824428, - "eval_overall_recall": 0.9566326530612245, - "eval_runtime": 0.6239, - "eval_samples_per_second": 272.487, - "eval_steps_per_second": 4.809, - "step": 2688 - }, - { - "epoch": 29.0, - "grad_norm": 0.00853231642395258, - "learning_rate": 3.55e-05, - "loss": 0.0028, - "step": 2784 - }, - { - "epoch": 29.0, - "eval_LOCATION_f1": 0.9398907103825136, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8865979381443299, - "eval_LOCATION_recall": 1.0, - "eval_ORGANIZATION_f1": 0.9371428571428573, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9534883720930233, - "eval_ORGANIZATION_recall": 0.9213483146067416, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.06898324936628342, - "eval_overall_accuracy": 0.9894736842105263, - "eval_overall_f1": 0.9517766497461929, - "eval_overall_precision": 0.946969696969697, - "eval_overall_recall": 0.9566326530612245, - "eval_runtime": 0.6122, - "eval_samples_per_second": 277.692, - "eval_steps_per_second": 4.9, - "step": 2784 - }, - { - "epoch": 30.0, - "grad_norm": 0.019609902054071426, - "learning_rate": 3.5e-05, - "loss": 0.0039, - "step": 2880 - }, - { - "epoch": 30.0, - "eval_LOCATION_f1": 0.8977272727272728, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8777777777777778, - "eval_LOCATION_recall": 0.9186046511627907, - "eval_ORGANIZATION_f1": 0.9523809523809524, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9497206703910615, - "eval_ORGANIZATION_recall": 0.9550561797752809, - "eval_PERSON_f1": 0.968503937007874, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9761904761904762, - "eval_PERSON_recall": 0.9609375, - "eval_loss": 0.05546905845403671, - "eval_overall_accuracy": 0.9897435897435898, - "eval_overall_f1": 0.9453621346886911, - "eval_overall_precision": 0.9417721518987342, - "eval_overall_recall": 0.9489795918367347, - "eval_runtime": 0.6741, - "eval_samples_per_second": 252.178, - "eval_steps_per_second": 4.45, - "step": 2880 - }, - { - "epoch": 31.0, - "grad_norm": 2.175267219543457, - "learning_rate": 3.45e-05, - "loss": 0.0022, - "step": 2976 - }, - { - "epoch": 31.0, - "eval_LOCATION_f1": 0.898876404494382, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8695652173913043, - "eval_LOCATION_recall": 0.9302325581395349, - "eval_ORGANIZATION_f1": 0.9378531073446328, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9431818181818182, - "eval_ORGANIZATION_recall": 0.9325842696629213, - "eval_PERSON_f1": 0.9644268774703557, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.976, - "eval_PERSON_recall": 0.953125, - "eval_loss": 0.07334984838962555, - "eval_overall_accuracy": 0.9883940620782726, - "eval_overall_f1": 0.937579617834395, - "eval_overall_precision": 0.9363867684478372, - "eval_overall_recall": 0.9387755102040817, - "eval_runtime": 0.6319, - "eval_samples_per_second": 269.014, - "eval_steps_per_second": 4.747, - "step": 2976 - }, - { - "epoch": 32.0, - "grad_norm": 0.003728946903720498, - "learning_rate": 3.4000000000000007e-05, - "loss": 0.0028, - "step": 3072 - }, - { - "epoch": 32.0, - "eval_LOCATION_f1": 0.9273743016759777, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8924731182795699, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9378531073446328, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9431818181818182, - "eval_ORGANIZATION_recall": 0.9325842696629213, - "eval_PERSON_f1": 0.9727626459143969, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9689922480620154, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.08215653151273727, - "eval_overall_accuracy": 0.9878542510121457, - "eval_overall_f1": 0.9468354430379746, - "eval_overall_precision": 0.9396984924623115, - "eval_overall_recall": 0.9540816326530612, - "eval_runtime": 0.6113, - "eval_samples_per_second": 278.107, - "eval_steps_per_second": 4.908, - "step": 3072 - }, - { - "epoch": 33.0, - "grad_norm": 0.005732069723308086, - "learning_rate": 3.35e-05, - "loss": 0.0033, - "step": 3168 - }, - { - "epoch": 33.0, - "eval_LOCATION_f1": 0.923076923076923, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.875, - "eval_LOCATION_recall": 0.9767441860465116, - "eval_ORGANIZATION_f1": 0.9482758620689655, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9705882352941176, - "eval_ORGANIZATION_recall": 0.9269662921348315, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.08182308077812195, - "eval_overall_accuracy": 0.9894736842105263, - "eval_overall_f1": 0.9528662420382166, - "eval_overall_precision": 0.9516539440203562, - "eval_overall_recall": 0.9540816326530612, - "eval_runtime": 0.6173, - "eval_samples_per_second": 275.386, - "eval_steps_per_second": 4.86, - "step": 3168 - }, - { - "epoch": 34.0, - "grad_norm": 0.23260165750980377, - "learning_rate": 3.3e-05, - "loss": 0.0029, - "step": 3264 - }, - { - "epoch": 34.0, - "eval_LOCATION_f1": 0.9491525423728814, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9230769230769231, - "eval_LOCATION_recall": 0.9767441860465116, - "eval_ORGANIZATION_f1": 0.9550561797752809, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9550561797752809, - "eval_ORGANIZATION_recall": 0.9550561797752809, - "eval_PERSON_f1": 0.9765625, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9765625, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.08428438007831573, - "eval_overall_accuracy": 0.9883940620782726, - "eval_overall_f1": 0.9607097591888467, - "eval_overall_precision": 0.9546599496221663, - "eval_overall_recall": 0.9668367346938775, - "eval_runtime": 0.6159, - "eval_samples_per_second": 276.003, - "eval_steps_per_second": 4.871, - "step": 3264 - }, - { - "epoch": 35.0, - "grad_norm": 0.013327236287295818, - "learning_rate": 3.2500000000000004e-05, - "loss": 0.0019, - "step": 3360 - }, - { - "epoch": 35.0, - "eval_LOCATION_f1": 0.9392265193370165, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8947368421052632, - "eval_LOCATION_recall": 0.9883720930232558, - "eval_ORGANIZATION_f1": 0.9458689458689458, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9595375722543352, - "eval_ORGANIZATION_recall": 0.9325842696629213, - "eval_PERSON_f1": 0.9765625, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9765625, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.06944381445646286, - "eval_overall_accuracy": 0.9900134952766532, - "eval_overall_f1": 0.9543147208121828, - "eval_overall_precision": 0.9494949494949495, - "eval_overall_recall": 0.9591836734693877, - "eval_runtime": 0.619, - "eval_samples_per_second": 274.646, - "eval_steps_per_second": 4.847, - "step": 3360 - }, - { - "epoch": 36.0, - "grad_norm": 0.007003632839769125, - "learning_rate": 3.2000000000000005e-05, - "loss": 0.0018, - "step": 3456 - }, - { - "epoch": 36.0, - "eval_LOCATION_f1": 0.9497206703910613, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9139784946236559, - "eval_LOCATION_recall": 0.9883720930232558, - "eval_ORGANIZATION_f1": 0.9458689458689458, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9595375722543352, - "eval_ORGANIZATION_recall": 0.9325842696629213, - "eval_PERSON_f1": 0.9765625, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9765625, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.07694654166698456, - "eval_overall_accuracy": 0.9892037786774629, - "eval_overall_f1": 0.9567430025445292, - "eval_overall_precision": 0.9543147208121827, - "eval_overall_recall": 0.9591836734693877, - "eval_runtime": 0.6187, - "eval_samples_per_second": 274.768, - "eval_steps_per_second": 4.849, - "step": 3456 - }, - { - "epoch": 37.0, - "grad_norm": 0.0038402078207582235, - "learning_rate": 3.15e-05, - "loss": 0.001, - "step": 3552 - }, - { - "epoch": 37.0, - "eval_LOCATION_f1": 0.9392265193370165, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8947368421052632, - "eval_LOCATION_recall": 0.9883720930232558, - "eval_ORGANIZATION_f1": 0.9435028248587571, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9488636363636364, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.08463935554027557, - "eval_overall_accuracy": 0.9875843454790824, - "eval_overall_f1": 0.9544303797468354, - "eval_overall_precision": 0.9472361809045227, - "eval_overall_recall": 0.9617346938775511, - "eval_runtime": 0.6307, - "eval_samples_per_second": 269.557, - "eval_steps_per_second": 4.757, - "step": 3552 - }, - { - "epoch": 38.0, - "grad_norm": 0.0020391629077494144, - "learning_rate": 3.1e-05, - "loss": 0.0013, - "step": 3648 - }, - { - "epoch": 38.0, - "eval_LOCATION_f1": 0.9032258064516129, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.84, - "eval_LOCATION_recall": 0.9767441860465116, - "eval_ORGANIZATION_f1": 0.9337175792507205, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9585798816568047, - "eval_ORGANIZATION_recall": 0.9101123595505618, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.09401915222406387, - "eval_overall_accuracy": 0.9873144399460189, - "eval_overall_f1": 0.9390862944162437, - "eval_overall_precision": 0.9343434343434344, - "eval_overall_recall": 0.9438775510204082, - "eval_runtime": 0.6125, - "eval_samples_per_second": 277.53, - "eval_steps_per_second": 4.898, - "step": 3648 - }, - { - "epoch": 39.0, - "grad_norm": 0.003371034050360322, - "learning_rate": 3.05e-05, - "loss": 0.0015, - "step": 3744 - }, - { - "epoch": 39.0, - "eval_LOCATION_f1": 0.9171270718232045, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8736842105263158, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9314285714285714, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9476744186046512, - "eval_ORGANIZATION_recall": 0.9157303370786517, - "eval_PERSON_f1": 0.96875, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.96875, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.09674811363220215, - "eval_overall_accuracy": 0.9854251012145749, - "eval_overall_f1": 0.9402795425667091, - "eval_overall_precision": 0.9367088607594937, - "eval_overall_recall": 0.9438775510204082, - "eval_runtime": 0.6082, - "eval_samples_per_second": 279.501, - "eval_steps_per_second": 4.932, - "step": 3744 - }, - { - "epoch": 40.0, - "grad_norm": 0.0035275197587907314, - "learning_rate": 3e-05, - "loss": 0.0011, - "step": 3840 - }, - { - "epoch": 40.0, - "eval_LOCATION_f1": 0.9039548022598871, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8791208791208791, - "eval_LOCATION_recall": 0.9302325581395349, - "eval_ORGANIZATION_f1": 0.9340974212034385, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9532163742690059, - "eval_ORGANIZATION_recall": 0.9157303370786517, - "eval_PERSON_f1": 0.9647058823529412, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.968503937007874, - "eval_PERSON_recall": 0.9609375, - "eval_loss": 0.09050867706537247, - "eval_overall_accuracy": 0.9870445344129555, - "eval_overall_f1": 0.9372599231754162, - "eval_overall_precision": 0.9408740359897172, - "eval_overall_recall": 0.9336734693877551, - "eval_runtime": 0.6259, - "eval_samples_per_second": 271.612, - "eval_steps_per_second": 4.793, - "step": 3840 - }, - { - "epoch": 41.0, - "grad_norm": 0.005569122266024351, - "learning_rate": 2.95e-05, - "loss": 0.0022, - "step": 3936 - }, - { - "epoch": 41.0, - "eval_LOCATION_f1": 0.942528735632184, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9318181818181818, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9214092140921409, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.8900523560209425, - "eval_ORGANIZATION_recall": 0.9550561797752809, - "eval_PERSON_f1": 0.96875, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.96875, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.11127809435129166, - "eval_overall_accuracy": 0.9848852901484481, - "eval_overall_f1": 0.9411764705882352, - "eval_overall_precision": 0.9238329238329238, - "eval_overall_recall": 0.9591836734693877, - "eval_runtime": 0.622, - "eval_samples_per_second": 273.292, - "eval_steps_per_second": 4.823, - "step": 3936 - }, - { - "epoch": 42.0, - "grad_norm": 0.0007991730817593634, - "learning_rate": 2.9e-05, - "loss": 0.0014, - "step": 4032 - }, - { - "epoch": 42.0, - "eval_LOCATION_f1": 0.9545454545454545, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9333333333333333, - "eval_LOCATION_recall": 0.9767441860465116, - "eval_ORGANIZATION_f1": 0.9497206703910613, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9444444444444444, - "eval_ORGANIZATION_recall": 0.9550561797752809, - "eval_PERSON_f1": 0.96875, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.96875, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.09354653209447861, - "eval_overall_accuracy": 0.9881241565452091, - "eval_overall_f1": 0.9569620253164557, - "eval_overall_precision": 0.949748743718593, - "eval_overall_recall": 0.9642857142857143, - "eval_runtime": 0.6202, - "eval_samples_per_second": 274.121, - "eval_steps_per_second": 4.837, - "step": 4032 - }, - { - "epoch": 43.0, - "grad_norm": 0.0014058761298656464, - "learning_rate": 2.8499999999999998e-05, - "loss": 0.0013, - "step": 4128 - }, - { - "epoch": 43.0, - "eval_LOCATION_f1": 0.9318181818181819, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9111111111111111, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9431818181818182, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9540229885057471, - "eval_ORGANIZATION_recall": 0.9325842696629213, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.0885852500796318, - "eval_overall_accuracy": 0.9883940620782726, - "eval_overall_f1": 0.9527458492975734, - "eval_overall_precision": 0.9539641943734015, - "eval_overall_recall": 0.951530612244898, - "eval_runtime": 0.6174, - "eval_samples_per_second": 275.335, - "eval_steps_per_second": 4.859, - "step": 4128 - }, - { - "epoch": 44.0, - "grad_norm": 0.0023765000514686108, - "learning_rate": 2.8000000000000003e-05, - "loss": 0.0006, - "step": 4224 - }, - { - "epoch": 44.0, - "eval_LOCATION_f1": 0.9325842696629213, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9021739130434783, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9461756373937678, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9542857142857143, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.09644779562950134, - "eval_overall_accuracy": 0.9878542510121457, - "eval_overall_f1": 0.9541984732824428, - "eval_overall_precision": 0.9517766497461929, - "eval_overall_recall": 0.9566326530612245, - "eval_runtime": 0.6234, - "eval_samples_per_second": 272.715, - "eval_steps_per_second": 4.813, - "step": 4224 - }, - { - "epoch": 45.0, - "grad_norm": 0.017548931762576103, - "learning_rate": 2.7500000000000004e-05, - "loss": 0.0013, - "step": 4320 - }, - { - "epoch": 45.0, - "eval_LOCATION_f1": 0.9378531073446328, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9120879120879121, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9435028248587571, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9488636363636364, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.0950535386800766, - "eval_overall_accuracy": 0.9883940620782726, - "eval_overall_f1": 0.9516539440203563, - "eval_overall_precision": 0.949238578680203, - "eval_overall_recall": 0.9540816326530612, - "eval_runtime": 0.6178, - "eval_samples_per_second": 275.187, - "eval_steps_per_second": 4.856, - "step": 4320 - }, - { - "epoch": 46.0, - "grad_norm": 0.002911866409704089, - "learning_rate": 2.7000000000000002e-05, - "loss": 0.0012, - "step": 4416 - }, - { - "epoch": 46.0, - "eval_LOCATION_f1": 0.9444444444444444, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9042553191489362, - "eval_LOCATION_recall": 0.9883720930232558, - "eval_ORGANIZATION_f1": 0.9515669515669515, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9653179190751445, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.09485064446926117, - "eval_overall_accuracy": 0.9881241565452091, - "eval_overall_f1": 0.9592875318066159, - "eval_overall_precision": 0.9568527918781726, - "eval_overall_recall": 0.9617346938775511, - "eval_runtime": 0.632, - "eval_samples_per_second": 268.976, - "eval_steps_per_second": 4.747, - "step": 4416 - }, - { - "epoch": 47.0, - "grad_norm": 0.0010636880761012435, - "learning_rate": 2.6500000000000004e-05, - "loss": 0.0021, - "step": 4512 - }, - { - "epoch": 47.0, - "eval_LOCATION_f1": 0.9385474860335195, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9032258064516129, - "eval_LOCATION_recall": 0.9767441860465116, - "eval_ORGANIZATION_f1": 0.9371428571428573, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9534883720930233, - "eval_ORGANIZATION_recall": 0.9213483146067416, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.09788823872804642, - "eval_overall_accuracy": 0.9865047233468286, - "eval_overall_f1": 0.9489795918367347, - "eval_overall_precision": 0.9489795918367347, - "eval_overall_recall": 0.9489795918367347, - "eval_runtime": 0.6241, - "eval_samples_per_second": 272.413, - "eval_steps_per_second": 4.807, - "step": 4512 - }, - { - "epoch": 48.0, - "grad_norm": 0.0045914603397250175, - "learning_rate": 2.6000000000000002e-05, - "loss": 0.0012, - "step": 4608 - }, - { - "epoch": 48.0, - "eval_LOCATION_f1": 0.9333333333333332, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8936170212765957, - "eval_LOCATION_recall": 0.9767441860465116, - "eval_ORGANIZATION_f1": 0.9431818181818182, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9540229885057471, - "eval_ORGANIZATION_recall": 0.9325842696629213, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.09237338602542877, - "eval_overall_accuracy": 0.9870445344129555, - "eval_overall_f1": 0.9504447268106736, - "eval_overall_precision": 0.9468354430379747, - "eval_overall_recall": 0.9540816326530612, - "eval_runtime": 0.623, - "eval_samples_per_second": 272.874, - "eval_steps_per_second": 4.815, - "step": 4608 - }, - { - "epoch": 49.0, - "grad_norm": 0.0008670548559166491, - "learning_rate": 2.5500000000000003e-05, - "loss": 0.0017, - "step": 4704 - }, - { - "epoch": 49.0, - "eval_LOCATION_f1": 0.9333333333333332, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8936170212765957, - "eval_LOCATION_recall": 0.9767441860465116, - "eval_ORGANIZATION_f1": 0.9461756373937678, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9542857142857143, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.08787988871335983, - "eval_overall_accuracy": 0.9870445344129555, - "eval_overall_f1": 0.9543147208121828, - "eval_overall_precision": 0.9494949494949495, - "eval_overall_recall": 0.9591836734693877, - "eval_runtime": 0.6347, - "eval_samples_per_second": 267.842, - "eval_steps_per_second": 4.727, - "step": 4704 - }, - { - "epoch": 50.0, - "grad_norm": 0.0007247235625982285, - "learning_rate": 2.5e-05, - "loss": 0.0008, - "step": 4800 - }, - { - "epoch": 50.0, - "eval_LOCATION_f1": 0.9152542372881357, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8901098901098901, - "eval_LOCATION_recall": 0.9418604651162791, - "eval_ORGANIZATION_f1": 0.951841359773371, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.96, - "eval_ORGANIZATION_recall": 0.9438202247191011, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.09165750443935394, - "eval_overall_accuracy": 0.9878542510121457, - "eval_overall_f1": 0.9503184713375795, - "eval_overall_precision": 0.9491094147582697, - "eval_overall_recall": 0.951530612244898, - "eval_runtime": 0.6032, - "eval_samples_per_second": 281.843, - "eval_steps_per_second": 4.974, - "step": 4800 - }, - { - "epoch": 51.0, - "grad_norm": 0.9623302221298218, - "learning_rate": 2.45e-05, - "loss": 0.0006, - "step": 4896 - }, - { - "epoch": 51.0, - "eval_LOCATION_f1": 0.9265536723163842, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9010989010989011, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9488636363636365, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9597701149425287, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9609375, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9609375, - "eval_PERSON_recall": 0.9609375, - "eval_loss": 0.08532703667879105, - "eval_overall_accuracy": 0.988663967611336, - "eval_overall_f1": 0.9477707006369427, - "eval_overall_precision": 0.9465648854961832, - "eval_overall_recall": 0.9489795918367347, - "eval_runtime": 0.6092, - "eval_samples_per_second": 279.073, - "eval_steps_per_second": 4.925, - "step": 4896 - }, - { - "epoch": 52.0, - "grad_norm": 0.0028847495559602976, - "learning_rate": 2.4e-05, - "loss": 0.0019, - "step": 4992 - }, - { - "epoch": 52.0, - "eval_LOCATION_f1": 0.9222222222222223, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8829787234042553, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9449275362318841, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9760479041916168, - "eval_ORGANIZATION_recall": 0.9157303370786517, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.09849222749471664, - "eval_overall_accuracy": 0.9870445344129555, - "eval_overall_f1": 0.9487179487179488, - "eval_overall_precision": 0.9536082474226805, - "eval_overall_recall": 0.9438775510204082, - "eval_runtime": 0.6203, - "eval_samples_per_second": 274.082, - "eval_steps_per_second": 4.837, - "step": 4992 - }, - { - "epoch": 53.0, - "grad_norm": 0.01142776757478714, - "learning_rate": 2.35e-05, - "loss": 0.0008, - "step": 5088 - }, - { - "epoch": 53.0, - "eval_LOCATION_f1": 0.9111111111111112, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8723404255319149, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9418604651162792, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9759036144578314, - "eval_ORGANIZATION_recall": 0.9101123595505618, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.10701917856931686, - "eval_overall_accuracy": 0.9865047233468286, - "eval_overall_f1": 0.944801026957638, - "eval_overall_precision": 0.9509043927648578, - "eval_overall_recall": 0.9387755102040817, - "eval_runtime": 0.6158, - "eval_samples_per_second": 276.077, - "eval_steps_per_second": 4.872, - "step": 5088 - }, - { - "epoch": 54.0, - "grad_norm": 0.001288025639951229, - "learning_rate": 2.3000000000000003e-05, - "loss": 0.0014, - "step": 5184 - }, - { - "epoch": 54.0, - "eval_LOCATION_f1": 0.9325842696629213, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9021739130434783, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9455587392550143, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9649122807017544, - "eval_ORGANIZATION_recall": 0.9269662921348315, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.09011322259902954, - "eval_overall_accuracy": 0.9894736842105263, - "eval_overall_f1": 0.9514066496163682, - "eval_overall_precision": 0.9538461538461539, - "eval_overall_recall": 0.9489795918367347, - "eval_runtime": 0.6146, - "eval_samples_per_second": 276.591, - "eval_steps_per_second": 4.881, - "step": 5184 - }, - { - "epoch": 55.0, - "grad_norm": 0.0010033083381131291, - "learning_rate": 2.25e-05, - "loss": 0.0006, - "step": 5280 - }, - { - "epoch": 55.0, - "eval_LOCATION_f1": 0.9162011173184358, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8817204301075269, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9435028248587571, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9488636363636364, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.09055519849061966, - "eval_overall_accuracy": 0.9873144399460189, - "eval_overall_f1": 0.949238578680203, - "eval_overall_precision": 0.9444444444444444, - "eval_overall_recall": 0.9540816326530612, - "eval_runtime": 0.6307, - "eval_samples_per_second": 269.545, - "eval_steps_per_second": 4.757, - "step": 5280 - }, - { - "epoch": 56.0, - "grad_norm": 0.000921078200917691, - "learning_rate": 2.2000000000000003e-05, - "loss": 0.0005, - "step": 5376 - }, - { - "epoch": 56.0, - "eval_LOCATION_f1": 0.9333333333333332, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8936170212765957, - "eval_LOCATION_recall": 0.9767441860465116, - "eval_ORGANIZATION_f1": 0.9303621169916434, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9226519337016574, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.10003948211669922, - "eval_overall_accuracy": 0.9862348178137652, - "eval_overall_f1": 0.9445843828715365, - "eval_overall_precision": 0.9328358208955224, - "eval_overall_recall": 0.9566326530612245, - "eval_runtime": 0.6179, - "eval_samples_per_second": 275.133, - "eval_steps_per_second": 4.855, - "step": 5376 - }, - { - "epoch": 57.0, - "grad_norm": 0.00035891789593733847, - "learning_rate": 2.15e-05, - "loss": 0.0005, - "step": 5472 - }, - { - "epoch": 57.0, - "eval_LOCATION_f1": 0.9378531073446328, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9120879120879121, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9488636363636365, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9597701149425287, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.09421912580728531, - "eval_overall_accuracy": 0.9883940620782726, - "eval_overall_f1": 0.9540816326530612, - "eval_overall_precision": 0.9540816326530612, - "eval_overall_recall": 0.9540816326530612, - "eval_runtime": 0.6215, - "eval_samples_per_second": 273.523, - "eval_steps_per_second": 4.827, - "step": 5472 - }, - { - "epoch": 58.0, - "grad_norm": 0.0010940376669168472, - "learning_rate": 2.1e-05, - "loss": 0.0004, - "step": 5568 - }, - { - "epoch": 58.0, - "eval_LOCATION_f1": 0.9111111111111112, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8723404255319149, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9367816091954023, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9588235294117647, - "eval_ORGANIZATION_recall": 0.9157303370786517, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.1046585738658905, - "eval_overall_accuracy": 0.9881241565452091, - "eval_overall_f1": 0.9425287356321839, - "eval_overall_precision": 0.9437340153452686, - "eval_overall_recall": 0.9413265306122449, - "eval_runtime": 0.628, - "eval_samples_per_second": 270.713, - "eval_steps_per_second": 4.777, - "step": 5568 - }, - { - "epoch": 59.0, - "grad_norm": 0.00014955128426663578, - "learning_rate": 2.05e-05, - "loss": 0.0002, - "step": 5664 - }, - { - "epoch": 59.0, - "eval_LOCATION_f1": 0.9333333333333332, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8936170212765957, - "eval_LOCATION_recall": 0.9767441860465116, - "eval_ORGANIZATION_f1": 0.9488636363636365, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9597701149425287, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.09259077161550522, - "eval_overall_accuracy": 0.9889338731443995, - "eval_overall_f1": 0.9529860228716647, - "eval_overall_precision": 0.9493670886075949, - "eval_overall_recall": 0.9566326530612245, - "eval_runtime": 0.6254, - "eval_samples_per_second": 271.812, - "eval_steps_per_second": 4.797, - "step": 5664 - }, - { - "epoch": 60.0, - "grad_norm": 0.00016060801863204688, - "learning_rate": 2e-05, - "loss": 0.0002, - "step": 5760 - }, - { - "epoch": 60.0, - "eval_LOCATION_f1": 0.9281767955801105, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8842105263157894, - "eval_LOCATION_recall": 0.9767441860465116, - "eval_ORGANIZATION_f1": 0.9408450704225352, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.943502824858757, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.0989777147769928, - "eval_overall_accuracy": 0.9878542510121457, - "eval_overall_f1": 0.9506953223767383, - "eval_overall_precision": 0.9423558897243107, - "eval_overall_recall": 0.9591836734693877, - "eval_runtime": 0.6078, - "eval_samples_per_second": 279.691, - "eval_steps_per_second": 4.936, - "step": 5760 - }, - { - "epoch": 61.0, - "grad_norm": 0.000217687978874892, - "learning_rate": 1.9500000000000003e-05, - "loss": 0.0002, - "step": 5856 - }, - { - "epoch": 61.0, - "eval_LOCATION_f1": 0.9265536723163842, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9010989010989011, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9507246376811593, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9820359281437125, - "eval_ORGANIZATION_recall": 0.9213483146067416, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.1063530445098877, - "eval_overall_accuracy": 0.9889338731443995, - "eval_overall_f1": 0.9523809523809523, - "eval_overall_precision": 0.961038961038961, - "eval_overall_recall": 0.9438775510204082, - "eval_runtime": 0.6224, - "eval_samples_per_second": 273.138, - "eval_steps_per_second": 4.82, - "step": 5856 - }, - { - "epoch": 62.0, - "grad_norm": 0.00019853039702866226, - "learning_rate": 1.9e-05, - "loss": 0.0009, - "step": 5952 - }, - { - "epoch": 62.0, - "eval_LOCATION_f1": 0.9213483146067417, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8913043478260869, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9464788732394366, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9491525423728814, - "eval_ORGANIZATION_recall": 0.9438202247191011, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.09706703573465347, - "eval_overall_accuracy": 0.9873144399460189, - "eval_overall_f1": 0.949238578680203, - "eval_overall_precision": 0.9444444444444444, - "eval_overall_recall": 0.9540816326530612, - "eval_runtime": 0.6098, - "eval_samples_per_second": 278.761, - "eval_steps_per_second": 4.919, - "step": 5952 - }, - { - "epoch": 63.0, - "grad_norm": 0.0006731785251758993, - "learning_rate": 1.85e-05, - "loss": 0.0006, - "step": 6048 - }, - { - "epoch": 63.0, - "eval_LOCATION_f1": 0.9318181818181819, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9111111111111111, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9431818181818182, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9540229885057471, - "eval_ORGANIZATION_recall": 0.9325842696629213, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.09098156541585922, - "eval_overall_accuracy": 0.9897435897435898, - "eval_overall_f1": 0.9527458492975734, - "eval_overall_precision": 0.9539641943734015, - "eval_overall_recall": 0.951530612244898, - "eval_runtime": 0.6221, - "eval_samples_per_second": 273.278, - "eval_steps_per_second": 4.823, - "step": 6048 - }, - { - "epoch": 64.0, - "grad_norm": 0.00021680475038010627, - "learning_rate": 1.8e-05, - "loss": 0.0002, - "step": 6144 - }, - { - "epoch": 64.0, - "eval_LOCATION_f1": 0.9273743016759777, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8924731182795699, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9467787114845938, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9441340782122905, - "eval_ORGANIZATION_recall": 0.949438202247191, - "eval_PERSON_f1": 0.9765625, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9765625, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.09267909079790115, - "eval_overall_accuracy": 0.9873144399460189, - "eval_overall_f1": 0.952020202020202, - "eval_overall_precision": 0.9425, - "eval_overall_recall": 0.9617346938775511, - "eval_runtime": 0.6156, - "eval_samples_per_second": 276.161, - "eval_steps_per_second": 4.873, - "step": 6144 - }, - { - "epoch": 65.0, - "grad_norm": 0.00027364559355191886, - "learning_rate": 1.75e-05, - "loss": 0.0003, - "step": 6240 - }, - { - "epoch": 65.0, - "eval_LOCATION_f1": 0.9378531073446328, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9120879120879121, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9408450704225352, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.943502824858757, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.08972997963428497, - "eval_overall_accuracy": 0.988663967611336, - "eval_overall_f1": 0.9529860228716647, - "eval_overall_precision": 0.9493670886075949, - "eval_overall_recall": 0.9566326530612245, - "eval_runtime": 0.6221, - "eval_samples_per_second": 273.264, - "eval_steps_per_second": 4.822, - "step": 6240 - }, - { - "epoch": 66.0, - "grad_norm": 0.0007816065917722881, - "learning_rate": 1.7000000000000003e-05, - "loss": 0.0004, - "step": 6336 - }, - { - "epoch": 66.0, - "eval_LOCATION_f1": 0.9318181818181819, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9111111111111111, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9464788732394366, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9491525423728814, - "eval_ORGANIZATION_recall": 0.9438202247191011, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.08769363164901733, - "eval_overall_accuracy": 0.9902834008097166, - "eval_overall_f1": 0.9516539440203563, - "eval_overall_precision": 0.949238578680203, - "eval_overall_recall": 0.9540816326530612, - "eval_runtime": 0.6205, - "eval_samples_per_second": 273.962, - "eval_steps_per_second": 4.835, - "step": 6336 - }, - { - "epoch": 67.0, - "grad_norm": 0.00021609271061606705, - "learning_rate": 1.65e-05, - "loss": 0.0005, - "step": 6432 - }, - { - "epoch": 67.0, - "eval_LOCATION_f1": 0.9378531073446328, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9120879120879121, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9491525423728814, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9545454545454546, - "eval_ORGANIZATION_recall": 0.9438202247191011, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.08973436057567596, - "eval_overall_accuracy": 0.9897435897435898, - "eval_overall_f1": 0.9541984732824428, - "eval_overall_precision": 0.9517766497461929, - "eval_overall_recall": 0.9566326530612245, - "eval_runtime": 0.6173, - "eval_samples_per_second": 275.382, - "eval_steps_per_second": 4.86, - "step": 6432 - }, - { - "epoch": 68.0, - "grad_norm": 0.00027348866569809616, - "learning_rate": 1.6000000000000003e-05, - "loss": 0.0006, - "step": 6528 - }, - { - "epoch": 68.0, - "eval_LOCATION_f1": 0.9318181818181819, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9111111111111111, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9467787114845938, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9441340782122905, - "eval_ORGANIZATION_recall": 0.949438202247191, - "eval_PERSON_f1": 0.96875, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.96875, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.08785340934991837, - "eval_overall_accuracy": 0.9900134952766532, - "eval_overall_f1": 0.9505703422053231, - "eval_overall_precision": 0.9445843828715366, - "eval_overall_recall": 0.9566326530612245, - "eval_runtime": 0.6237, - "eval_samples_per_second": 272.578, - "eval_steps_per_second": 4.81, - "step": 6528 - }, - { - "epoch": 69.0, - "grad_norm": 0.0023548200260847807, - "learning_rate": 1.55e-05, - "loss": 0.0009, - "step": 6624 - }, - { - "epoch": 69.0, - "eval_LOCATION_f1": 0.9325842696629213, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9021739130434783, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9415041782729805, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9337016574585635, - "eval_ORGANIZATION_recall": 0.949438202247191, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.09666956961154938, - "eval_overall_accuracy": 0.988663967611336, - "eval_overall_f1": 0.952020202020202, - "eval_overall_precision": 0.9425, - "eval_overall_recall": 0.9617346938775511, - "eval_runtime": 0.6177, - "eval_samples_per_second": 275.204, - "eval_steps_per_second": 4.857, - "step": 6624 - }, - { - "epoch": 70.0, - "grad_norm": 0.00021428128820843995, - "learning_rate": 1.5e-05, - "loss": 0.0002, - "step": 6720 - }, - { - "epoch": 70.0, - "eval_LOCATION_f1": 0.9162011173184358, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8817204301075269, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9441340782122906, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9388888888888889, - "eval_ORGANIZATION_recall": 0.949438202247191, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.08946698158979416, - "eval_overall_accuracy": 0.9892037786774629, - "eval_overall_f1": 0.9494949494949495, - "eval_overall_precision": 0.94, - "eval_overall_recall": 0.9591836734693877, - "eval_runtime": 0.6203, - "eval_samples_per_second": 274.066, - "eval_steps_per_second": 4.836, - "step": 6720 - }, - { - "epoch": 71.0, - "grad_norm": 0.10202125459909439, - "learning_rate": 1.45e-05, - "loss": 0.0002, - "step": 6816 - }, - { - "epoch": 71.0, - "eval_LOCATION_f1": 0.9325842696629213, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9021739130434783, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.949438202247191, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.949438202247191, - "eval_ORGANIZATION_recall": 0.949438202247191, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.0858520120382309, - "eval_overall_accuracy": 0.9900134952766532, - "eval_overall_f1": 0.9556400506970849, - "eval_overall_precision": 0.9496221662468514, - "eval_overall_recall": 0.9617346938775511, - "eval_runtime": 0.6299, - "eval_samples_per_second": 269.863, - "eval_steps_per_second": 4.762, - "step": 6816 - }, - { - "epoch": 72.0, - "grad_norm": 0.09070995450019836, - "learning_rate": 1.4000000000000001e-05, - "loss": 0.0002, - "step": 6912 - }, - { - "epoch": 72.0, - "eval_LOCATION_f1": 0.9265536723163842, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9010989010989011, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9510086455331412, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9763313609467456, - "eval_ORGANIZATION_recall": 0.9269662921348315, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.10603731870651245, - "eval_overall_accuracy": 0.988663967611336, - "eval_overall_f1": 0.9525032092426188, - "eval_overall_precision": 0.958656330749354, - "eval_overall_recall": 0.9464285714285714, - "eval_runtime": 0.6211, - "eval_samples_per_second": 273.721, - "eval_steps_per_second": 4.83, - "step": 6912 - }, - { - "epoch": 73.0, - "grad_norm": 0.0006867772899568081, - "learning_rate": 1.3500000000000001e-05, - "loss": 0.0014, - "step": 7008 - }, - { - "epoch": 73.0, - "eval_LOCATION_f1": 0.9265536723163842, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9010989010989011, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.942857142857143, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9593023255813954, - "eval_ORGANIZATION_recall": 0.9269662921348315, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.09222256392240524, - "eval_overall_accuracy": 0.9889338731443995, - "eval_overall_f1": 0.9488491048593349, - "eval_overall_precision": 0.9512820512820512, - "eval_overall_recall": 0.9464285714285714, - "eval_runtime": 0.6161, - "eval_samples_per_second": 275.908, - "eval_steps_per_second": 4.869, - "step": 7008 - }, - { - "epoch": 74.0, - "grad_norm": 0.0002241921320091933, - "learning_rate": 1.3000000000000001e-05, - "loss": 0.0002, - "step": 7104 - }, - { - "epoch": 74.0, - "eval_LOCATION_f1": 0.9101123595505618, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.8804347826086957, - "eval_LOCATION_recall": 0.9418604651162791, - "eval_ORGANIZATION_f1": 0.9307479224376731, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9180327868852459, - "eval_ORGANIZATION_recall": 0.9438202247191011, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.08940228819847107, - "eval_overall_accuracy": 0.9875843454790824, - "eval_overall_f1": 0.9420654911838792, - "eval_overall_precision": 0.9303482587064676, - "eval_overall_recall": 0.9540816326530612, - "eval_runtime": 0.6348, - "eval_samples_per_second": 267.81, - "eval_steps_per_second": 4.726, - "step": 7104 - }, - { - "epoch": 75.0, - "grad_norm": 0.00044650197378359735, - "learning_rate": 1.25e-05, - "loss": 0.0006, - "step": 7200 - }, - { - "epoch": 75.0, - "eval_LOCATION_f1": 0.9265536723163842, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9010989010989011, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9512893982808023, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9707602339181286, - "eval_ORGANIZATION_recall": 0.9325842696629213, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.09128385782241821, - "eval_overall_accuracy": 0.9883940620782726, - "eval_overall_f1": 0.9551856594110116, - "eval_overall_precision": 0.9588688946015425, - "eval_overall_recall": 0.951530612244898, - "eval_runtime": 0.6124, - "eval_samples_per_second": 277.584, - "eval_steps_per_second": 4.899, - "step": 7200 - }, - { - "epoch": 76.0, - "grad_norm": 0.000292993790935725, - "learning_rate": 1.2e-05, - "loss": 0.0001, - "step": 7296 - }, - { - "epoch": 76.0, - "eval_LOCATION_f1": 0.9265536723163842, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9010989010989011, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9512893982808023, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9707602339181286, - "eval_ORGANIZATION_recall": 0.9325842696629213, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.09170693904161453, - "eval_overall_accuracy": 0.9883940620782726, - "eval_overall_f1": 0.9551856594110116, - "eval_overall_precision": 0.9588688946015425, - "eval_overall_recall": 0.951530612244898, - "eval_runtime": 0.6146, - "eval_samples_per_second": 276.609, - "eval_steps_per_second": 4.881, - "step": 7296 - }, - { - "epoch": 77.0, - "grad_norm": 0.00019047647947445512, - "learning_rate": 1.1500000000000002e-05, - "loss": 0.0002, - "step": 7392 - }, - { - "epoch": 77.0, - "eval_LOCATION_f1": 0.9265536723163842, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9010989010989011, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9482758620689655, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9705882352941176, - "eval_ORGANIZATION_recall": 0.9269662921348315, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.09116268157958984, - "eval_overall_accuracy": 0.9881241565452091, - "eval_overall_f1": 0.9538461538461539, - "eval_overall_precision": 0.9587628865979382, - "eval_overall_recall": 0.9489795918367347, - "eval_runtime": 0.6263, - "eval_samples_per_second": 271.453, - "eval_steps_per_second": 4.79, - "step": 7392 - }, - { - "epoch": 78.0, - "grad_norm": 0.00014276170986704528, - "learning_rate": 1.1000000000000001e-05, - "loss": 0.0005, - "step": 7488 - }, - { - "epoch": 78.0, - "eval_LOCATION_f1": 0.9378531073446328, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9120879120879121, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9545454545454545, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9655172413793104, - "eval_ORGANIZATION_recall": 0.9438202247191011, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.08652422577142715, - "eval_overall_accuracy": 0.9900134952766532, - "eval_overall_f1": 0.9591836734693877, - "eval_overall_precision": 0.9591836734693877, - "eval_overall_recall": 0.9591836734693877, - "eval_runtime": 0.6082, - "eval_samples_per_second": 279.516, - "eval_steps_per_second": 4.933, - "step": 7488 - }, - { - "epoch": 79.0, - "grad_norm": 0.0016763107851147652, - "learning_rate": 1.05e-05, - "loss": 0.0006, - "step": 7584 - }, - { - "epoch": 79.0, - "eval_LOCATION_f1": 0.9265536723163842, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9010989010989011, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9570200573065902, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9766081871345029, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9765625, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9765625, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.09329984337091446, - "eval_overall_accuracy": 0.9889338731443995, - "eval_overall_f1": 0.9565217391304348, - "eval_overall_precision": 0.958974358974359, - "eval_overall_recall": 0.9540816326530612, - "eval_runtime": 0.6441, - "eval_samples_per_second": 263.927, - "eval_steps_per_second": 4.658, - "step": 7584 - }, - { - "epoch": 80.0, - "grad_norm": 0.0002261540648760274, - "learning_rate": 1e-05, - "loss": 0.0002, - "step": 7680 - }, - { - "epoch": 80.0, - "eval_LOCATION_f1": 0.9265536723163842, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9010989010989011, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9572649572649572, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9710982658959537, - "eval_ORGANIZATION_recall": 0.9438202247191011, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.08884274959564209, - "eval_overall_accuracy": 0.9892037786774629, - "eval_overall_f1": 0.9578544061302682, - "eval_overall_precision": 0.959079283887468, - "eval_overall_recall": 0.9566326530612245, - "eval_runtime": 0.6274, - "eval_samples_per_second": 270.946, - "eval_steps_per_second": 4.781, - "step": 7680 - }, - { - "epoch": 81.0, - "grad_norm": 0.0001378485030727461, - "learning_rate": 9.5e-06, - "loss": 0.0002, - "step": 7776 - }, - { - "epoch": 81.0, - "eval_LOCATION_f1": 0.9325842696629213, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9021739130434783, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9597701149425286, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9823529411764705, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.09848271310329437, - "eval_overall_accuracy": 0.9889338731443995, - "eval_overall_f1": 0.9603072983354674, - "eval_overall_precision": 0.9640102827763496, - "eval_overall_recall": 0.9566326530612245, - "eval_runtime": 0.6212, - "eval_samples_per_second": 273.66, - "eval_steps_per_second": 4.829, - "step": 7776 - }, - { - "epoch": 82.0, - "grad_norm": 0.0001702494773780927, - "learning_rate": 9e-06, - "loss": 0.0002, - "step": 7872 - }, - { - "epoch": 82.0, - "eval_LOCATION_f1": 0.9378531073446328, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9120879120879121, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9597701149425286, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9823529411764705, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.09399879723787308, - "eval_overall_accuracy": 0.9889338731443995, - "eval_overall_f1": 0.9615384615384616, - "eval_overall_precision": 0.9664948453608248, - "eval_overall_recall": 0.9566326530612245, - "eval_runtime": 0.6254, - "eval_samples_per_second": 271.818, - "eval_steps_per_second": 4.797, - "step": 7872 - }, - { - "epoch": 83.0, - "grad_norm": 0.0005685426294803619, - "learning_rate": 8.500000000000002e-06, - "loss": 0.0002, - "step": 7968 - }, - { - "epoch": 83.0, - "eval_LOCATION_f1": 0.9318181818181819, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9111111111111111, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9602272727272727, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9712643678160919, - "eval_ORGANIZATION_recall": 0.949438202247191, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.09204275161027908, - "eval_overall_accuracy": 0.9900134952766532, - "eval_overall_f1": 0.9604086845466155, - "eval_overall_precision": 0.9616368286445013, - "eval_overall_recall": 0.9591836734693877, - "eval_runtime": 0.6175, - "eval_samples_per_second": 275.294, - "eval_steps_per_second": 4.858, - "step": 7968 - }, - { - "epoch": 84.0, - "grad_norm": 8.941295527620241e-05, - "learning_rate": 8.000000000000001e-06, - "loss": 0.0001, - "step": 8064 - }, - { - "epoch": 84.0, - "eval_LOCATION_f1": 0.9318181818181819, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9111111111111111, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9602272727272727, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9712643678160919, - "eval_ORGANIZATION_recall": 0.949438202247191, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.09195154905319214, - "eval_overall_accuracy": 0.9902834008097166, - "eval_overall_f1": 0.9604086845466155, - "eval_overall_precision": 0.9616368286445013, - "eval_overall_recall": 0.9591836734693877, - "eval_runtime": 0.6245, - "eval_samples_per_second": 272.23, - "eval_steps_per_second": 4.804, - "step": 8064 - }, - { - "epoch": 85.0, - "grad_norm": 0.00026850702124647796, - "learning_rate": 7.5e-06, - "loss": 0.0002, - "step": 8160 - }, - { - "epoch": 85.0, - "eval_LOCATION_f1": 0.9325842696629213, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9021739130434783, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9572649572649572, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9710982658959537, - "eval_ORGANIZATION_recall": 0.9438202247191011, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.09229335933923721, - "eval_overall_accuracy": 0.9900134952766532, - "eval_overall_f1": 0.9591836734693877, - "eval_overall_precision": 0.9591836734693877, - "eval_overall_recall": 0.9591836734693877, - "eval_runtime": 0.6206, - "eval_samples_per_second": 273.932, - "eval_steps_per_second": 4.834, - "step": 8160 - }, - { - "epoch": 86.0, - "grad_norm": 8.95110861165449e-05, - "learning_rate": 7.000000000000001e-06, - "loss": 0.0002, - "step": 8256 - }, - { - "epoch": 86.0, - "eval_LOCATION_f1": 0.9325842696629213, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9021739130434783, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9572649572649572, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9710982658959537, - "eval_ORGANIZATION_recall": 0.9438202247191011, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.09191098809242249, - "eval_overall_accuracy": 0.9902834008097166, - "eval_overall_f1": 0.9591836734693877, - "eval_overall_precision": 0.9591836734693877, - "eval_overall_recall": 0.9591836734693877, - "eval_runtime": 0.6242, - "eval_samples_per_second": 272.347, - "eval_steps_per_second": 4.806, - "step": 8256 - }, - { - "epoch": 87.0, - "grad_norm": 0.00012342970876488835, - "learning_rate": 6.5000000000000004e-06, - "loss": 0.0002, - "step": 8352 - }, - { - "epoch": 87.0, - "eval_LOCATION_f1": 0.9378531073446328, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9120879120879121, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.956772334293948, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9822485207100592, - "eval_ORGANIZATION_recall": 0.9325842696629213, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.10022114217281342, - "eval_overall_accuracy": 0.9889338731443995, - "eval_overall_f1": 0.9602053915275995, - "eval_overall_precision": 0.9664082687338501, - "eval_overall_recall": 0.9540816326530612, - "eval_runtime": 0.6298, - "eval_samples_per_second": 269.93, - "eval_steps_per_second": 4.763, - "step": 8352 - }, - { - "epoch": 88.0, - "grad_norm": 0.00012417102698236704, - "learning_rate": 6e-06, - "loss": 0.0002, - "step": 8448 - }, - { - "epoch": 88.0, - "eval_LOCATION_f1": 0.9325842696629213, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9021739130434783, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9575070821529745, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9657142857142857, - "eval_ORGANIZATION_recall": 0.949438202247191, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.08628383278846741, - "eval_overall_accuracy": 0.99055330634278, - "eval_overall_f1": 0.9592875318066159, - "eval_overall_precision": 0.9568527918781726, - "eval_overall_recall": 0.9617346938775511, - "eval_runtime": 0.6195, - "eval_samples_per_second": 274.403, - "eval_steps_per_second": 4.842, - "step": 8448 - }, - { - "epoch": 89.0, - "grad_norm": 0.00016193394549190998, - "learning_rate": 5.500000000000001e-06, - "loss": 0.0002, - "step": 8544 - }, - { - "epoch": 89.0, - "eval_LOCATION_f1": 0.9325842696629213, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9021739130434783, - "eval_LOCATION_recall": 0.9651162790697675, - "eval_ORGANIZATION_f1": 0.9575070821529745, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9657142857142857, - "eval_ORGANIZATION_recall": 0.949438202247191, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.08694127202033997, - "eval_overall_accuracy": 0.99055330634278, - "eval_overall_f1": 0.9592875318066159, - "eval_overall_precision": 0.9568527918781726, - "eval_overall_recall": 0.9617346938775511, - "eval_runtime": 0.6125, - "eval_samples_per_second": 277.536, - "eval_steps_per_second": 4.898, - "step": 8544 - }, - { - "epoch": 90.0, - "grad_norm": 0.00011703658674377948, - "learning_rate": 5e-06, - "loss": 0.0002, - "step": 8640 - }, - { - "epoch": 90.0, - "eval_LOCATION_f1": 0.9318181818181819, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9111111111111111, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9575070821529745, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9657142857142857, - "eval_ORGANIZATION_recall": 0.949438202247191, - "eval_PERSON_f1": 0.9725490196078432, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.9763779527559056, - "eval_PERSON_recall": 0.96875, - "eval_loss": 0.08616501092910767, - "eval_overall_accuracy": 0.99055330634278, - "eval_overall_f1": 0.9566326530612245, - "eval_overall_precision": 0.9566326530612245, - "eval_overall_recall": 0.9566326530612245, - "eval_runtime": 0.6273, - "eval_samples_per_second": 271.011, - "eval_steps_per_second": 4.783, - "step": 8640 - }, - { - "epoch": 91.0, - "grad_norm": 0.00015127098595257849, - "learning_rate": 4.5e-06, - "loss": 0.0004, - "step": 8736 - }, - { - "epoch": 91.0, - "eval_LOCATION_f1": 0.9265536723163842, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9010989010989011, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9488636363636365, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9597701149425287, - "eval_ORGANIZATION_recall": 0.9382022471910112, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.09131529182195663, - "eval_overall_accuracy": 0.9892037786774629, - "eval_overall_f1": 0.9540816326530612, - "eval_overall_precision": 0.9540816326530612, - "eval_overall_recall": 0.9540816326530612, - "eval_runtime": 0.6102, - "eval_samples_per_second": 278.592, - "eval_steps_per_second": 4.916, - "step": 8736 - }, - { - "epoch": 92.0, - "grad_norm": 0.0004967550048604608, - "learning_rate": 4.000000000000001e-06, - "loss": 0.0002, - "step": 8832 - }, - { - "epoch": 92.0, - "eval_LOCATION_f1": 0.9265536723163842, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9010989010989011, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9659090909090908, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9770114942528736, - "eval_ORGANIZATION_recall": 0.9550561797752809, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.09077779948711395, - "eval_overall_accuracy": 0.9900134952766532, - "eval_overall_f1": 0.9617346938775511, - "eval_overall_precision": 0.9617346938775511, - "eval_overall_recall": 0.9617346938775511, - "eval_runtime": 0.6856, - "eval_samples_per_second": 247.968, - "eval_steps_per_second": 4.376, - "step": 8832 - }, - { - "epoch": 93.0, - "grad_norm": 0.00010416532313684002, - "learning_rate": 3.5000000000000004e-06, - "loss": 0.0007, - "step": 8928 - }, - { - "epoch": 93.0, - "eval_LOCATION_f1": 0.9265536723163842, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9010989010989011, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9659090909090908, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9770114942528736, - "eval_ORGANIZATION_recall": 0.9550561797752809, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.09203241765499115, - "eval_overall_accuracy": 0.9900134952766532, - "eval_overall_f1": 0.9617346938775511, - "eval_overall_precision": 0.9617346938775511, - "eval_overall_recall": 0.9617346938775511, - "eval_runtime": 0.6328, - "eval_samples_per_second": 268.661, - "eval_steps_per_second": 4.741, - "step": 8928 - }, - { - "epoch": 94.0, - "grad_norm": 0.0006368375616148114, - "learning_rate": 3e-06, - "loss": 0.0005, - "step": 9024 - }, - { - "epoch": 94.0, - "eval_LOCATION_f1": 0.9265536723163842, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9010989010989011, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9572649572649572, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9710982658959537, - "eval_ORGANIZATION_recall": 0.9438202247191011, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.08861399441957474, - "eval_overall_accuracy": 0.9897435897435898, - "eval_overall_f1": 0.9578544061302682, - "eval_overall_precision": 0.959079283887468, - "eval_overall_recall": 0.9566326530612245, - "eval_runtime": 0.6208, - "eval_samples_per_second": 273.851, - "eval_steps_per_second": 4.833, - "step": 9024 - }, - { - "epoch": 95.0, - "grad_norm": 0.001301290700212121, - "learning_rate": 2.5e-06, - "loss": 0.0002, - "step": 9120 - }, - { - "epoch": 95.0, - "eval_LOCATION_f1": 0.9265536723163842, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9010989010989011, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9572649572649572, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9710982658959537, - "eval_ORGANIZATION_recall": 0.9438202247191011, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.08977135270833969, - "eval_overall_accuracy": 0.9894736842105263, - "eval_overall_f1": 0.9578544061302682, - "eval_overall_precision": 0.959079283887468, - "eval_overall_recall": 0.9566326530612245, - "eval_runtime": 0.6255, - "eval_samples_per_second": 271.773, - "eval_steps_per_second": 4.796, - "step": 9120 - }, - { - "epoch": 96.0, - "grad_norm": 0.00017601429135538638, - "learning_rate": 2.0000000000000003e-06, - "loss": 0.0002, - "step": 9216 - }, - { - "epoch": 96.0, - "eval_LOCATION_f1": 0.9265536723163842, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9010989010989011, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9572649572649572, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9710982658959537, - "eval_ORGANIZATION_recall": 0.9438202247191011, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.08961812406778336, - "eval_overall_accuracy": 0.9894736842105263, - "eval_overall_f1": 0.9578544061302682, - "eval_overall_precision": 0.959079283887468, - "eval_overall_recall": 0.9566326530612245, - "eval_runtime": 0.6231, - "eval_samples_per_second": 272.836, - "eval_steps_per_second": 4.815, - "step": 9216 - }, - { - "epoch": 97.0, - "grad_norm": 7.052494038362056e-05, - "learning_rate": 1.5e-06, - "loss": 0.0002, - "step": 9312 - }, - { - "epoch": 97.0, - "eval_LOCATION_f1": 0.9265536723163842, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9010989010989011, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9572649572649572, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9710982658959537, - "eval_ORGANIZATION_recall": 0.9438202247191011, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.08967098593711853, - "eval_overall_accuracy": 0.9894736842105263, - "eval_overall_f1": 0.9578544061302682, - "eval_overall_precision": 0.959079283887468, - "eval_overall_recall": 0.9566326530612245, - "eval_runtime": 0.617, - "eval_samples_per_second": 275.505, - "eval_steps_per_second": 4.862, - "step": 9312 - }, - { - "epoch": 98.0, - "grad_norm": 0.00025956094032153487, - "learning_rate": 1.0000000000000002e-06, - "loss": 0.0001, - "step": 9408 - }, - { - "epoch": 98.0, - "eval_LOCATION_f1": 0.9265536723163842, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9010989010989011, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9572649572649572, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9710982658959537, - "eval_ORGANIZATION_recall": 0.9438202247191011, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.08968637883663177, - "eval_overall_accuracy": 0.9894736842105263, - "eval_overall_f1": 0.9578544061302682, - "eval_overall_precision": 0.959079283887468, - "eval_overall_recall": 0.9566326530612245, - "eval_runtime": 0.6096, - "eval_samples_per_second": 278.858, - "eval_steps_per_second": 4.921, - "step": 9408 - }, - { - "epoch": 99.0, - "grad_norm": 0.00043740239925682545, - "learning_rate": 5.000000000000001e-07, - "loss": 0.0001, - "step": 9504 - }, - { - "epoch": 99.0, - "eval_LOCATION_f1": 0.9265536723163842, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9010989010989011, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9572649572649572, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9710982658959537, - "eval_ORGANIZATION_recall": 0.9438202247191011, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.08951318264007568, - "eval_overall_accuracy": 0.9894736842105263, - "eval_overall_f1": 0.9578544061302682, - "eval_overall_precision": 0.959079283887468, - "eval_overall_recall": 0.9566326530612245, - "eval_runtime": 0.6177, - "eval_samples_per_second": 275.197, - "eval_steps_per_second": 4.856, - "step": 9504 - }, - { - "epoch": 100.0, - "grad_norm": 0.0001184117209049873, - "learning_rate": 0.0, - "loss": 0.0001, - "step": 9600 - }, - { - "epoch": 100.0, - "eval_LOCATION_f1": 0.9265536723163842, - "eval_LOCATION_number": 86, - "eval_LOCATION_precision": 0.9010989010989011, - "eval_LOCATION_recall": 0.9534883720930233, - "eval_ORGANIZATION_f1": 0.9572649572649572, - "eval_ORGANIZATION_number": 178, - "eval_ORGANIZATION_precision": 0.9710982658959537, - "eval_ORGANIZATION_recall": 0.9438202247191011, - "eval_PERSON_f1": 0.9803921568627452, - "eval_PERSON_number": 128, - "eval_PERSON_precision": 0.984251968503937, - "eval_PERSON_recall": 0.9765625, - "eval_loss": 0.08944942057132721, - "eval_overall_accuracy": 0.9894736842105263, - "eval_overall_f1": 0.9578544061302682, - "eval_overall_precision": 0.959079283887468, - "eval_overall_recall": 0.9566326530612245, - "eval_runtime": 0.6326, - "eval_samples_per_second": 268.753, - "eval_steps_per_second": 4.743, - "step": 9600 - }, - { - "epoch": 100.0, - "step": 9600, - "total_flos": 3867927199316004.0, - "train_loss": 0.005222862970840652, - "train_runtime": 2315.2948, - "train_samples_per_second": 66.125, - "train_steps_per_second": 4.146 + "epoch": 5.0, + "step": 480, + "total_flos": 193448468569026.0, + "train_loss": 0.06961918647090594, + "train_runtime": 225.7241, + "train_samples_per_second": 33.913, + "train_steps_per_second": 2.126 } ], "logging_steps": 500, - "max_steps": 9600, + "max_steps": 480, "num_input_tokens_seen": 0, - "num_train_epochs": 100, + "num_train_epochs": 5, "save_steps": 500, - "total_flos": 3867927199316004.0, + "total_flos": 193448468569026.0, "train_batch_size": 16, "trial_name": null, "trial_params": null