diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -10,1497 +10,1497 @@ "log_history": [ { "epoch": 1.0, - "grad_norm": 1.1324371099472046, + "grad_norm": 1.0955090522766113, "learning_rate": 4.9500000000000004e-05, - "loss": 0.8428, + "loss": 0.8608, "step": 96 }, { "epoch": 1.0, - "eval_LOCATION_f1": 0.07272727272727272, - "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.23529411764705882, - "eval_LOCATION_recall": 0.043010752688172046, - "eval_ORGANIZATION_f1": 0.2706766917293233, - "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.2317596566523605, - "eval_ORGANIZATION_recall": 0.3253012048192771, - "eval_PERSON_f1": 0.31309904153354634, - "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.28654970760233917, - "eval_PERSON_recall": 0.34507042253521125, - "eval_loss": 0.3884907364845276, - "eval_overall_accuracy": 0.869684499314129, - "eval_overall_f1": 0.2603406326034063, - "eval_overall_precision": 0.25415676959619954, - "eval_overall_recall": 0.26683291770573564, - "eval_runtime": 0.6812, - "eval_samples_per_second": 249.575, - "eval_steps_per_second": 4.404, + "eval_LOCATION_f1": 0.03960396039603961, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.25, + "eval_LOCATION_recall": 0.021505376344086023, + "eval_ORGANIZATION_f1": 0.22727272727272724, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.21505376344086022, + "eval_ORGANIZATION_recall": 0.24096385542168675, + "eval_PERSON_f1": 0.2815884476534296, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.28888888888888886, + "eval_PERSON_recall": 0.2746478873239437, + "eval_loss": 0.3943306505680084, + "eval_overall_accuracy": 0.8650205761316873, + "eval_overall_f1": 0.2219178082191781, + "eval_overall_precision": 0.24620060790273557, + "eval_overall_recall": 0.20199501246882792, + "eval_runtime": 0.2929, + "eval_samples_per_second": 580.415, + "eval_steps_per_second": 10.243, "step": 96 }, { "epoch": 2.0, - "grad_norm": 0.9713420867919922, + "grad_norm": 0.8960049152374268, "learning_rate": 4.9e-05, - "loss": 0.3617, + "loss": 0.3625, "step": 192 }, { "epoch": 2.0, - "eval_LOCATION_f1": 0.3596491228070176, - "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.3037037037037037, - "eval_LOCATION_recall": 0.44086021505376344, - "eval_ORGANIZATION_f1": 0.5814696485623003, - "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.6190476190476191, - "eval_ORGANIZATION_recall": 0.5481927710843374, - "eval_PERSON_f1": 0.5706051873198846, - "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.48292682926829267, - "eval_PERSON_recall": 0.6971830985915493, - "eval_loss": 0.21714521944522858, - "eval_overall_accuracy": 0.9316872427983539, - "eval_overall_f1": 0.5202702702702703, - "eval_overall_precision": 0.47433264887063653, - "eval_overall_recall": 0.5760598503740648, - "eval_runtime": 0.636, - "eval_samples_per_second": 267.295, - "eval_steps_per_second": 4.717, + "eval_LOCATION_f1": 0.4050632911392405, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.3333333333333333, + "eval_LOCATION_recall": 0.5161290322580645, + "eval_ORGANIZATION_f1": 0.6056782334384858, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.6357615894039735, + "eval_ORGANIZATION_recall": 0.5783132530120482, + "eval_PERSON_f1": 0.6726726726726726, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.5863874345549738, + "eval_PERSON_recall": 0.7887323943661971, + "eval_loss": 0.21519558131694794, + "eval_overall_accuracy": 0.9404663923182441, + "eval_overall_f1": 0.5772266065388952, + "eval_overall_precision": 0.5267489711934157, + "eval_overall_recall": 0.6384039900249376, + "eval_runtime": 0.2904, + "eval_samples_per_second": 585.432, + "eval_steps_per_second": 10.331, "step": 192 }, { "epoch": 3.0, - "grad_norm": 0.8544023036956787, + "grad_norm": 0.8648101687431335, "learning_rate": 4.85e-05, - "loss": 0.1955, + "loss": 0.1958, "step": 288 }, { "epoch": 3.0, - "eval_LOCATION_f1": 0.7727272727272727, - "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8192771084337349, - "eval_LOCATION_recall": 0.7311827956989247, - "eval_ORGANIZATION_f1": 0.7717391304347826, - "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.7029702970297029, - "eval_ORGANIZATION_recall": 0.8554216867469879, - "eval_PERSON_f1": 0.968421052631579, - "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.965034965034965, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.1000150665640831, - "eval_overall_accuracy": 0.9681755829903979, - "eval_overall_f1": 0.8395657418576598, - "eval_overall_precision": 0.8130841121495327, - "eval_overall_recall": 0.8678304239401496, - "eval_runtime": 0.6673, - "eval_samples_per_second": 254.774, - "eval_steps_per_second": 4.496, + "eval_LOCATION_f1": 0.7909604519774012, + "eval_LOCATION_number": 93, + "eval_LOCATION_precision": 0.8333333333333334, + "eval_LOCATION_recall": 0.7526881720430108, + "eval_ORGANIZATION_f1": 0.7342465753424658, + "eval_ORGANIZATION_number": 166, + "eval_ORGANIZATION_precision": 0.6733668341708543, + "eval_ORGANIZATION_recall": 0.8072289156626506, + "eval_PERSON_f1": 0.9655172413793103, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9459459459459459, + "eval_PERSON_recall": 0.9859154929577465, + "eval_loss": 0.09694702178239822, + "eval_overall_accuracy": 0.9676268861454047, + "eval_overall_f1": 0.826923076923077, + "eval_overall_precision": 0.7981438515081206, + "eval_overall_recall": 0.85785536159601, + "eval_runtime": 0.2959, + "eval_samples_per_second": 574.554, + "eval_steps_per_second": 10.139, "step": 288 }, { "epoch": 4.0, - "grad_norm": 1.1749948263168335, + "grad_norm": 1.2493984699249268, "learning_rate": 4.8e-05, - "loss": 0.1335, + "loss": 0.1348, "step": 384 }, { "epoch": 4.0, - "eval_LOCATION_f1": 0.7959183673469389, + "eval_LOCATION_f1": 0.7536231884057972, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.7572815533980582, + "eval_LOCATION_precision": 0.6842105263157895, "eval_LOCATION_recall": 0.8387096774193549, - "eval_ORGANIZATION_f1": 0.8109589041095889, + "eval_ORGANIZATION_f1": 0.8033240997229917, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.7437185929648241, - "eval_ORGANIZATION_recall": 0.891566265060241, - "eval_PERSON_f1": 0.9652777777777778, - "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.952054794520548, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.08597932755947113, - "eval_overall_accuracy": 0.9714677640603566, - "eval_overall_f1": 0.8598351001177856, - "eval_overall_precision": 0.8147321428571429, - "eval_overall_recall": 0.9102244389027432, - "eval_runtime": 0.6604, - "eval_samples_per_second": 257.436, - "eval_steps_per_second": 4.543, + "eval_ORGANIZATION_precision": 0.7435897435897436, + "eval_ORGANIZATION_recall": 0.8734939759036144, + "eval_PERSON_f1": 0.979020979020979, + "eval_PERSON_number": 142, + "eval_PERSON_precision": 0.9722222222222222, + "eval_PERSON_recall": 0.9859154929577465, + "eval_loss": 0.08365878462791443, + "eval_overall_accuracy": 0.9711934156378601, + "eval_overall_f1": 0.8501170960187353, + "eval_overall_precision": 0.8013245033112583, + "eval_overall_recall": 0.9052369077306733, + "eval_runtime": 0.2924, + "eval_samples_per_second": 581.361, + "eval_steps_per_second": 10.259, "step": 384 }, { "epoch": 5.0, - "grad_norm": 0.6283699870109558, + "grad_norm": 0.48399850726127625, "learning_rate": 4.75e-05, - "loss": 0.1071, + "loss": 0.1081, "step": 480 }, { "epoch": 5.0, - "eval_LOCATION_f1": 0.8686868686868686, + "eval_LOCATION_f1": 0.8633879781420764, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.819047619047619, - "eval_LOCATION_recall": 0.9247311827956989, - "eval_ORGANIZATION_f1": 0.880952380952381, + "eval_LOCATION_precision": 0.8777777777777778, + "eval_LOCATION_recall": 0.8494623655913979, + "eval_ORGANIZATION_f1": 0.8563049853372433, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.8705882352941177, - "eval_ORGANIZATION_recall": 0.891566265060241, - "eval_PERSON_f1": 0.9754385964912281, + "eval_ORGANIZATION_precision": 0.8342857142857143, + "eval_ORGANIZATION_recall": 0.8795180722891566, + "eval_PERSON_f1": 0.971830985915493, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.972027972027972, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.06288647651672363, - "eval_overall_accuracy": 0.9813443072702333, - "eval_overall_f1": 0.910866910866911, - "eval_overall_precision": 0.8923444976076556, - "eval_overall_recall": 0.9301745635910225, - "eval_runtime": 0.6328, - "eval_samples_per_second": 268.66, - "eval_steps_per_second": 4.741, + "eval_PERSON_precision": 0.971830985915493, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06440697610378265, + "eval_overall_accuracy": 0.9807956104252401, + "eval_overall_f1": 0.8985148514851485, + "eval_overall_precision": 0.8918918918918919, + "eval_overall_recall": 0.9052369077306733, + "eval_runtime": 0.2962, + "eval_samples_per_second": 573.926, + "eval_steps_per_second": 10.128, "step": 480 }, { "epoch": 6.0, - "grad_norm": 0.7255609035491943, + "grad_norm": 0.6719767451286316, "learning_rate": 4.7e-05, - "loss": 0.0956, + "loss": 0.0952, "step": 576 }, { "epoch": 6.0, - "eval_LOCATION_f1": 0.8682926829268293, + "eval_LOCATION_f1": 0.8195121951219512, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.7946428571428571, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.8875739644970415, + "eval_LOCATION_precision": 0.75, + "eval_LOCATION_recall": 0.9032258064516129, + "eval_ORGANIZATION_f1": 0.8580060422960725, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.872093023255814, - "eval_ORGANIZATION_recall": 0.9036144578313253, + "eval_ORGANIZATION_precision": 0.8606060606060606, + "eval_ORGANIZATION_recall": 0.8554216867469879, "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9858156028368794, "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.053115665912628174, - "eval_overall_accuracy": 0.9829903978052126, - "eval_overall_f1": 0.9152542372881356, - "eval_overall_precision": 0.8894117647058823, - "eval_overall_recall": 0.942643391521197, - "eval_runtime": 0.627, - "eval_samples_per_second": 271.122, - "eval_steps_per_second": 4.785, + "eval_loss": 0.05366777628660202, + "eval_overall_accuracy": 0.9824417009602194, + "eval_overall_f1": 0.8913308913308914, + "eval_overall_precision": 0.8732057416267942, + "eval_overall_recall": 0.9102244389027432, + "eval_runtime": 0.2937, + "eval_samples_per_second": 578.83, + "eval_steps_per_second": 10.215, "step": 576 }, { "epoch": 7.0, - "grad_norm": 1.3008713722229004, + "grad_norm": 1.178981900215149, "learning_rate": 4.6500000000000005e-05, - "loss": 0.083, + "loss": 0.0875, "step": 672 }, { "epoch": 7.0, - "eval_LOCATION_f1": 0.8663101604278075, + "eval_LOCATION_f1": 0.849740932642487, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8617021276595744, - "eval_LOCATION_recall": 0.8709677419354839, - "eval_ORGANIZATION_f1": 0.8725212464589235, + "eval_LOCATION_precision": 0.82, + "eval_LOCATION_recall": 0.8817204301075269, + "eval_ORGANIZATION_f1": 0.888888888888889, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.8235294117647058, - "eval_ORGANIZATION_recall": 0.927710843373494, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.8432432432432433, + "eval_ORGANIZATION_recall": 0.9397590361445783, + "eval_PERSON_f1": 0.9788732394366197, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_precision": 0.9788732394366197, "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.05037233233451843, - "eval_overall_accuracy": 0.9832647462277092, - "eval_overall_f1": 0.9088699878493317, - "eval_overall_precision": 0.8862559241706162, - "eval_overall_recall": 0.9326683291770573, - "eval_runtime": 0.6164, - "eval_samples_per_second": 275.782, - "eval_steps_per_second": 4.867, + "eval_loss": 0.05959905683994293, + "eval_overall_accuracy": 0.9816186556927298, + "eval_overall_f1": 0.9106280193236714, + "eval_overall_precision": 0.882903981264637, + "eval_overall_recall": 0.940149625935162, + "eval_runtime": 0.2926, + "eval_samples_per_second": 581.049, + "eval_steps_per_second": 10.254, "step": 672 }, { "epoch": 8.0, - "grad_norm": 0.7086864113807678, + "grad_norm": 0.6990514993667603, "learning_rate": 4.600000000000001e-05, - "loss": 0.0784, + "loss": 0.073, "step": 768 }, { "epoch": 8.0, - "eval_LOCATION_f1": 0.8502415458937198, + "eval_LOCATION_f1": 0.883248730964467, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.7719298245614035, - "eval_LOCATION_recall": 0.946236559139785, - "eval_ORGANIZATION_f1": 0.8753799392097266, + "eval_LOCATION_precision": 0.8365384615384616, + "eval_LOCATION_recall": 0.9354838709677419, + "eval_ORGANIZATION_f1": 0.9032258064516129, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.8834355828220859, - "eval_ORGANIZATION_recall": 0.8674698795180723, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.88, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9788732394366197, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_precision": 0.9788732394366197, "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.05147531256079674, - "eval_overall_accuracy": 0.9818930041152263, - "eval_overall_f1": 0.9059829059829061, - "eval_overall_precision": 0.8875598086124402, - "eval_overall_recall": 0.9251870324189526, - "eval_runtime": 0.626, - "eval_samples_per_second": 271.558, - "eval_steps_per_second": 4.792, + "eval_loss": 0.05377694219350815, + "eval_overall_accuracy": 0.9835390946502057, + "eval_overall_f1": 0.9245742092457422, + "eval_overall_precision": 0.9026128266033254, + "eval_overall_recall": 0.9476309226932669, + "eval_runtime": 0.2917, + "eval_samples_per_second": 582.8, + "eval_steps_per_second": 10.285, "step": 768 }, { "epoch": 9.0, - "grad_norm": 0.5022886395454407, + "grad_norm": 0.3934107720851898, "learning_rate": 4.55e-05, - "loss": 0.072, + "loss": 0.0696, "step": 864 }, { "epoch": 9.0, - "eval_LOCATION_f1": 0.8958333333333334, + "eval_LOCATION_f1": 0.8900523560209423, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8686868686868687, - "eval_LOCATION_recall": 0.9247311827956989, - "eval_ORGANIZATION_f1": 0.9009009009009009, + "eval_LOCATION_precision": 0.8673469387755102, + "eval_LOCATION_recall": 0.9139784946236559, + "eval_ORGANIZATION_f1": 0.88955223880597, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.8982035928143712, - "eval_ORGANIZATION_recall": 0.9036144578313253, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.8816568047337278, + "eval_ORGANIZATION_recall": 0.8975903614457831, + "eval_PERSON_f1": 0.971830985915493, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.04265189915895462, - "eval_overall_accuracy": 0.9860082304526749, - "eval_overall_f1": 0.9282178217821783, - "eval_overall_precision": 0.9213759213759214, - "eval_overall_recall": 0.9351620947630923, - "eval_runtime": 0.6359, - "eval_samples_per_second": 267.337, - "eval_steps_per_second": 4.718, + "eval_PERSON_precision": 0.971830985915493, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.04754691570997238, + "eval_overall_accuracy": 0.9857338820301783, + "eval_overall_f1": 0.9185185185185184, + "eval_overall_precision": 0.9095354523227384, + "eval_overall_recall": 0.9276807980049875, + "eval_runtime": 0.2908, + "eval_samples_per_second": 584.625, + "eval_steps_per_second": 10.317, "step": 864 }, { "epoch": 10.0, - "grad_norm": 0.7925252318382263, + "grad_norm": 0.8873964548110962, "learning_rate": 4.5e-05, - "loss": 0.0688, + "loss": 0.064, "step": 960 }, { "epoch": 10.0, - "eval_LOCATION_f1": 0.89, + "eval_LOCATION_f1": 0.922279792746114, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8317757009345794, + "eval_LOCATION_precision": 0.89, "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9107142857142857, + "eval_ORGANIZATION_f1": 0.9179331306990882, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9, - "eval_ORGANIZATION_recall": 0.9216867469879518, - "eval_PERSON_f1": 0.968421052631579, + "eval_ORGANIZATION_precision": 0.9263803680981595, + "eval_ORGANIZATION_recall": 0.9096385542168675, + "eval_PERSON_f1": 0.9754385964912281, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.965034965034965, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.046313099563121796, - "eval_overall_accuracy": 0.9849108367626886, - "eval_overall_f1": 0.9257003654080391, - "eval_overall_precision": 0.9047619047619048, - "eval_overall_recall": 0.9476309226932669, - "eval_runtime": 0.635, - "eval_samples_per_second": 267.722, - "eval_steps_per_second": 4.725, + "eval_PERSON_precision": 0.972027972027972, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.052041731774806976, + "eval_overall_accuracy": 0.9868312757201646, + "eval_overall_f1": 0.9392812887236679, + "eval_overall_precision": 0.9334975369458128, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.2893, + "eval_samples_per_second": 587.659, + "eval_steps_per_second": 10.37, "step": 960 }, { "epoch": 11.0, - "grad_norm": 0.376903772354126, + "grad_norm": 0.5631869435310364, "learning_rate": 4.4500000000000004e-05, - "loss": 0.0621, + "loss": 0.0626, "step": 1056 }, { "epoch": 11.0, - "eval_LOCATION_f1": 0.914572864321608, + "eval_LOCATION_f1": 0.9175257731958764, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8584905660377359, - "eval_LOCATION_recall": 0.978494623655914, - "eval_ORGANIZATION_f1": 0.9174311926605505, + "eval_LOCATION_precision": 0.8811881188118812, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9169230769230768, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9316770186335404, - "eval_ORGANIZATION_recall": 0.9036144578313253, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9371069182389937, + "eval_ORGANIZATION_recall": 0.8975903614457831, + "eval_PERSON_f1": 0.968421052631579, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.04419618099927902, - "eval_overall_accuracy": 0.9860082304526749, - "eval_overall_f1": 0.9394313967861558, - "eval_overall_precision": 0.9313725490196079, - "eval_overall_recall": 0.9476309226932669, - "eval_runtime": 0.6308, - "eval_samples_per_second": 269.488, - "eval_steps_per_second": 4.756, + "eval_PERSON_precision": 0.965034965034965, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.043987125158309937, + "eval_overall_accuracy": 0.9879286694101509, + "eval_overall_f1": 0.9353233830845771, + "eval_overall_precision": 0.9330024813895782, + "eval_overall_recall": 0.9376558603491272, + "eval_runtime": 0.2913, + "eval_samples_per_second": 583.542, + "eval_steps_per_second": 10.298, "step": 1056 }, { "epoch": 12.0, - "grad_norm": 0.7413063645362854, + "grad_norm": 0.559025228023529, "learning_rate": 4.4000000000000006e-05, - "loss": 0.0595, + "loss": 0.0554, "step": 1152 }, { "epoch": 12.0, - "eval_LOCATION_f1": 0.9374999999999999, + "eval_LOCATION_f1": 0.9139784946236559, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9090909090909091, - "eval_LOCATION_recall": 0.967741935483871, - "eval_ORGANIZATION_f1": 0.9235474006116209, + "eval_LOCATION_precision": 0.9139784946236559, + "eval_LOCATION_recall": 0.9139784946236559, + "eval_ORGANIZATION_f1": 0.9058823529411766, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.937888198757764, - "eval_ORGANIZATION_recall": 0.9096385542168675, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.8850574712643678, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9754385964912281, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.045279767364263535, - "eval_overall_accuracy": 0.9873799725651577, - "eval_overall_f1": 0.9451371571072319, - "eval_overall_precision": 0.9451371571072319, - "eval_overall_recall": 0.9451371571072319, - "eval_runtime": 0.628, - "eval_samples_per_second": 270.712, - "eval_steps_per_second": 4.777, + "eval_PERSON_precision": 0.972027972027972, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.04547671601176262, + "eval_overall_accuracy": 0.9871056241426612, + "eval_overall_f1": 0.9321824907521579, + "eval_overall_precision": 0.9219512195121952, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2923, + "eval_samples_per_second": 581.688, + "eval_steps_per_second": 10.265, "step": 1152 }, { "epoch": 13.0, - "grad_norm": 0.575614869594574, + "grad_norm": 1.0373061895370483, "learning_rate": 4.35e-05, - "loss": 0.0576, + "loss": 0.0548, "step": 1248 }, { "epoch": 13.0, - "eval_LOCATION_f1": 0.91, + "eval_LOCATION_f1": 0.9197860962566845, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8504672897196262, - "eval_LOCATION_recall": 0.978494623655914, - "eval_ORGANIZATION_f1": 0.9268292682926829, + "eval_LOCATION_precision": 0.9148936170212766, + "eval_LOCATION_recall": 0.9247311827956989, + "eval_ORGANIZATION_f1": 0.9317507418397626, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9382716049382716, - "eval_ORGANIZATION_recall": 0.9156626506024096, - "eval_PERSON_f1": 0.9788732394366197, + "eval_ORGANIZATION_precision": 0.9181286549707602, + "eval_ORGANIZATION_recall": 0.9457831325301205, + "eval_PERSON_f1": 0.9754385964912281, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9788732394366197, + "eval_PERSON_precision": 0.972027972027972, "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.04553345590829849, - "eval_overall_accuracy": 0.9868312757201646, - "eval_overall_f1": 0.9408866995073891, - "eval_overall_precision": 0.9294403892944039, + "eval_loss": 0.04833203926682472, + "eval_overall_accuracy": 0.988477366255144, + "eval_overall_f1": 0.9443757725587144, + "eval_overall_precision": 0.9362745098039216, "eval_overall_recall": 0.9526184538653366, - "eval_runtime": 0.6217, - "eval_samples_per_second": 273.463, - "eval_steps_per_second": 4.826, + "eval_runtime": 0.2913, + "eval_samples_per_second": 583.67, + "eval_steps_per_second": 10.3, "step": 1248 }, { "epoch": 14.0, - "grad_norm": 0.5367061495780945, + "grad_norm": 0.2547779381275177, "learning_rate": 4.3e-05, - "loss": 0.054, + "loss": 0.0504, "step": 1344 }, { "epoch": 14.0, - "eval_LOCATION_f1": 0.9361702127659575, + "eval_LOCATION_f1": 0.9247311827956989, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9263157894736842, - "eval_LOCATION_recall": 0.946236559139785, - "eval_ORGANIZATION_f1": 0.9212121212121213, + "eval_LOCATION_precision": 0.9247311827956989, + "eval_LOCATION_recall": 0.9247311827956989, + "eval_ORGANIZATION_f1": 0.9461077844311376, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.926829268292683, - "eval_ORGANIZATION_recall": 0.9156626506024096, - "eval_PERSON_f1": 0.971830985915493, + "eval_ORGANIZATION_precision": 0.9404761904761905, + "eval_ORGANIZATION_recall": 0.9518072289156626, + "eval_PERSON_f1": 0.968421052631579, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.971830985915493, + "eval_PERSON_precision": 0.965034965034965, "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.041482534259557724, - "eval_overall_accuracy": 0.9879286694101509, - "eval_overall_f1": 0.942643391521197, - "eval_overall_precision": 0.942643391521197, - "eval_overall_recall": 0.942643391521197, - "eval_runtime": 0.6192, - "eval_samples_per_second": 274.558, - "eval_steps_per_second": 4.845, + "eval_loss": 0.04443012550473213, + "eval_overall_accuracy": 0.9895747599451303, + "eval_overall_f1": 0.9490683229813663, + "eval_overall_precision": 0.9455445544554455, + "eval_overall_recall": 0.9526184538653366, + "eval_runtime": 0.2937, + "eval_samples_per_second": 578.827, + "eval_steps_per_second": 10.215, "step": 1344 }, { "epoch": 15.0, - "grad_norm": 0.7122671604156494, + "grad_norm": 0.5698651075363159, "learning_rate": 4.25e-05, - "loss": 0.0491, + "loss": 0.0494, "step": 1440 }, { "epoch": 15.0, - "eval_LOCATION_f1": 0.925531914893617, + "eval_LOCATION_f1": 0.9354838709677419, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9157894736842105, + "eval_LOCATION_precision": 0.9354838709677419, "eval_LOCATION_recall": 0.9354838709677419, - "eval_ORGANIZATION_f1": 0.9221556886227544, + "eval_ORGANIZATION_f1": 0.9317507418397626, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9166666666666666, - "eval_ORGANIZATION_recall": 0.927710843373494, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9181286549707602, + "eval_ORGANIZATION_recall": 0.9457831325301205, + "eval_PERSON_f1": 0.9754385964912281, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_precision": 0.972027972027972, "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.036706916987895966, - "eval_overall_accuracy": 0.988477366255144, - "eval_overall_f1": 0.9440993788819876, - "eval_overall_precision": 0.9405940594059405, - "eval_overall_recall": 0.9476309226932669, - "eval_runtime": 0.6348, - "eval_samples_per_second": 267.784, - "eval_steps_per_second": 4.726, + "eval_loss": 0.042747754603624344, + "eval_overall_accuracy": 0.9887517146776406, + "eval_overall_f1": 0.9480198019801981, + "eval_overall_precision": 0.941031941031941, + "eval_overall_recall": 0.9551122194513716, + "eval_runtime": 0.2936, + "eval_samples_per_second": 578.975, + "eval_steps_per_second": 10.217, "step": 1440 }, { "epoch": 16.0, - "grad_norm": 1.1978126764297485, + "grad_norm": 0.5443034172058105, "learning_rate": 4.2e-05, - "loss": 0.0468, + "loss": 0.0445, "step": 1536 }, { "epoch": 16.0, - "eval_LOCATION_f1": 0.9128205128205128, + "eval_LOCATION_f1": 0.8989898989898989, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8725490196078431, + "eval_LOCATION_precision": 0.8476190476190476, "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9216867469879518, + "eval_ORGANIZATION_f1": 0.9123867069486404, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9216867469879518, - "eval_ORGANIZATION_recall": 0.9216867469879518, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9151515151515152, + "eval_ORGANIZATION_recall": 0.9096385542168675, + "eval_PERSON_f1": 0.968421052631579, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.04370429739356041, - "eval_overall_accuracy": 0.9865569272976681, - "eval_overall_f1": 0.9407407407407408, - "eval_overall_precision": 0.9315403422982885, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.6383, - "eval_samples_per_second": 266.338, - "eval_steps_per_second": 4.7, + "eval_PERSON_precision": 0.965034965034965, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.049201611429452896, + "eval_overall_accuracy": 0.9873799725651577, + "eval_overall_f1": 0.9287469287469288, + "eval_overall_precision": 0.9152542372881356, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2919, + "eval_samples_per_second": 582.42, + "eval_steps_per_second": 10.278, "step": 1536 }, { "epoch": 17.0, - "grad_norm": 0.49218180775642395, + "grad_norm": 0.4349294602870941, "learning_rate": 4.15e-05, - "loss": 0.0466, + "loss": 0.0457, "step": 1632 }, { "epoch": 17.0, - "eval_LOCATION_f1": 0.9157894736842105, + "eval_LOCATION_f1": 0.9270833333333334, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8969072164948454, - "eval_LOCATION_recall": 0.9354838709677419, - "eval_ORGANIZATION_f1": 0.913946587537092, + "eval_LOCATION_precision": 0.898989898989899, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9397590361445783, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9005847953216374, - "eval_ORGANIZATION_recall": 0.927710843373494, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.9397590361445783, + "eval_ORGANIZATION_recall": 0.9397590361445783, + "eval_PERSON_f1": 0.9788732394366197, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.04270460084080696, - "eval_overall_accuracy": 0.9876543209876543, - "eval_overall_f1": 0.9358024691358026, - "eval_overall_precision": 0.9266503667481663, - "eval_overall_recall": 0.9451371571072319, - "eval_runtime": 0.6391, - "eval_samples_per_second": 265.99, - "eval_steps_per_second": 4.694, + "eval_PERSON_precision": 0.9788732394366197, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.04454689472913742, + "eval_overall_accuracy": 0.9890260631001372, + "eval_overall_f1": 0.9504950495049505, + "eval_overall_precision": 0.9434889434889435, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.2923, + "eval_samples_per_second": 581.575, + "eval_steps_per_second": 10.263, "step": 1632 }, { "epoch": 18.0, - "grad_norm": 0.3642722964286804, + "grad_norm": 0.2558753192424774, "learning_rate": 4.1e-05, - "loss": 0.0442, + "loss": 0.0433, "step": 1728 }, { "epoch": 18.0, - "eval_LOCATION_f1": 0.9263157894736843, + "eval_LOCATION_f1": 0.9270833333333334, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9072164948453608, - "eval_LOCATION_recall": 0.946236559139785, - "eval_ORGANIZATION_f1": 0.9151515151515153, + "eval_LOCATION_precision": 0.898989898989899, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9305135951661632, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9207317073170732, - "eval_ORGANIZATION_recall": 0.9096385542168675, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.9333333333333333, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9788732394366197, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.03979617729783058, - "eval_overall_accuracy": 0.9876543209876543, - "eval_overall_f1": 0.9389788293897883, - "eval_overall_precision": 0.9378109452736318, - "eval_overall_recall": 0.940149625935162, - "eval_runtime": 0.6398, - "eval_samples_per_second": 265.693, - "eval_steps_per_second": 4.689, + "eval_PERSON_precision": 0.9788732394366197, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.05001579597592354, + "eval_overall_accuracy": 0.9895747599451303, + "eval_overall_f1": 0.946716232961586, + "eval_overall_precision": 0.9408866995073891, + "eval_overall_recall": 0.9526184538653366, + "eval_runtime": 0.291, + "eval_samples_per_second": 584.152, + "eval_steps_per_second": 10.309, "step": 1728 }, { "epoch": 19.0, - "grad_norm": 1.037690281867981, + "grad_norm": 0.414883553981781, "learning_rate": 4.05e-05, - "loss": 0.0407, + "loss": 0.0395, "step": 1824 }, { "epoch": 19.0, - "eval_LOCATION_f1": 0.9430051813471503, + "eval_LOCATION_f1": 0.9304812834224598, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.91, - "eval_LOCATION_recall": 0.978494623655914, - "eval_ORGANIZATION_f1": 0.9263803680981595, + "eval_LOCATION_precision": 0.925531914893617, + "eval_LOCATION_recall": 0.9354838709677419, + "eval_ORGANIZATION_f1": 0.9285714285714286, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.94375, - "eval_ORGANIZATION_recall": 0.9096385542168675, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.9176470588235294, + "eval_ORGANIZATION_recall": 0.9397590361445783, + "eval_PERSON_f1": 0.9788732394366197, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.04469582438468933, - "eval_overall_accuracy": 0.9887517146776406, - "eval_overall_f1": 0.9476309226932669, - "eval_overall_precision": 0.9476309226932669, - "eval_overall_recall": 0.9476309226932669, - "eval_runtime": 0.6332, - "eval_samples_per_second": 268.466, - "eval_steps_per_second": 4.738, + "eval_PERSON_precision": 0.9788732394366197, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.04901066794991493, + "eval_overall_accuracy": 0.9890260631001372, + "eval_overall_f1": 0.946716232961586, + "eval_overall_precision": 0.9408866995073891, + "eval_overall_recall": 0.9526184538653366, + "eval_runtime": 0.2903, + "eval_samples_per_second": 585.592, + "eval_steps_per_second": 10.334, "step": 1824 }, { "epoch": 20.0, - "grad_norm": 0.3398281931877136, + "grad_norm": 0.6924071311950684, "learning_rate": 4e-05, - "loss": 0.0391, + "loss": 0.0375, "step": 1920 }, { "epoch": 20.0, - "eval_LOCATION_f1": 0.9045226130653267, + "eval_LOCATION_f1": 0.9263157894736843, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8490566037735849, - "eval_LOCATION_recall": 0.967741935483871, - "eval_ORGANIZATION_f1": 0.9057750759878419, + "eval_LOCATION_precision": 0.9072164948453608, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9289940828402367, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9141104294478528, - "eval_ORGANIZATION_recall": 0.8975903614457831, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.9127906976744186, + "eval_ORGANIZATION_recall": 0.9457831325301205, + "eval_PERSON_f1": 0.9788732394366197, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.04723087698221207, - "eval_overall_accuracy": 0.9857338820301783, - "eval_overall_f1": 0.9297163995067818, - "eval_overall_precision": 0.9195121951219513, - "eval_overall_recall": 0.940149625935162, - "eval_runtime": 0.6325, - "eval_samples_per_second": 268.763, - "eval_steps_per_second": 4.743, + "eval_PERSON_precision": 0.9788732394366197, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.04936488717794418, + "eval_overall_accuracy": 0.9879286694101509, + "eval_overall_f1": 0.9458128078817734, + "eval_overall_precision": 0.9343065693430657, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.2926, + "eval_samples_per_second": 581.005, + "eval_steps_per_second": 10.253, "step": 1920 }, { "epoch": 21.0, - "grad_norm": 0.9918632507324219, + "grad_norm": 0.32161277532577515, "learning_rate": 3.9500000000000005e-05, - "loss": 0.0387, + "loss": 0.0394, "step": 2016 }, { "epoch": 21.0, - "eval_LOCATION_f1": 0.9191919191919192, + "eval_LOCATION_f1": 0.9319371727748691, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8666666666666667, - "eval_LOCATION_recall": 0.978494623655914, - "eval_ORGANIZATION_f1": 0.8888888888888888, + "eval_LOCATION_precision": 0.9081632653061225, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9281437125748503, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9113924050632911, - "eval_ORGANIZATION_recall": 0.8674698795180723, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9226190476190477, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.971830985915493, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.046005215495824814, - "eval_overall_accuracy": 0.9873799725651577, - "eval_overall_f1": 0.929192546583851, - "eval_overall_precision": 0.9257425742574258, - "eval_overall_recall": 0.9326683291770573, - "eval_runtime": 0.6246, - "eval_samples_per_second": 272.187, - "eval_steps_per_second": 4.803, - "step": 2016 - }, - { + "eval_PERSON_precision": 0.971830985915493, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.05034565553069115, + "eval_overall_accuracy": 0.9887517146776406, + "eval_overall_f1": 0.9443757725587144, + "eval_overall_precision": 0.9362745098039216, + "eval_overall_recall": 0.9526184538653366, + "eval_runtime": 0.2909, + "eval_samples_per_second": 584.394, + "eval_steps_per_second": 10.313, + "step": 2016 + }, + { "epoch": 22.0, - "grad_norm": 0.2701110541820526, + "grad_norm": 0.15593692660331726, "learning_rate": 3.9000000000000006e-05, - "loss": 0.0343, + "loss": 0.0364, "step": 2112 }, { "epoch": 22.0, - "eval_LOCATION_f1": 0.9473684210526316, + "eval_LOCATION_f1": 0.9214659685863874, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.967741935483871, - "eval_ORGANIZATION_f1": 0.9244712990936556, + "eval_LOCATION_precision": 0.8979591836734694, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.920353982300885, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9272727272727272, - "eval_ORGANIZATION_recall": 0.9216867469879518, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9017341040462428, + "eval_ORGANIZATION_recall": 0.9397590361445783, + "eval_PERSON_f1": 0.9788732394366197, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_precision": 0.9788732394366197, "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.04117260500788689, - "eval_overall_accuracy": 0.9890260631001372, - "eval_overall_f1": 0.9502487562189055, - "eval_overall_precision": 0.9478908188585607, - "eval_overall_recall": 0.9526184538653366, - "eval_runtime": 0.6316, - "eval_samples_per_second": 269.142, - "eval_steps_per_second": 4.75, + "eval_loss": 0.04979310929775238, + "eval_overall_accuracy": 0.9879286694101509, + "eval_overall_f1": 0.941031941031941, + "eval_overall_precision": 0.927360774818402, + "eval_overall_recall": 0.9551122194513716, + "eval_runtime": 0.2951, + "eval_samples_per_second": 576.049, + "eval_steps_per_second": 10.166, "step": 2112 }, { "epoch": 23.0, - "grad_norm": 0.3225027322769165, + "grad_norm": 1.1374483108520508, "learning_rate": 3.85e-05, - "loss": 0.0331, + "loss": 0.035, "step": 2208 }, { "epoch": 23.0, - "eval_LOCATION_f1": 0.9157894736842105, + "eval_LOCATION_f1": 0.9109947643979057, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8969072164948454, + "eval_LOCATION_precision": 0.8877551020408163, "eval_LOCATION_recall": 0.9354838709677419, - "eval_ORGANIZATION_f1": 0.9174311926605505, + "eval_ORGANIZATION_f1": 0.924924924924925, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9316770186335404, - "eval_ORGANIZATION_recall": 0.9036144578313253, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.9221556886227545, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.971830985915493, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_precision": 0.971830985915493, "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.04424041137099266, - "eval_overall_accuracy": 0.9879286694101509, - "eval_overall_f1": 0.9375, - "eval_overall_precision": 0.9398496240601504, - "eval_overall_recall": 0.9351620947630923, - "eval_runtime": 0.6279, - "eval_samples_per_second": 270.749, - "eval_steps_per_second": 4.778, + "eval_loss": 0.04968388378620148, + "eval_overall_accuracy": 0.9876543209876543, + "eval_overall_f1": 0.9381188118811881, + "eval_overall_precision": 0.9312039312039312, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.2946, + "eval_samples_per_second": 577.009, + "eval_steps_per_second": 10.183, "step": 2208 }, { "epoch": 24.0, - "grad_norm": 0.8494559526443481, + "grad_norm": 0.7729804515838623, "learning_rate": 3.8e-05, - "loss": 0.0329, + "loss": 0.0355, "step": 2304 }, { "epoch": 24.0, - "eval_LOCATION_f1": 0.9473684210526316, + "eval_LOCATION_f1": 0.9361702127659575, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.967741935483871, - "eval_ORGANIZATION_f1": 0.9309309309309309, + "eval_LOCATION_precision": 0.9263157894736842, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.923076923076923, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9281437125748503, - "eval_ORGANIZATION_recall": 0.9337349397590361, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.9069767441860465, + "eval_ORGANIZATION_recall": 0.9397590361445783, + "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.04214971885085106, - "eval_overall_accuracy": 0.9882030178326474, - "eval_overall_f1": 0.9503722084367245, - "eval_overall_precision": 0.945679012345679, + "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.04491155222058296, + "eval_overall_accuracy": 0.9879286694101509, + "eval_overall_f1": 0.9468479604449938, + "eval_overall_precision": 0.9387254901960784, "eval_overall_recall": 0.9551122194513716, - "eval_runtime": 0.6347, - "eval_samples_per_second": 267.825, - "eval_steps_per_second": 4.726, + "eval_runtime": 0.2916, + "eval_samples_per_second": 582.943, + "eval_steps_per_second": 10.287, "step": 2304 }, { "epoch": 25.0, - "grad_norm": 0.9196493625640869, + "grad_norm": 0.5651174783706665, "learning_rate": 3.7500000000000003e-05, - "loss": 0.0336, + "loss": 0.0316, "step": 2400 }, { "epoch": 25.0, - "eval_LOCATION_f1": 0.9374999999999999, + "eval_LOCATION_f1": 0.9128205128205128, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9090909090909091, - "eval_LOCATION_recall": 0.967741935483871, - "eval_ORGANIZATION_f1": 0.9325153374233127, + "eval_LOCATION_precision": 0.8725490196078431, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9085365853658537, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.95, - "eval_ORGANIZATION_recall": 0.9156626506024096, - "eval_PERSON_f1": 0.9788732394366197, + "eval_ORGANIZATION_precision": 0.9197530864197531, + "eval_ORGANIZATION_recall": 0.8975903614457831, + "eval_PERSON_f1": 0.971830985915493, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9788732394366197, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.047626204788684845, - "eval_overall_accuracy": 0.9890260631001372, - "eval_overall_f1": 0.9501246882793017, - "eval_overall_precision": 0.9501246882793017, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.6357, - "eval_samples_per_second": 267.402, - "eval_steps_per_second": 4.719, + "eval_PERSON_precision": 0.971830985915493, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.046948954463005066, + "eval_overall_accuracy": 0.9879286694101509, + "eval_overall_f1": 0.9318463444857497, + "eval_overall_precision": 0.9261083743842364, + "eval_overall_recall": 0.9376558603491272, + "eval_runtime": 0.2914, + "eval_samples_per_second": 583.339, + "eval_steps_per_second": 10.294, "step": 2400 }, { "epoch": 26.0, - "grad_norm": 0.2330346554517746, + "grad_norm": 0.0639706701040268, "learning_rate": 3.7e-05, - "loss": 0.0313, + "loss": 0.0309, "step": 2496 }, { "epoch": 26.0, - "eval_LOCATION_f1": 0.946808510638298, + "eval_LOCATION_f1": 0.9354838709677419, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9368421052631579, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9235294117647058, + "eval_LOCATION_precision": 0.9354838709677419, + "eval_LOCATION_recall": 0.9354838709677419, + "eval_ORGANIZATION_f1": 0.9337349397590361, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9022988505747126, - "eval_ORGANIZATION_recall": 0.9457831325301205, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9337349397590361, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.968421052631579, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.04078715667128563, - "eval_overall_accuracy": 0.9893004115226337, - "eval_overall_f1": 0.9494451294697903, - "eval_overall_precision": 0.9390243902439024, - "eval_overall_recall": 0.9600997506234414, - "eval_runtime": 0.6352, - "eval_samples_per_second": 267.628, - "eval_steps_per_second": 4.723, + "eval_PERSON_precision": 0.965034965034965, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.04571140930056572, + "eval_overall_accuracy": 0.9895747599451303, + "eval_overall_f1": 0.9464508094645082, + "eval_overall_precision": 0.945273631840796, + "eval_overall_recall": 0.9476309226932669, + "eval_runtime": 0.2914, + "eval_samples_per_second": 583.325, + "eval_steps_per_second": 10.294, "step": 2496 }, { "epoch": 27.0, - "grad_norm": 0.33721673488616943, + "grad_norm": 1.063063383102417, "learning_rate": 3.65e-05, - "loss": 0.0287, + "loss": 0.0293, "step": 2592 }, { "epoch": 27.0, - "eval_LOCATION_f1": 0.9473684210526316, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.967741935483871, - "eval_ORGANIZATION_f1": 0.9272727272727274, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9285714285714286, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9329268292682927, - "eval_ORGANIZATION_recall": 0.9216867469879518, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.9176470588235294, + "eval_ORGANIZATION_recall": 0.9397590361445783, + "eval_PERSON_f1": 0.9754385964912281, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.04490111395716667, - "eval_overall_accuracy": 0.9898491083676269, - "eval_overall_f1": 0.9489414694894147, - "eval_overall_precision": 0.9477611940298507, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.6359, - "eval_samples_per_second": 267.352, - "eval_steps_per_second": 4.718, + "eval_PERSON_precision": 0.972027972027972, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.04741787165403366, + "eval_overall_accuracy": 0.9876543209876543, + "eval_overall_f1": 0.942189421894219, + "eval_overall_precision": 0.9296116504854369, + "eval_overall_recall": 0.9551122194513716, + "eval_runtime": 0.2945, + "eval_samples_per_second": 577.282, + "eval_steps_per_second": 10.187, "step": 2592 }, { "epoch": 28.0, - "grad_norm": 0.46604397892951965, + "grad_norm": 0.6515504121780396, "learning_rate": 3.6e-05, - "loss": 0.03, + "loss": 0.0299, "step": 2688 }, { "epoch": 28.0, - "eval_LOCATION_f1": 0.9263157894736843, + "eval_LOCATION_f1": 0.9473684210526316, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9072164948453608, - "eval_LOCATION_recall": 0.946236559139785, - "eval_ORGANIZATION_f1": 0.9262536873156342, + "eval_LOCATION_precision": 0.9278350515463918, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.920353982300885, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9075144508670521, - "eval_ORGANIZATION_recall": 0.9457831325301205, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.9017341040462428, + "eval_ORGANIZATION_recall": 0.9397590361445783, + "eval_PERSON_f1": 0.971830985915493, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_precision": 0.971830985915493, "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.042864300310611725, - "eval_overall_accuracy": 0.9887517146776406, - "eval_overall_f1": 0.9433497536945813, - "eval_overall_precision": 0.9318734793187348, - "eval_overall_recall": 0.9551122194513716, - "eval_runtime": 0.6323, - "eval_samples_per_second": 268.849, - "eval_steps_per_second": 4.744, + "eval_loss": 0.050067901611328125, + "eval_overall_accuracy": 0.9882030178326474, + "eval_overall_f1": 0.944649446494465, + "eval_overall_precision": 0.9320388349514563, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.2919, + "eval_samples_per_second": 582.353, + "eval_steps_per_second": 10.277, "step": 2688 }, { "epoch": 29.0, - "grad_norm": 0.5060299038887024, + "grad_norm": 0.7020676136016846, "learning_rate": 3.55e-05, - "loss": 0.031, + "loss": 0.0264, "step": 2784 }, { "epoch": 29.0, - "eval_LOCATION_f1": 0.946236559139785, + "eval_LOCATION_f1": 0.9175257731958764, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.946236559139785, - "eval_LOCATION_recall": 0.946236559139785, - "eval_ORGANIZATION_f1": 0.9321533923303834, + "eval_LOCATION_precision": 0.8811881188118812, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9281437125748503, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9132947976878613, - "eval_ORGANIZATION_recall": 0.9518072289156626, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.9226190476190477, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9788732394366197, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.04162987321615219, - "eval_overall_accuracy": 0.9898491083676269, - "eval_overall_f1": 0.9504950495049505, - "eval_overall_precision": 0.9434889434889435, - "eval_overall_recall": 0.9576059850374065, - "eval_runtime": 0.7055, - "eval_samples_per_second": 240.973, - "eval_steps_per_second": 4.252, + "eval_PERSON_precision": 0.9788732394366197, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.0471595823764801, + "eval_overall_accuracy": 0.9882030178326474, + "eval_overall_f1": 0.9433497536945813, + "eval_overall_precision": 0.9318734793187348, + "eval_overall_recall": 0.9551122194513716, + "eval_runtime": 0.2947, + "eval_samples_per_second": 576.863, + "eval_steps_per_second": 10.18, "step": 2784 }, { "epoch": 30.0, - "grad_norm": 1.1173897981643677, + "grad_norm": 0.7510899901390076, "learning_rate": 3.5e-05, - "loss": 0.027, + "loss": 0.0257, "step": 2880 }, { "epoch": 30.0, - "eval_LOCATION_f1": 0.9513513513513514, + "eval_LOCATION_f1": 0.925531914893617, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9565217391304348, - "eval_LOCATION_recall": 0.946236559139785, - "eval_ORGANIZATION_f1": 0.9321533923303834, + "eval_LOCATION_precision": 0.9157894736842105, + "eval_LOCATION_recall": 0.9354838709677419, + "eval_ORGANIZATION_f1": 0.9212827988338192, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9132947976878613, + "eval_ORGANIZATION_precision": 0.8926553672316384, "eval_ORGANIZATION_recall": 0.9518072289156626, - "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_f1": 0.9788732394366197, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_precision": 0.9788732394366197, "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.040551040321588516, - "eval_overall_accuracy": 0.9909465020576131, - "eval_overall_f1": 0.9541511771995044, - "eval_overall_precision": 0.9482758620689655, - "eval_overall_recall": 0.9600997506234414, - "eval_runtime": 0.6794, - "eval_samples_per_second": 250.21, - "eval_steps_per_second": 4.415, + "eval_loss": 0.05023224279284477, + "eval_overall_accuracy": 0.9876543209876543, + "eval_overall_f1": 0.9423312883435583, + "eval_overall_precision": 0.927536231884058, + "eval_overall_recall": 0.9576059850374065, + "eval_runtime": 0.2921, + "eval_samples_per_second": 581.974, + "eval_steps_per_second": 10.27, "step": 2880 }, { "epoch": 31.0, - "grad_norm": 0.17513635754585266, + "grad_norm": 0.40647125244140625, "learning_rate": 3.45e-05, - "loss": 0.0257, + "loss": 0.0262, "step": 2976 }, { "epoch": 31.0, - "eval_LOCATION_f1": 0.9473684210526316, + "eval_LOCATION_f1": 0.911917098445596, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.967741935483871, - "eval_ORGANIZATION_f1": 0.911854103343465, + "eval_LOCATION_precision": 0.88, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9189189189189191, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9202453987730062, - "eval_ORGANIZATION_recall": 0.9036144578313253, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9161676646706587, + "eval_ORGANIZATION_recall": 0.9216867469879518, + "eval_PERSON_f1": 0.9754385964912281, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_precision": 0.972027972027972, "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.043482180684804916, - "eval_overall_accuracy": 0.9901234567901235, - "eval_overall_f1": 0.9451371571072319, - "eval_overall_precision": 0.9451371571072319, - "eval_overall_recall": 0.9451371571072319, - "eval_runtime": 0.6311, - "eval_samples_per_second": 269.372, - "eval_steps_per_second": 4.754, + "eval_loss": 0.050489392131567, + "eval_overall_accuracy": 0.9873799725651577, + "eval_overall_f1": 0.9371146732429101, + "eval_overall_precision": 0.926829268292683, + "eval_overall_recall": 0.9476309226932669, + "eval_runtime": 0.2918, + "eval_samples_per_second": 582.684, + "eval_steps_per_second": 10.283, "step": 2976 }, { "epoch": 32.0, - "grad_norm": 0.40757057070732117, + "grad_norm": 0.44217225909233093, "learning_rate": 3.4000000000000007e-05, - "loss": 0.0277, + "loss": 0.0263, "step": 3072 }, { "epoch": 32.0, - "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_f1": 0.9263157894736843, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9183673469387755, - "eval_LOCATION_recall": 0.967741935483871, - "eval_ORGANIZATION_f1": 0.8996960486322187, + "eval_LOCATION_precision": 0.9072164948453608, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9235474006116209, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9079754601226994, - "eval_ORGANIZATION_recall": 0.891566265060241, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.937888198757764, + "eval_ORGANIZATION_recall": 0.9096385542168675, + "eval_PERSON_f1": 0.9788732394366197, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.04837553948163986, - "eval_overall_accuracy": 0.9887517146776406, - "eval_overall_f1": 0.9364881693648817, - "eval_overall_precision": 0.9353233830845771, - "eval_overall_recall": 0.9376558603491272, - "eval_runtime": 0.6374, - "eval_samples_per_second": 266.721, - "eval_steps_per_second": 4.707, + "eval_PERSON_precision": 0.9788732394366197, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.05121176689863205, + "eval_overall_accuracy": 0.988477366255144, + "eval_overall_f1": 0.9438202247191011, + "eval_overall_precision": 0.945, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2907, + "eval_samples_per_second": 584.728, + "eval_steps_per_second": 10.319, "step": 3072 }, { "epoch": 33.0, - "grad_norm": 0.5286445617675781, + "grad_norm": 0.46512940526008606, "learning_rate": 3.35e-05, - "loss": 0.024, + "loss": 0.0246, "step": 3168 }, { "epoch": 33.0, - "eval_LOCATION_f1": 0.9270833333333334, + "eval_LOCATION_f1": 0.9361702127659575, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.898989898989899, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.896969696969697, + "eval_LOCATION_precision": 0.9263157894736842, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.923076923076923, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9024390243902439, - "eval_ORGANIZATION_recall": 0.891566265060241, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9069767441860465, + "eval_ORGANIZATION_recall": 0.9397590361445783, + "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.04499199241399765, - "eval_overall_accuracy": 0.9887517146776406, - "eval_overall_f1": 0.9341614906832298, - "eval_overall_precision": 0.9306930693069307, - "eval_overall_recall": 0.9376558603491272, - "eval_runtime": 0.6314, - "eval_samples_per_second": 269.238, - "eval_steps_per_second": 4.751, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.04934880882501602, + "eval_overall_accuracy": 0.9893004115226337, + "eval_overall_f1": 0.9443757725587144, + "eval_overall_precision": 0.9362745098039216, + "eval_overall_recall": 0.9526184538653366, + "eval_runtime": 0.2912, + "eval_samples_per_second": 583.778, + "eval_steps_per_second": 10.302, "step": 3168 }, { "epoch": 34.0, - "grad_norm": 0.7948114275932312, + "grad_norm": 0.3086019456386566, "learning_rate": 3.3e-05, - "loss": 0.0227, + "loss": 0.0242, "step": 3264 }, { "epoch": 34.0, - "eval_LOCATION_f1": 0.9473684210526316, + "eval_LOCATION_f1": 0.922279792746114, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9278350515463918, - "eval_LOCATION_recall": 0.967741935483871, - "eval_ORGANIZATION_f1": 0.9156626506024096, + "eval_LOCATION_precision": 0.89, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9161676646706587, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9156626506024096, - "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_ORGANIZATION_precision": 0.9107142857142857, + "eval_ORGANIZATION_recall": 0.9216867469879518, "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9858156028368794, "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.05086701363325119, - "eval_overall_accuracy": 0.988477366255144, - "eval_overall_f1": 0.946583850931677, - "eval_overall_precision": 0.943069306930693, + "eval_loss": 0.04890041425824165, + "eval_overall_accuracy": 0.9876543209876543, + "eval_overall_f1": 0.9407407407407408, + "eval_overall_precision": 0.9315403422982885, "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.6371, - "eval_samples_per_second": 266.844, - "eval_steps_per_second": 4.709, + "eval_runtime": 0.2886, + "eval_samples_per_second": 589.007, + "eval_steps_per_second": 10.394, "step": 3264 }, { "epoch": 35.0, - "grad_norm": 0.3472101390361786, + "grad_norm": 0.2681832015514374, "learning_rate": 3.2500000000000004e-05, - "loss": 0.0226, + "loss": 0.0217, "step": 3360 }, { "epoch": 35.0, - "eval_LOCATION_f1": 0.9374999999999999, + "eval_LOCATION_f1": 0.9304812834224598, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9090909090909091, - "eval_LOCATION_recall": 0.967741935483871, - "eval_ORGANIZATION_f1": 0.9085545722713864, + "eval_LOCATION_precision": 0.925531914893617, + "eval_LOCATION_recall": 0.9354838709677419, + "eval_ORGANIZATION_f1": 0.911764705882353, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.8901734104046243, - "eval_ORGANIZATION_recall": 0.927710843373494, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.8908045977011494, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9647887323943662, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.052043646574020386, + "eval_PERSON_precision": 0.9647887323943662, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.05158211290836334, "eval_overall_accuracy": 0.9871056241426612, - "eval_overall_f1": 0.941031941031941, - "eval_overall_precision": 0.927360774818402, - "eval_overall_recall": 0.9551122194513716, - "eval_runtime": 0.6348, - "eval_samples_per_second": 267.78, - "eval_steps_per_second": 4.726, + "eval_overall_f1": 0.9346485819975339, + "eval_overall_precision": 0.9243902439024391, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.2907, + "eval_samples_per_second": 584.806, + "eval_steps_per_second": 10.32, "step": 3360 }, { "epoch": 36.0, - "grad_norm": 0.432446151971817, + "grad_norm": 0.3382647633552551, "learning_rate": 3.2000000000000005e-05, - "loss": 0.0239, + "loss": 0.0221, "step": 3456 }, { "epoch": 36.0, - "eval_LOCATION_f1": 0.9108910891089109, + "eval_LOCATION_f1": 0.922279792746114, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8440366972477065, - "eval_LOCATION_recall": 0.989247311827957, - "eval_ORGANIZATION_f1": 0.8909657320872274, + "eval_LOCATION_precision": 0.89, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9240121580547112, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9225806451612903, - "eval_ORGANIZATION_recall": 0.8614457831325302, + "eval_ORGANIZATION_precision": 0.9325153374233128, + "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.971830985915493, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.971830985915493, "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.06132661551237106, - "eval_overall_accuracy": 0.9862825788751715, - "eval_overall_f1": 0.9244114002478315, - "eval_overall_precision": 0.9187192118226601, - "eval_overall_recall": 0.9301745635910225, - "eval_runtime": 0.627, - "eval_samples_per_second": 271.131, - "eval_steps_per_second": 4.785, + "eval_loss": 0.056546807289123535, + "eval_overall_accuracy": 0.9876543209876543, + "eval_overall_f1": 0.9404466501240694, + "eval_overall_precision": 0.9358024691358025, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.2901, + "eval_samples_per_second": 586.08, + "eval_steps_per_second": 10.343, "step": 3456 }, { "epoch": 37.0, - "grad_norm": 1.2789398431777954, + "grad_norm": 1.3838005065917969, "learning_rate": 3.15e-05, - "loss": 0.0222, + "loss": 0.0219, "step": 3552 }, { "epoch": 37.0, - "eval_LOCATION_f1": 0.911917098445596, + "eval_LOCATION_f1": 0.9319371727748691, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.88, - "eval_LOCATION_recall": 0.946236559139785, - "eval_ORGANIZATION_f1": 0.8982035928143712, + "eval_LOCATION_precision": 0.9081632653061225, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9156626506024096, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.8928571428571429, - "eval_ORGANIZATION_recall": 0.9036144578313253, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9156626506024096, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.048861872404813766, - "eval_overall_accuracy": 0.9876543209876543, - "eval_overall_f1": 0.9308641975308642, - "eval_overall_precision": 0.921760391198044, - "eval_overall_recall": 0.940149625935162, - "eval_runtime": 0.6344, - "eval_samples_per_second": 267.981, - "eval_steps_per_second": 4.729, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.04845535010099411, + "eval_overall_accuracy": 0.9893004115226337, + "eval_overall_f1": 0.9404466501240694, + "eval_overall_precision": 0.9358024691358025, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.2893, + "eval_samples_per_second": 587.579, + "eval_steps_per_second": 10.369, "step": 3552 }, { "epoch": 38.0, - "grad_norm": 0.11196000874042511, + "grad_norm": 0.24594035744667053, "learning_rate": 3.1e-05, - "loss": 0.0221, + "loss": 0.0217, "step": 3648 }, { "epoch": 38.0, - "eval_LOCATION_f1": 0.9292929292929293, + "eval_LOCATION_f1": 0.9424083769633509, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8761904761904762, - "eval_LOCATION_recall": 0.989247311827957, - "eval_ORGANIZATION_f1": 0.9057750759878419, + "eval_LOCATION_precision": 0.9183673469387755, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.918429003021148, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9141104294478528, - "eval_ORGANIZATION_recall": 0.8975903614457831, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9212121212121213, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9681978798586572, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.052511900663375854, - "eval_overall_accuracy": 0.9868312757201646, - "eval_overall_f1": 0.9382716049382714, - "eval_overall_precision": 0.9290953545232273, - "eval_overall_recall": 0.9476309226932669, - "eval_runtime": 0.6225, - "eval_samples_per_second": 273.092, - "eval_steps_per_second": 4.819, + "eval_PERSON_precision": 0.9716312056737588, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.052869848906993866, + "eval_overall_accuracy": 0.9882030178326474, + "eval_overall_f1": 0.9416149068322982, + "eval_overall_precision": 0.9381188118811881, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.2898, + "eval_samples_per_second": 586.556, + "eval_steps_per_second": 10.351, "step": 3648 }, { "epoch": 39.0, - "grad_norm": 0.2599373757839203, + "grad_norm": 0.40099620819091797, "learning_rate": 3.05e-05, - "loss": 0.0225, + "loss": 0.0236, "step": 3744 }, { "epoch": 39.0, - "eval_LOCATION_f1": 0.923076923076923, + "eval_LOCATION_f1": 0.9304812834224598, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8823529411764706, - "eval_LOCATION_recall": 0.967741935483871, - "eval_ORGANIZATION_f1": 0.8975903614457831, + "eval_LOCATION_precision": 0.925531914893617, + "eval_LOCATION_recall": 0.9354838709677419, + "eval_ORGANIZATION_f1": 0.9107142857142857, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.8975903614457831, - "eval_ORGANIZATION_recall": 0.8975903614457831, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9, + "eval_ORGANIZATION_recall": 0.9216867469879518, + "eval_PERSON_f1": 0.9681978798586572, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.050379928201436996, - "eval_overall_accuracy": 0.9860082304526749, - "eval_overall_f1": 0.9333333333333333, - "eval_overall_precision": 0.9242053789731052, - "eval_overall_recall": 0.942643391521197, - "eval_runtime": 0.6319, - "eval_samples_per_second": 269.039, - "eval_steps_per_second": 4.748, + "eval_PERSON_precision": 0.9716312056737588, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.05043813958764076, + "eval_overall_accuracy": 0.9873799725651577, + "eval_overall_f1": 0.9354838709677419, + "eval_overall_precision": 0.9308641975308642, + "eval_overall_recall": 0.940149625935162, + "eval_runtime": 0.288, + "eval_samples_per_second": 590.367, + "eval_steps_per_second": 10.418, "step": 3744 }, { "epoch": 40.0, - "grad_norm": 0.2717348039150238, + "grad_norm": 0.19064545631408691, "learning_rate": 3e-05, - "loss": 0.0194, + "loss": 0.0205, "step": 3840 }, { "epoch": 40.0, - "eval_LOCATION_f1": 0.9270833333333334, + "eval_LOCATION_f1": 0.90625, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.898989898989899, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.918429003021148, + "eval_LOCATION_precision": 0.8787878787878788, + "eval_LOCATION_recall": 0.9354838709677419, + "eval_ORGANIZATION_f1": 0.9134328358208955, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9212121212121213, - "eval_ORGANIZATION_recall": 0.9156626506024096, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.9053254437869822, + "eval_ORGANIZATION_recall": 0.9216867469879518, + "eval_PERSON_f1": 0.9614035087719298, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.04696699604392052, - "eval_overall_accuracy": 0.9890260631001372, - "eval_overall_f1": 0.9404466501240694, - "eval_overall_precision": 0.9358024691358025, - "eval_overall_recall": 0.9451371571072319, - "eval_runtime": 0.6215, - "eval_samples_per_second": 273.521, - "eval_steps_per_second": 4.827, + "eval_PERSON_precision": 0.958041958041958, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.053412336856126785, + "eval_overall_accuracy": 0.9876543209876543, + "eval_overall_f1": 0.9285714285714285, + "eval_overall_precision": 0.9172749391727494, + "eval_overall_recall": 0.940149625935162, + "eval_runtime": 0.2891, + "eval_samples_per_second": 588.081, + "eval_steps_per_second": 10.378, "step": 3840 }, { "epoch": 41.0, - "grad_norm": 0.43004825711250305, + "grad_norm": 0.02482328750193119, "learning_rate": 2.95e-05, - "loss": 0.0206, + "loss": 0.0193, "step": 3936 }, { "epoch": 41.0, - "eval_LOCATION_f1": 0.9157894736842105, + "eval_LOCATION_f1": 0.9072164948453608, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8969072164948454, - "eval_LOCATION_recall": 0.9354838709677419, - "eval_ORGANIZATION_f1": 0.9, + "eval_LOCATION_precision": 0.8712871287128713, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.890207715133531, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.8793103448275862, - "eval_ORGANIZATION_recall": 0.9216867469879518, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.8771929824561403, + "eval_ORGANIZATION_recall": 0.9036144578313253, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.048884760588407516, - "eval_overall_accuracy": 0.9865569272976681, - "eval_overall_f1": 0.932349323493235, - "eval_overall_precision": 0.9199029126213593, - "eval_overall_recall": 0.9451371571072319, - "eval_runtime": 0.6384, - "eval_samples_per_second": 266.27, - "eval_steps_per_second": 4.699, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.0644853487610817, + "eval_overall_accuracy": 0.9860082304526749, + "eval_overall_f1": 0.9249692496924969, + "eval_overall_precision": 0.912621359223301, + "eval_overall_recall": 0.9376558603491272, + "eval_runtime": 0.2901, + "eval_samples_per_second": 586.09, + "eval_steps_per_second": 10.343, "step": 3936 }, { "epoch": 42.0, - "grad_norm": 0.7156493663787842, + "grad_norm": 0.5300124287605286, "learning_rate": 2.9e-05, - "loss": 0.0208, + "loss": 0.0198, "step": 4032 }, { "epoch": 42.0, - "eval_LOCATION_f1": 0.9326424870466321, + "eval_LOCATION_f1": 0.922279792746114, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9, - "eval_LOCATION_recall": 0.967741935483871, - "eval_ORGANIZATION_f1": 0.9101796407185628, + "eval_LOCATION_precision": 0.89, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.8948948948948949, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9047619047619048, - "eval_ORGANIZATION_recall": 0.9156626506024096, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.8922155688622755, + "eval_ORGANIZATION_recall": 0.8975903614457831, + "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.05095763877034187, - "eval_overall_accuracy": 0.9879286694101509, - "eval_overall_f1": 0.9382716049382714, - "eval_overall_precision": 0.9290953545232273, - "eval_overall_recall": 0.9476309226932669, - "eval_runtime": 0.6338, - "eval_samples_per_second": 268.241, - "eval_steps_per_second": 4.734, + "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.0561443567276001, + "eval_overall_accuracy": 0.9860082304526749, + "eval_overall_f1": 0.9320148331273177, + "eval_overall_precision": 0.9240196078431373, + "eval_overall_recall": 0.940149625935162, + "eval_runtime": 0.2902, + "eval_samples_per_second": 585.717, + "eval_steps_per_second": 10.336, "step": 4032 }, { "epoch": 43.0, - "grad_norm": 0.23692376911640167, + "grad_norm": 0.8966781497001648, "learning_rate": 2.8499999999999998e-05, - "loss": 0.0178, + "loss": 0.018, "step": 4128 }, { "epoch": 43.0, - "eval_LOCATION_f1": 0.9278350515463919, + "eval_LOCATION_f1": 0.9368421052631579, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8910891089108911, - "eval_LOCATION_recall": 0.967741935483871, - "eval_ORGANIZATION_f1": 0.9382716049382716, + "eval_LOCATION_precision": 0.9175257731958762, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9123867069486404, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9620253164556962, - "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_ORGANIZATION_precision": 0.9151515151515152, + "eval_ORGANIZATION_recall": 0.9096385542168675, "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9787234042553191, "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.04964771866798401, - "eval_overall_accuracy": 0.9895747599451303, - "eval_overall_f1": 0.9488139825218477, - "eval_overall_precision": 0.95, - "eval_overall_recall": 0.9476309226932669, - "eval_runtime": 0.6373, - "eval_samples_per_second": 266.748, - "eval_steps_per_second": 4.707, + "eval_loss": 0.05398855730891228, + "eval_overall_accuracy": 0.9882030178326474, + "eval_overall_f1": 0.9402985074626866, + "eval_overall_precision": 0.9379652605459057, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2865, + "eval_samples_per_second": 593.421, + "eval_steps_per_second": 10.472, "step": 4128 }, { "epoch": 44.0, - "grad_norm": 0.20943285524845123, + "grad_norm": 0.3985178768634796, "learning_rate": 2.8000000000000003e-05, - "loss": 0.0184, + "loss": 0.0188, "step": 4224 }, { "epoch": 44.0, - "eval_LOCATION_f1": 0.9381443298969072, + "eval_LOCATION_f1": 0.9109947643979057, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.900990099009901, - "eval_LOCATION_recall": 0.978494623655914, - "eval_ORGANIZATION_f1": 0.9101796407185628, + "eval_LOCATION_precision": 0.8877551020408163, + "eval_LOCATION_recall": 0.9354838709677419, + "eval_ORGANIZATION_f1": 0.8975903614457831, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9047619047619048, - "eval_ORGANIZATION_recall": 0.9156626506024096, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.8975903614457831, + "eval_ORGANIZATION_recall": 0.8975903614457831, + "eval_PERSON_f1": 0.9788732394366197, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.05329016596078873, - "eval_overall_accuracy": 0.9882030178326474, - "eval_overall_f1": 0.9395807644882862, - "eval_overall_precision": 0.9292682926829269, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.6294, - "eval_samples_per_second": 270.096, - "eval_steps_per_second": 4.766, + "eval_PERSON_precision": 0.9788732394366197, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.05714266002178192, + "eval_overall_accuracy": 0.9868312757201646, + "eval_overall_f1": 0.9293680297397771, + "eval_overall_precision": 0.9236453201970444, + "eval_overall_recall": 0.9351620947630923, + "eval_runtime": 0.2944, + "eval_samples_per_second": 577.415, + "eval_steps_per_second": 10.19, "step": 4224 }, { "epoch": 45.0, - "grad_norm": 0.8311372399330139, + "grad_norm": 0.04688119515776634, "learning_rate": 2.7500000000000004e-05, - "loss": 0.0172, + "loss": 0.0177, "step": 4320 }, { "epoch": 45.0, - "eval_LOCATION_f1": 0.9479166666666667, + "eval_LOCATION_f1": 0.9263157894736843, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9191919191919192, - "eval_LOCATION_recall": 0.978494623655914, - "eval_ORGANIZATION_f1": 0.9329268292682927, + "eval_LOCATION_precision": 0.9072164948453608, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9030303030303031, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9444444444444444, - "eval_ORGANIZATION_recall": 0.9216867469879518, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.9085365853658537, + "eval_ORGANIZATION_recall": 0.8975903614457831, + "eval_PERSON_f1": 0.9788732394366197, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.05253069847822189, - "eval_overall_accuracy": 0.9882030178326474, - "eval_overall_f1": 0.9514321295143213, - "eval_overall_precision": 0.9502487562189055, - "eval_overall_recall": 0.9526184538653366, - "eval_runtime": 0.6282, - "eval_samples_per_second": 270.603, - "eval_steps_per_second": 4.775, + "eval_PERSON_precision": 0.9788732394366197, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.05286408215761185, + "eval_overall_accuracy": 0.9862825788751715, + "eval_overall_f1": 0.9353233830845771, + "eval_overall_precision": 0.9330024813895782, + "eval_overall_recall": 0.9376558603491272, + "eval_runtime": 0.2902, + "eval_samples_per_second": 585.832, + "eval_steps_per_second": 10.338, "step": 4320 }, { "epoch": 46.0, - "grad_norm": 0.6656791567802429, + "grad_norm": 0.19999141991138458, "learning_rate": 2.7000000000000002e-05, - "loss": 0.0159, + "loss": 0.0178, "step": 4416 }, { "epoch": 46.0, - "eval_LOCATION_f1": 0.9533678756476685, + "eval_LOCATION_f1": 0.914572864321608, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.92, - "eval_LOCATION_recall": 0.989247311827957, - "eval_ORGANIZATION_f1": 0.9151515151515153, + "eval_LOCATION_precision": 0.8584905660377359, + "eval_LOCATION_recall": 0.978494623655914, + "eval_ORGANIZATION_f1": 0.8895705521472392, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9207317073170732, - "eval_ORGANIZATION_recall": 0.9096385542168675, + "eval_ORGANIZATION_precision": 0.90625, + "eval_ORGANIZATION_recall": 0.8734939759036144, "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9858156028368794, "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.052427615970373154, - "eval_overall_accuracy": 0.988477366255144, - "eval_overall_f1": 0.9478908188585606, - "eval_overall_precision": 0.9432098765432099, - "eval_overall_recall": 0.9526184538653366, - "eval_runtime": 0.6393, - "eval_samples_per_second": 265.902, - "eval_steps_per_second": 4.692, + "eval_loss": 0.053873803466558456, + "eval_overall_accuracy": 0.9873799725651577, + "eval_overall_f1": 0.9282178217821783, + "eval_overall_precision": 0.9213759213759214, + "eval_overall_recall": 0.9351620947630923, + "eval_runtime": 0.2867, + "eval_samples_per_second": 593.025, + "eval_steps_per_second": 10.465, "step": 4416 }, { "epoch": 47.0, - "grad_norm": 0.03922390192747116, + "grad_norm": 0.04654427990317345, "learning_rate": 2.6500000000000004e-05, - "loss": 0.0174, + "loss": 0.0168, "step": 4512 }, { "epoch": 47.0, - "eval_LOCATION_f1": 0.9417989417989417, + "eval_LOCATION_f1": 0.9319371727748691, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9270833333333334, + "eval_LOCATION_precision": 0.9081632653061225, "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9014925373134328, + "eval_ORGANIZATION_f1": 0.9166666666666667, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.893491124260355, - "eval_ORGANIZATION_recall": 0.9096385542168675, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.9058823529411765, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.051573678851127625, - "eval_overall_accuracy": 0.9879286694101509, - "eval_overall_f1": 0.9368029739776952, - "eval_overall_precision": 0.9310344827586207, - "eval_overall_recall": 0.942643391521197, - "eval_runtime": 0.6335, - "eval_samples_per_second": 268.33, - "eval_steps_per_second": 4.735, + "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.05449910834431648, + "eval_overall_accuracy": 0.9873799725651577, + "eval_overall_f1": 0.9432098765432098, + "eval_overall_precision": 0.9339853300733496, + "eval_overall_recall": 0.9526184538653366, + "eval_runtime": 0.288, + "eval_samples_per_second": 590.188, + "eval_steps_per_second": 10.415, "step": 4512 }, { "epoch": 48.0, - "grad_norm": 0.08619959652423859, + "grad_norm": 0.35884276032447815, "learning_rate": 2.6000000000000002e-05, - "loss": 0.0176, + "loss": 0.016, "step": 4608 }, { "epoch": 48.0, - "eval_LOCATION_f1": 0.9417989417989417, + "eval_LOCATION_f1": 0.9270833333333334, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9270833333333334, + "eval_LOCATION_precision": 0.898989898989899, "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.924924924924925, + "eval_ORGANIZATION_f1": 0.9096385542168675, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9221556886227545, - "eval_ORGANIZATION_recall": 0.927710843373494, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.9096385542168675, + "eval_ORGANIZATION_recall": 0.9096385542168675, + "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.04628601670265198, - "eval_overall_accuracy": 0.99039780521262, - "eval_overall_f1": 0.946583850931677, - "eval_overall_precision": 0.943069306930693, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.6378, - "eval_samples_per_second": 266.532, - "eval_steps_per_second": 4.704, + "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.05373828485608101, + "eval_overall_accuracy": 0.988477366255144, + "eval_overall_f1": 0.9392812887236679, + "eval_overall_precision": 0.9334975369458128, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.2902, + "eval_samples_per_second": 585.775, + "eval_steps_per_second": 10.337, "step": 4608 }, { "epoch": 49.0, - "grad_norm": 0.017897285521030426, + "grad_norm": 0.010812529362738132, "learning_rate": 2.5500000000000003e-05, - "loss": 0.0156, + "loss": 0.0167, "step": 4704 }, { @@ -1509,587 +1509,587 @@ "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.9081632653061225, "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9207317073170733, + "eval_ORGANIZATION_f1": 0.927710843373494, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9320987654320988, - "eval_ORGANIZATION_recall": 0.9096385542168675, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.927710843373494, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.05306881293654442, + "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.05074891448020935, "eval_overall_accuracy": 0.9893004115226337, - "eval_overall_f1": 0.942643391521197, - "eval_overall_precision": 0.942643391521197, - "eval_overall_recall": 0.942643391521197, - "eval_runtime": 0.6253, - "eval_samples_per_second": 271.873, - "eval_steps_per_second": 4.798, + "eval_overall_f1": 0.9478908188585606, + "eval_overall_precision": 0.9432098765432099, + "eval_overall_recall": 0.9526184538653366, + "eval_runtime": 0.2898, + "eval_samples_per_second": 586.57, + "eval_steps_per_second": 10.351, "step": 4704 }, { "epoch": 50.0, - "grad_norm": 0.15169987082481384, + "grad_norm": 0.36007794737815857, "learning_rate": 2.5e-05, - "loss": 0.017, + "loss": 0.0157, "step": 4800 }, { "epoch": 50.0, - "eval_LOCATION_f1": 0.9326424870466321, + "eval_LOCATION_f1": 0.9109947643979057, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9, - "eval_LOCATION_recall": 0.967741935483871, - "eval_ORGANIZATION_f1": 0.918429003021148, + "eval_LOCATION_precision": 0.8877551020408163, + "eval_LOCATION_recall": 0.9354838709677419, + "eval_ORGANIZATION_f1": 0.9080118694362017, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9212121212121213, - "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_ORGANIZATION_precision": 0.8947368421052632, + "eval_ORGANIZATION_recall": 0.9216867469879518, "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9858156028368794, "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.05259489268064499, - "eval_overall_accuracy": 0.9879286694101509, - "eval_overall_f1": 0.9442379182156134, - "eval_overall_precision": 0.9384236453201971, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.6281, - "eval_samples_per_second": 270.654, - "eval_steps_per_second": 4.776, + "eval_loss": 0.05863998457789421, + "eval_overall_accuracy": 0.9868312757201646, + "eval_overall_f1": 0.9346485819975339, + "eval_overall_precision": 0.9243902439024391, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.2884, + "eval_samples_per_second": 589.476, + "eval_steps_per_second": 10.403, "step": 4800 }, { "epoch": 51.0, - "grad_norm": 0.4998176097869873, + "grad_norm": 0.11987640708684921, "learning_rate": 2.45e-05, - "loss": 0.0156, + "loss": 0.015, "step": 4896 }, { "epoch": 51.0, - "eval_LOCATION_f1": 0.923076923076923, + "eval_LOCATION_f1": 0.90625, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8823529411764706, - "eval_LOCATION_recall": 0.967741935483871, - "eval_ORGANIZATION_f1": 0.920245398773006, + "eval_LOCATION_precision": 0.8787878787878788, + "eval_LOCATION_recall": 0.9354838709677419, + "eval_ORGANIZATION_f1": 0.9189189189189191, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9375, - "eval_ORGANIZATION_recall": 0.9036144578313253, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9161676646706587, + "eval_ORGANIZATION_recall": 0.9216867469879518, + "eval_PERSON_f1": 0.971830985915493, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.05702535808086395, - "eval_overall_accuracy": 0.9871056241426612, - "eval_overall_f1": 0.9427860696517413, - "eval_overall_precision": 0.9404466501240695, - "eval_overall_recall": 0.9451371571072319, - "eval_runtime": 0.6253, - "eval_samples_per_second": 271.881, - "eval_steps_per_second": 4.798, + "eval_PERSON_precision": 0.971830985915493, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.052376989275217056, + "eval_overall_accuracy": 0.9873799725651577, + "eval_overall_f1": 0.9344870210135972, + "eval_overall_precision": 0.9264705882352942, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2909, + "eval_samples_per_second": 584.325, + "eval_steps_per_second": 10.312, "step": 4896 }, { "epoch": 52.0, - "grad_norm": 0.4189845025539398, + "grad_norm": 0.8939554691314697, "learning_rate": 2.4e-05, - "loss": 0.0152, + "loss": 0.0155, "step": 4992 }, { "epoch": 52.0, - "eval_LOCATION_f1": 0.9263157894736843, + "eval_LOCATION_f1": 0.9025641025641027, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9072164948453608, + "eval_LOCATION_precision": 0.8627450980392157, "eval_LOCATION_recall": 0.946236559139785, - "eval_ORGANIZATION_f1": 0.9272727272727274, + "eval_ORGANIZATION_f1": 0.9041916167664671, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9329268292682927, - "eval_ORGANIZATION_recall": 0.9216867469879518, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.8988095238095238, + "eval_ORGANIZATION_recall": 0.9096385542168675, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.05183970928192139, - "eval_overall_accuracy": 0.9898491083676269, - "eval_overall_f1": 0.9439601494396015, - "eval_overall_precision": 0.9427860696517413, - "eval_overall_recall": 0.9451371571072319, - "eval_runtime": 0.6336, - "eval_samples_per_second": 268.303, - "eval_steps_per_second": 4.735, + "eval_loss": 0.06287980824708939, + "eval_overall_accuracy": 0.9862825788751715, + "eval_overall_f1": 0.9297163995067818, + "eval_overall_precision": 0.9195121951219513, + "eval_overall_recall": 0.940149625935162, + "eval_runtime": 0.2901, + "eval_samples_per_second": 585.943, + "eval_steps_per_second": 10.34, "step": 4992 }, { "epoch": 53.0, - "grad_norm": 1.3798094987869263, + "grad_norm": 0.20487843453884125, "learning_rate": 2.35e-05, - "loss": 0.017, + "loss": 0.0156, "step": 5088 }, { "epoch": 53.0, - "eval_LOCATION_f1": 0.9263157894736843, + "eval_LOCATION_f1": 0.9015544041450777, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9072164948453608, - "eval_LOCATION_recall": 0.946236559139785, - "eval_ORGANIZATION_f1": 0.9309309309309309, + "eval_LOCATION_precision": 0.87, + "eval_LOCATION_recall": 0.9354838709677419, + "eval_ORGANIZATION_f1": 0.9216867469879518, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9281437125748503, - "eval_ORGANIZATION_recall": 0.9337349397590361, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9216867469879518, + "eval_ORGANIZATION_recall": 0.9216867469879518, + "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.048745788633823395, - "eval_overall_accuracy": 0.9895747599451303, - "eval_overall_f1": 0.9478908188585606, - "eval_overall_precision": 0.9432098765432099, - "eval_overall_recall": 0.9526184538653366, - "eval_runtime": 0.632, - "eval_samples_per_second": 268.983, - "eval_steps_per_second": 4.747, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06014350429177284, + "eval_overall_accuracy": 0.9868312757201646, + "eval_overall_f1": 0.9356435643564357, + "eval_overall_precision": 0.9287469287469288, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2903, + "eval_samples_per_second": 585.535, + "eval_steps_per_second": 10.333, "step": 5088 }, { "epoch": 54.0, - "grad_norm": 0.7631996870040894, + "grad_norm": 0.055074214935302734, "learning_rate": 2.3000000000000003e-05, - "loss": 0.0143, + "loss": 0.0141, "step": 5184 }, { "epoch": 54.0, - "eval_LOCATION_f1": 0.911917098445596, + "eval_LOCATION_f1": 0.9081632653061223, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.88, - "eval_LOCATION_recall": 0.946236559139785, - "eval_ORGANIZATION_f1": 0.9212121212121213, + "eval_LOCATION_precision": 0.8640776699029126, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9085545722713864, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.926829268292683, - "eval_ORGANIZATION_recall": 0.9156626506024096, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.8901734104046243, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.054195113480091095, - "eval_overall_accuracy": 0.9876543209876543, - "eval_overall_f1": 0.9379652605459058, - "eval_overall_precision": 0.9333333333333333, - "eval_overall_recall": 0.942643391521197, - "eval_runtime": 0.6274, - "eval_samples_per_second": 270.938, - "eval_steps_per_second": 4.781, + "eval_loss": 0.07066118717193604, + "eval_overall_accuracy": 0.9849108367626886, + "eval_overall_f1": 0.9326805385556914, + "eval_overall_precision": 0.9158653846153846, + "eval_overall_recall": 0.9501246882793017, + "eval_runtime": 0.2903, + "eval_samples_per_second": 585.543, + "eval_steps_per_second": 10.333, "step": 5184 }, { "epoch": 55.0, - "grad_norm": 0.7094045281410217, + "grad_norm": 0.6687442064285278, "learning_rate": 2.25e-05, - "loss": 0.0152, + "loss": 0.0133, "step": 5280 }, { "epoch": 55.0, - "eval_LOCATION_f1": 0.9270833333333334, + "eval_LOCATION_f1": 0.922279792746114, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.898989898989899, + "eval_LOCATION_precision": 0.89, "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.924924924924925, + "eval_ORGANIZATION_f1": 0.9146341463414634, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9221556886227545, - "eval_ORGANIZATION_recall": 0.927710843373494, - "eval_PERSON_f1": 0.9716312056737589, + "eval_ORGANIZATION_precision": 0.9259259259259259, + "eval_ORGANIZATION_recall": 0.9036144578313253, + "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9785714285714285, - "eval_PERSON_recall": 0.9647887323943662, - "eval_loss": 0.052083536982536316, - "eval_overall_accuracy": 0.9887517146776406, - "eval_overall_f1": 0.9417596034696407, - "eval_overall_precision": 0.9359605911330049, - "eval_overall_recall": 0.9476309226932669, - "eval_runtime": 0.6299, - "eval_samples_per_second": 269.891, - "eval_steps_per_second": 4.763, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06329189985990524, + "eval_overall_accuracy": 0.9868312757201646, + "eval_overall_f1": 0.9378109452736318, + "eval_overall_precision": 0.9354838709677419, + "eval_overall_recall": 0.940149625935162, + "eval_runtime": 0.2896, + "eval_samples_per_second": 587.102, + "eval_steps_per_second": 10.361, "step": 5280 }, { "epoch": 56.0, - "grad_norm": 0.5432300567626953, + "grad_norm": 0.35132700204849243, "learning_rate": 2.2000000000000003e-05, - "loss": 0.0123, + "loss": 0.013, "step": 5376 }, { "epoch": 56.0, - "eval_LOCATION_f1": 0.9263157894736843, + "eval_LOCATION_f1": 0.9214659685863874, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9072164948453608, + "eval_LOCATION_precision": 0.8979591836734694, "eval_LOCATION_recall": 0.946236559139785, - "eval_ORGANIZATION_f1": 0.9272727272727274, + "eval_ORGANIZATION_f1": 0.9216867469879518, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9329268292682927, + "eval_ORGANIZATION_precision": 0.9216867469879518, "eval_ORGANIZATION_recall": 0.9216867469879518, - "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_f1": 0.9681978798586572, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.05357722193002701, - "eval_overall_accuracy": 0.9887517146776406, - "eval_overall_f1": 0.9439601494396015, - "eval_overall_precision": 0.9427860696517413, - "eval_overall_recall": 0.9451371571072319, - "eval_runtime": 0.6253, - "eval_samples_per_second": 271.851, - "eval_steps_per_second": 4.797, + "eval_PERSON_precision": 0.9716312056737588, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.059241946786642075, + "eval_overall_accuracy": 0.9879286694101509, + "eval_overall_f1": 0.9379652605459058, + "eval_overall_precision": 0.9333333333333333, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2893, + "eval_samples_per_second": 587.723, + "eval_steps_per_second": 10.372, "step": 5376 }, { "epoch": 57.0, - "grad_norm": 0.9852465391159058, + "grad_norm": 0.6489832401275635, "learning_rate": 2.15e-05, - "loss": 0.0145, + "loss": 0.0143, "step": 5472 }, { "epoch": 57.0, - "eval_LOCATION_f1": 0.9312169312169313, + "eval_LOCATION_f1": 0.9128205128205128, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9166666666666666, - "eval_LOCATION_recall": 0.946236559139785, - "eval_ORGANIZATION_f1": 0.918429003021148, + "eval_LOCATION_precision": 0.8725490196078431, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9207317073170733, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9212121212121213, - "eval_ORGANIZATION_recall": 0.9156626506024096, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.9320987654320988, + "eval_ORGANIZATION_recall": 0.9096385542168675, + "eval_PERSON_f1": 0.9788732394366197, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.05067111924290657, - "eval_overall_accuracy": 0.9898491083676269, - "eval_overall_f1": 0.941469489414695, - "eval_overall_precision": 0.9402985074626866, - "eval_overall_recall": 0.942643391521197, - "eval_runtime": 0.7174, - "eval_samples_per_second": 236.961, - "eval_steps_per_second": 4.182, + "eval_PERSON_precision": 0.9788732394366197, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.0567299947142601, + "eval_overall_accuracy": 0.9887517146776406, + "eval_overall_f1": 0.9392812887236679, + "eval_overall_precision": 0.9334975369458128, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.2876, + "eval_samples_per_second": 591.033, + "eval_steps_per_second": 10.43, "step": 5472 }, { "epoch": 58.0, - "grad_norm": 0.5912477374076843, + "grad_norm": 0.1813511699438095, "learning_rate": 2.1e-05, - "loss": 0.0138, + "loss": 0.0135, "step": 5568 }, { "epoch": 58.0, - "eval_LOCATION_f1": 0.9214659685863874, + "eval_LOCATION_f1": 0.9175257731958764, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8979591836734694, - "eval_LOCATION_recall": 0.946236559139785, - "eval_ORGANIZATION_f1": 0.9179331306990882, + "eval_LOCATION_precision": 0.8811881188118812, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9240121580547112, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9263803680981595, - "eval_ORGANIZATION_recall": 0.9096385542168675, - "eval_PERSON_f1": 0.9681978798586572, + "eval_ORGANIZATION_precision": 0.9325153374233128, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9716312056737588, - "eval_PERSON_recall": 0.9647887323943662, - "eval_loss": 0.054992906749248505, - "eval_overall_accuracy": 0.9893004115226337, - "eval_overall_f1": 0.9364881693648817, - "eval_overall_precision": 0.9353233830845771, - "eval_overall_recall": 0.9376558603491272, - "eval_runtime": 0.636, - "eval_samples_per_second": 267.284, - "eval_steps_per_second": 4.717, + "eval_PERSON_precision": 0.9858156028368794, + "eval_PERSON_recall": 0.9788732394366197, + "eval_loss": 0.06323638558387756, + "eval_overall_accuracy": 0.9876543209876543, + "eval_overall_f1": 0.9429280397022332, + "eval_overall_precision": 0.9382716049382716, + "eval_overall_recall": 0.9476309226932669, + "eval_runtime": 0.2942, + "eval_samples_per_second": 577.895, + "eval_steps_per_second": 10.198, "step": 5568 }, { "epoch": 59.0, - "grad_norm": 0.44573214650154114, + "grad_norm": 0.027425482869148254, "learning_rate": 2.05e-05, - "loss": 0.0122, + "loss": 0.0111, "step": 5664 }, { "epoch": 59.0, - "eval_LOCATION_f1": 0.9381443298969072, + "eval_LOCATION_f1": 0.9263157894736843, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.900990099009901, - "eval_LOCATION_recall": 0.978494623655914, - "eval_ORGANIZATION_f1": 0.9235474006116209, + "eval_LOCATION_precision": 0.9072164948453608, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9174311926605505, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.937888198757764, - "eval_ORGANIZATION_recall": 0.9096385542168675, - "eval_PERSON_f1": 0.9752650176678446, + "eval_ORGANIZATION_precision": 0.9316770186335404, + "eval_ORGANIZATION_recall": 0.9036144578313253, + "eval_PERSON_f1": 0.9681978798586572, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.05316930636763573, - "eval_overall_accuracy": 0.9882030178326474, - "eval_overall_f1": 0.945273631840796, - "eval_overall_precision": 0.9429280397022333, - "eval_overall_recall": 0.9476309226932669, - "eval_runtime": 0.6375, - "eval_samples_per_second": 266.686, - "eval_steps_per_second": 4.706, + "eval_PERSON_precision": 0.9716312056737588, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.06284485757350922, + "eval_overall_accuracy": 0.9871056241426612, + "eval_overall_f1": 0.9375, + "eval_overall_precision": 0.9398496240601504, + "eval_overall_recall": 0.9351620947630923, + "eval_runtime": 0.2885, + "eval_samples_per_second": 589.346, + "eval_steps_per_second": 10.4, "step": 5664 }, { "epoch": 60.0, - "grad_norm": 0.14488360285758972, + "grad_norm": 0.3114117383956909, "learning_rate": 2e-05, - "loss": 0.0144, + "loss": 0.0108, "step": 5760 }, { "epoch": 60.0, - "eval_LOCATION_f1": 0.9285714285714286, + "eval_LOCATION_f1": 0.9157894736842105, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.883495145631068, - "eval_LOCATION_recall": 0.978494623655914, - "eval_ORGANIZATION_f1": 0.9135802469135801, + "eval_LOCATION_precision": 0.8969072164948454, + "eval_LOCATION_recall": 0.9354838709677419, + "eval_ORGANIZATION_f1": 0.9129129129129129, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9367088607594937, - "eval_ORGANIZATION_recall": 0.891566265060241, - "eval_PERSON_f1": 0.971830985915493, + "eval_ORGANIZATION_precision": 0.9101796407185628, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9681978798586572, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.971830985915493, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.05720693990588188, - "eval_overall_accuracy": 0.9882030178326474, - "eval_overall_f1": 0.9378109452736318, - "eval_overall_precision": 0.9354838709677419, - "eval_overall_recall": 0.940149625935162, - "eval_runtime": 0.6336, - "eval_samples_per_second": 268.304, - "eval_steps_per_second": 4.735, + "eval_PERSON_precision": 0.9716312056737588, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.05974132940173149, + "eval_overall_accuracy": 0.9868312757201646, + "eval_overall_f1": 0.9330024813895781, + "eval_overall_precision": 0.928395061728395, + "eval_overall_recall": 0.9376558603491272, + "eval_runtime": 0.2915, + "eval_samples_per_second": 583.278, + "eval_steps_per_second": 10.293, "step": 5760 }, { "epoch": 61.0, - "grad_norm": 0.5334123373031616, + "grad_norm": 0.14523079991340637, "learning_rate": 1.9500000000000003e-05, - "loss": 0.0126, + "loss": 0.0144, "step": 5856 }, { "epoch": 61.0, - "eval_LOCATION_f1": 0.9574468085106383, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9473684210526315, - "eval_LOCATION_recall": 0.967741935483871, - "eval_ORGANIZATION_f1": 0.9333333333333333, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9169230769230768, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9390243902439024, - "eval_ORGANIZATION_recall": 0.927710843373494, - "eval_PERSON_f1": 0.971830985915493, + "eval_ORGANIZATION_precision": 0.9371069182389937, + "eval_ORGANIZATION_recall": 0.8975903614457831, + "eval_PERSON_f1": 0.9681978798586572, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.971830985915493, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.048754770308732986, - "eval_overall_accuracy": 0.9906721536351166, - "eval_overall_f1": 0.9526184538653366, - "eval_overall_precision": 0.9526184538653366, - "eval_overall_recall": 0.9526184538653366, - "eval_runtime": 0.63, - "eval_samples_per_second": 269.825, - "eval_steps_per_second": 4.762, + "eval_PERSON_precision": 0.9716312056737588, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.06437654048204422, + "eval_overall_accuracy": 0.9860082304526749, + "eval_overall_f1": 0.935, + "eval_overall_precision": 0.9373433583959899, + "eval_overall_recall": 0.9326683291770573, + "eval_runtime": 0.2896, + "eval_samples_per_second": 587.057, + "eval_steps_per_second": 10.36, "step": 5856 }, { "epoch": 62.0, - "grad_norm": 1.1560488939285278, + "grad_norm": 0.9911012649536133, "learning_rate": 1.9e-05, - "loss": 0.0113, + "loss": 0.0124, "step": 5952 }, { "epoch": 62.0, - "eval_LOCATION_f1": 0.9278350515463919, + "eval_LOCATION_f1": 0.9109947643979057, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8910891089108911, - "eval_LOCATION_recall": 0.967741935483871, - "eval_ORGANIZATION_f1": 0.9079754601226994, + "eval_LOCATION_precision": 0.8877551020408163, + "eval_LOCATION_recall": 0.9354838709677419, + "eval_ORGANIZATION_f1": 0.9166666666666667, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.925, - "eval_ORGANIZATION_recall": 0.891566265060241, - "eval_PERSON_f1": 0.971830985915493, + "eval_ORGANIZATION_precision": 0.9058823529411765, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.971830985915493, + "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.056291624903678894, - "eval_overall_accuracy": 0.9887517146776406, - "eval_overall_f1": 0.9353233830845771, - "eval_overall_precision": 0.9330024813895782, - "eval_overall_recall": 0.9376558603491272, - "eval_runtime": 0.626, - "eval_samples_per_second": 271.571, - "eval_steps_per_second": 4.792, + "eval_loss": 0.05878928676247597, + "eval_overall_accuracy": 0.9871056241426612, + "eval_overall_f1": 0.9369592088998765, + "eval_overall_precision": 0.928921568627451, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.2911, + "eval_samples_per_second": 583.943, + "eval_steps_per_second": 10.305, "step": 5952 }, { "epoch": 63.0, - "grad_norm": 0.4997645616531372, + "grad_norm": 0.3812105059623718, "learning_rate": 1.85e-05, - "loss": 0.0109, + "loss": 0.0117, "step": 6048 }, { "epoch": 63.0, - "eval_LOCATION_f1": 0.9128205128205128, + "eval_LOCATION_f1": 0.9214659685863874, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8725490196078431, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.8895705521472392, + "eval_LOCATION_precision": 0.8979591836734694, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9281437125748503, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.90625, - "eval_ORGANIZATION_recall": 0.8734939759036144, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9226190476190477, + "eval_ORGANIZATION_recall": 0.9337349397590361, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.0592079721391201, - "eval_overall_accuracy": 0.9865569272976681, - "eval_overall_f1": 0.927860696517413, - "eval_overall_precision": 0.9255583126550868, - "eval_overall_recall": 0.9301745635910225, - "eval_runtime": 0.635, - "eval_samples_per_second": 267.73, - "eval_steps_per_second": 4.725, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.0626605674624443, + "eval_overall_accuracy": 0.9879286694101509, + "eval_overall_f1": 0.9442379182156134, + "eval_overall_precision": 0.9384236453201971, + "eval_overall_recall": 0.9501246882793017, + "eval_runtime": 0.2883, + "eval_samples_per_second": 589.687, + "eval_steps_per_second": 10.406, "step": 6048 }, { "epoch": 64.0, - "grad_norm": 0.4493540823459625, + "grad_norm": 0.28910690546035767, "learning_rate": 1.8e-05, - "loss": 0.0124, + "loss": 0.0101, "step": 6144 }, { "epoch": 64.0, - "eval_LOCATION_f1": 0.9304812834224598, + "eval_LOCATION_f1": 0.9263157894736843, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.925531914893617, - "eval_LOCATION_recall": 0.9354838709677419, - "eval_ORGANIZATION_f1": 0.9030303030303031, + "eval_LOCATION_precision": 0.9072164948453608, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.924924924924925, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9085365853658537, - "eval_ORGANIZATION_recall": 0.8975903614457831, - "eval_PERSON_f1": 0.971830985915493, + "eval_ORGANIZATION_precision": 0.9221556886227545, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.971830985915493, + "eval_PERSON_precision": 0.9857142857142858, "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.05676347389817238, - "eval_overall_accuracy": 0.9876543209876543, - "eval_overall_f1": 0.933832709113608, - "eval_overall_precision": 0.935, - "eval_overall_recall": 0.9326683291770573, - "eval_runtime": 0.6373, - "eval_samples_per_second": 266.761, - "eval_steps_per_second": 4.708, + "eval_loss": 0.0623946376144886, + "eval_overall_accuracy": 0.9871056241426612, + "eval_overall_f1": 0.9440993788819876, + "eval_overall_precision": 0.9405940594059405, + "eval_overall_recall": 0.9476309226932669, + "eval_runtime": 0.2886, + "eval_samples_per_second": 588.967, + "eval_steps_per_second": 10.394, "step": 6144 }, { "epoch": 65.0, - "grad_norm": 0.3998490273952484, + "grad_norm": 0.6301209926605225, "learning_rate": 1.75e-05, - "loss": 0.0126, + "loss": 0.0117, "step": 6240 }, { "epoch": 65.0, - "eval_LOCATION_f1": 0.9270833333333334, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.898989898989899, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.8929663608562691, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9101796407185628, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.906832298136646, - "eval_ORGANIZATION_recall": 0.8795180722891566, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9047619047619048, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.05590180307626724, - "eval_overall_accuracy": 0.9876543209876543, - "eval_overall_f1": 0.9326683291770573, - "eval_overall_precision": 0.9326683291770573, - "eval_overall_recall": 0.9326683291770573, - "eval_runtime": 0.6342, - "eval_samples_per_second": 268.035, - "eval_steps_per_second": 4.73, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06263893097639084, + "eval_overall_accuracy": 0.9871056241426612, + "eval_overall_f1": 0.9356435643564357, + "eval_overall_precision": 0.9287469287469288, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2889, + "eval_samples_per_second": 588.45, + "eval_steps_per_second": 10.384, "step": 6240 }, { "epoch": 66.0, - "grad_norm": 0.5854397416114807, + "grad_norm": 0.4729289412498474, "learning_rate": 1.7000000000000003e-05, - "loss": 0.0112, + "loss": 0.0108, "step": 6336 }, { "epoch": 66.0, - "eval_LOCATION_f1": 0.9361702127659575, + "eval_LOCATION_f1": 0.9052631578947367, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9263157894736842, - "eval_LOCATION_recall": 0.946236559139785, - "eval_ORGANIZATION_f1": 0.9123867069486404, + "eval_LOCATION_precision": 0.8865979381443299, + "eval_LOCATION_recall": 0.9247311827956989, + "eval_ORGANIZATION_f1": 0.9179331306990882, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9151515151515152, + "eval_ORGANIZATION_precision": 0.9263803680981595, "eval_ORGANIZATION_recall": 0.9096385542168675, - "eval_PERSON_f1": 0.9752650176678446, + "eval_PERSON_f1": 0.9647887323943662, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9787234042553191, - "eval_PERSON_recall": 0.971830985915493, - "eval_loss": 0.057300012558698654, - "eval_overall_accuracy": 0.988477366255144, - "eval_overall_f1": 0.940149625935162, - "eval_overall_precision": 0.940149625935162, - "eval_overall_recall": 0.940149625935162, - "eval_runtime": 0.6275, - "eval_samples_per_second": 270.902, - "eval_steps_per_second": 4.781, + "eval_PERSON_precision": 0.9647887323943662, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.05901220068335533, + "eval_overall_accuracy": 0.9876543209876543, + "eval_overall_f1": 0.9315068493150686, + "eval_overall_precision": 0.9303482587064676, + "eval_overall_recall": 0.9326683291770573, + "eval_runtime": 0.2942, + "eval_samples_per_second": 577.799, + "eval_steps_per_second": 10.196, "step": 6336 }, { "epoch": 67.0, - "grad_norm": 0.009959683753550053, + "grad_norm": 0.20351357758045197, "learning_rate": 1.65e-05, - "loss": 0.0112, + "loss": 0.0104, "step": 6432 }, { "epoch": 67.0, - "eval_LOCATION_f1": 0.9035532994923857, + "eval_LOCATION_f1": 0.9270833333333334, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8557692307692307, + "eval_LOCATION_precision": 0.898989898989899, "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.8848484848484849, + "eval_ORGANIZATION_f1": 0.9161676646706587, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.8902439024390244, - "eval_ORGANIZATION_recall": 0.8795180722891566, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9107142857142857, + "eval_ORGANIZATION_recall": 0.9216867469879518, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.06168980151414871, - "eval_overall_accuracy": 0.9862825788751715, - "eval_overall_f1": 0.9234567901234567, - "eval_overall_precision": 0.9144254278728606, - "eval_overall_recall": 0.9326683291770573, - "eval_runtime": 0.6306, - "eval_samples_per_second": 269.564, - "eval_steps_per_second": 4.757, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06248219311237335, + "eval_overall_accuracy": 0.9876543209876543, + "eval_overall_f1": 0.9405940594059405, + "eval_overall_precision": 0.9336609336609336, + "eval_overall_recall": 0.9476309226932669, + "eval_runtime": 0.2905, + "eval_samples_per_second": 585.283, + "eval_steps_per_second": 10.329, "step": 6432 }, { "epoch": 68.0, - "grad_norm": 0.43869954347610474, + "grad_norm": 0.7123875617980957, "learning_rate": 1.6000000000000003e-05, - "loss": 0.0117, + "loss": 0.0119, "step": 6528 }, { @@ -2098,1024 +2098,1024 @@ "eval_LOCATION_number": 93, "eval_LOCATION_precision": 0.9081632653061225, "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9146341463414634, + "eval_ORGANIZATION_f1": 0.9166666666666667, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9259259259259259, - "eval_ORGANIZATION_recall": 0.9036144578313253, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9058823529411765, + "eval_ORGANIZATION_recall": 0.927710843373494, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.05341716110706329, - "eval_overall_accuracy": 0.988477366255144, - "eval_overall_f1": 0.942643391521197, - "eval_overall_precision": 0.942643391521197, - "eval_overall_recall": 0.942643391521197, - "eval_runtime": 0.64, - "eval_samples_per_second": 265.619, - "eval_steps_per_second": 4.687, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06472578644752502, + "eval_overall_accuracy": 0.9873799725651577, + "eval_overall_f1": 0.9419035846724352, + "eval_overall_precision": 0.9338235294117647, + "eval_overall_recall": 0.9501246882793017, + "eval_runtime": 0.2879, + "eval_samples_per_second": 590.566, + "eval_steps_per_second": 10.422, "step": 6528 }, { "epoch": 69.0, - "grad_norm": 0.6488298177719116, + "grad_norm": 0.5229490399360657, "learning_rate": 1.55e-05, - "loss": 0.0101, + "loss": 0.011, "step": 6624 }, { "epoch": 69.0, - "eval_LOCATION_f1": 0.9417989417989417, + "eval_LOCATION_f1": 0.9214659685863874, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9270833333333334, - "eval_LOCATION_recall": 0.956989247311828, + "eval_LOCATION_precision": 0.8979591836734694, + "eval_LOCATION_recall": 0.946236559139785, "eval_ORGANIZATION_f1": 0.9156626506024096, "eval_ORGANIZATION_number": 166, "eval_ORGANIZATION_precision": 0.9156626506024096, "eval_ORGANIZATION_recall": 0.9156626506024096, - "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.05710820481181145, - "eval_overall_accuracy": 0.9890260631001372, - "eval_overall_f1": 0.945273631840796, - "eval_overall_precision": 0.9429280397022333, - "eval_overall_recall": 0.9476309226932669, - "eval_runtime": 0.6329, - "eval_samples_per_second": 268.611, - "eval_steps_per_second": 4.74, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.05594140291213989, + "eval_overall_accuracy": 0.9887517146776406, + "eval_overall_f1": 0.9379652605459058, + "eval_overall_precision": 0.9333333333333333, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2896, + "eval_samples_per_second": 586.973, + "eval_steps_per_second": 10.358, "step": 6624 }, { "epoch": 70.0, - "grad_norm": 0.07458376884460449, + "grad_norm": 0.36743980646133423, "learning_rate": 1.5e-05, - "loss": 0.0092, + "loss": 0.0096, "step": 6720 }, { "epoch": 70.0, - "eval_LOCATION_f1": 0.9374999999999999, + "eval_LOCATION_f1": 0.9270833333333334, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9090909090909091, - "eval_LOCATION_recall": 0.967741935483871, - "eval_ORGANIZATION_f1": 0.9240121580547112, + "eval_LOCATION_precision": 0.898989898989899, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9129129129129129, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9325153374233128, + "eval_ORGANIZATION_precision": 0.9101796407185628, "eval_ORGANIZATION_recall": 0.9156626506024096, - "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.05593955144286156, - "eval_overall_accuracy": 0.9893004115226337, - "eval_overall_f1": 0.9477611940298507, - "eval_overall_precision": 0.9454094292803971, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.631, - "eval_samples_per_second": 269.43, - "eval_steps_per_second": 4.755, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.05785645917057991, + "eval_overall_accuracy": 0.9882030178326474, + "eval_overall_f1": 0.9392812887236679, + "eval_overall_precision": 0.9334975369458128, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.2897, + "eval_samples_per_second": 586.718, + "eval_steps_per_second": 10.354, "step": 6720 }, { "epoch": 71.0, - "grad_norm": 1.7527589797973633, + "grad_norm": 1.1272770166397095, "learning_rate": 1.45e-05, - "loss": 0.011, + "loss": 0.0103, "step": 6816 }, { "epoch": 71.0, - "eval_LOCATION_f1": 0.946808510638298, + "eval_LOCATION_f1": 0.9214659685863874, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9368421052631579, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9272727272727274, + "eval_LOCATION_precision": 0.8979591836734694, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9047619047619048, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9329268292682927, - "eval_ORGANIZATION_recall": 0.9216867469879518, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.8941176470588236, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.061140261590480804, - "eval_overall_accuracy": 0.9879286694101509, - "eval_overall_f1": 0.951310861423221, - "eval_overall_precision": 0.9525, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.6251, - "eval_samples_per_second": 271.935, - "eval_steps_per_second": 4.799, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06020239740610123, + "eval_overall_accuracy": 0.9876543209876543, + "eval_overall_f1": 0.9344870210135972, + "eval_overall_precision": 0.9264705882352942, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2903, + "eval_samples_per_second": 585.692, + "eval_steps_per_second": 10.336, "step": 6816 }, { "epoch": 72.0, - "grad_norm": 0.09629790484905243, + "grad_norm": 0.5486194491386414, "learning_rate": 1.4000000000000001e-05, - "loss": 0.0097, + "loss": 0.0091, "step": 6912 }, { "epoch": 72.0, - "eval_LOCATION_f1": 0.9312169312169313, + "eval_LOCATION_f1": 0.9270833333333334, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9166666666666666, - "eval_LOCATION_recall": 0.946236559139785, - "eval_ORGANIZATION_f1": 0.9268292682926829, + "eval_LOCATION_precision": 0.898989898989899, + "eval_LOCATION_recall": 0.956989247311828, + "eval_ORGANIZATION_f1": 0.9212121212121213, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9382716049382716, + "eval_ORGANIZATION_precision": 0.926829268292683, "eval_ORGANIZATION_recall": 0.9156626506024096, - "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.055803388357162476, - "eval_overall_accuracy": 0.9893004115226337, - "eval_overall_f1": 0.9475000000000001, - "eval_overall_precision": 0.949874686716792, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.0652991458773613, + "eval_overall_accuracy": 0.9876543209876543, + "eval_overall_f1": 0.9416149068322982, + "eval_overall_precision": 0.9381188118811881, "eval_overall_recall": 0.9451371571072319, - "eval_runtime": 0.6294, - "eval_samples_per_second": 270.101, - "eval_steps_per_second": 4.766, + "eval_runtime": 0.2905, + "eval_samples_per_second": 585.264, + "eval_steps_per_second": 10.328, "step": 6912 }, { "epoch": 73.0, - "grad_norm": 0.45607447624206543, + "grad_norm": 1.128568172454834, "learning_rate": 1.3500000000000001e-05, - "loss": 0.0102, + "loss": 0.0093, "step": 7008 }, { "epoch": 73.0, - "eval_LOCATION_f1": 0.9214659685863874, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8979591836734694, + "eval_LOCATION_precision": 0.8888888888888888, "eval_LOCATION_recall": 0.946236559139785, - "eval_ORGANIZATION_f1": 0.9207317073170733, + "eval_ORGANIZATION_f1": 0.9096385542168675, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9320987654320988, + "eval_ORGANIZATION_precision": 0.9096385542168675, "eval_ORGANIZATION_recall": 0.9096385542168675, - "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_f1": 0.9681978798586572, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.05601666495203972, - "eval_overall_accuracy": 0.9879286694101509, - "eval_overall_f1": 0.942643391521197, - "eval_overall_precision": 0.942643391521197, - "eval_overall_recall": 0.942643391521197, - "eval_runtime": 0.632, - "eval_samples_per_second": 268.986, - "eval_steps_per_second": 4.747, + "eval_PERSON_precision": 0.9716312056737588, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.06982634961605072, + "eval_overall_accuracy": 0.9865569272976681, + "eval_overall_f1": 0.9318463444857497, + "eval_overall_precision": 0.9261083743842364, + "eval_overall_recall": 0.9376558603491272, + "eval_runtime": 0.2922, + "eval_samples_per_second": 581.854, + "eval_steps_per_second": 10.268, "step": 7008 }, { "epoch": 74.0, - "grad_norm": 0.02336580492556095, + "grad_norm": 0.033605970442295074, "learning_rate": 1.3000000000000001e-05, - "loss": 0.0098, + "loss": 0.0091, "step": 7104 }, { "epoch": 74.0, - "eval_LOCATION_f1": 0.9424083769633509, + "eval_LOCATION_f1": 0.9263157894736843, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9183673469387755, - "eval_LOCATION_recall": 0.967741935483871, - "eval_ORGANIZATION_f1": 0.918429003021148, + "eval_LOCATION_precision": 0.9072164948453608, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9074626865671641, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9212121212121213, + "eval_ORGANIZATION_precision": 0.8994082840236687, "eval_ORGANIZATION_recall": 0.9156626506024096, - "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.05871947109699249, - "eval_overall_accuracy": 0.9882030178326474, - "eval_overall_f1": 0.946583850931677, - "eval_overall_precision": 0.943069306930693, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.6932, - "eval_samples_per_second": 245.254, - "eval_steps_per_second": 4.328, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06609321385622025, + "eval_overall_accuracy": 0.9871056241426612, + "eval_overall_f1": 0.9356435643564357, + "eval_overall_precision": 0.9287469287469288, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2892, + "eval_samples_per_second": 587.781, + "eval_steps_per_second": 10.373, "step": 7104 }, { "epoch": 75.0, - "grad_norm": 0.5431827902793884, + "grad_norm": 0.5324849486351013, "learning_rate": 1.25e-05, - "loss": 0.0105, + "loss": 0.0093, "step": 7200 }, { "epoch": 75.0, - "eval_LOCATION_f1": 0.9175257731958764, + "eval_LOCATION_f1": 0.90625, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.8811881188118812, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9057750759878419, + "eval_LOCATION_precision": 0.8787878787878788, + "eval_LOCATION_recall": 0.9354838709677419, + "eval_ORGANIZATION_f1": 0.9009009009009009, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9141104294478528, - "eval_ORGANIZATION_recall": 0.8975903614457831, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.8982035928143712, + "eval_ORGANIZATION_recall": 0.9036144578313253, + "eval_PERSON_f1": 0.9681978798586572, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.05846357345581055, - "eval_overall_accuracy": 0.9873799725651577, - "eval_overall_f1": 0.9354838709677419, - "eval_overall_precision": 0.9308641975308642, - "eval_overall_recall": 0.940149625935162, - "eval_runtime": 0.6414, - "eval_samples_per_second": 265.034, - "eval_steps_per_second": 4.677, + "eval_PERSON_precision": 0.9716312056737588, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.07267607003450394, + "eval_overall_accuracy": 0.9854595336076818, + "eval_overall_f1": 0.9257425742574258, + "eval_overall_precision": 0.918918918918919, + "eval_overall_recall": 0.9326683291770573, + "eval_runtime": 0.2896, + "eval_samples_per_second": 586.938, + "eval_steps_per_second": 10.358, "step": 7200 }, { "epoch": 76.0, - "grad_norm": 0.803601086139679, + "grad_norm": 0.02331961691379547, "learning_rate": 1.2e-05, - "loss": 0.01, + "loss": 0.0087, "step": 7296 }, { "epoch": 76.0, - "eval_LOCATION_f1": 0.946808510638298, + "eval_LOCATION_f1": 0.922279792746114, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9368421052631579, + "eval_LOCATION_precision": 0.89, "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9244712990936556, + "eval_ORGANIZATION_f1": 0.9129129129129129, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9272727272727272, - "eval_ORGANIZATION_recall": 0.9216867469879518, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9101796407185628, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.059869080781936646, - "eval_overall_accuracy": 0.9893004115226337, - "eval_overall_f1": 0.9501246882793017, - "eval_overall_precision": 0.9501246882793017, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.6266, - "eval_samples_per_second": 271.309, - "eval_steps_per_second": 4.788, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06968007981777191, + "eval_overall_accuracy": 0.9868312757201646, + "eval_overall_f1": 0.9369592088998765, + "eval_overall_precision": 0.928921568627451, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.2896, + "eval_samples_per_second": 587.037, + "eval_steps_per_second": 10.359, "step": 7296 }, { "epoch": 77.0, - "grad_norm": 0.8899322152137756, + "grad_norm": 1.1283886432647705, "learning_rate": 1.1500000000000002e-05, - "loss": 0.0084, + "loss": 0.0104, "step": 7392 }, { "epoch": 77.0, - "eval_LOCATION_f1": 0.9374999999999999, + "eval_LOCATION_f1": 0.911917098445596, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9090909090909091, - "eval_LOCATION_recall": 0.967741935483871, - "eval_ORGANIZATION_f1": 0.918429003021148, + "eval_LOCATION_precision": 0.88, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9212121212121213, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9212121212121213, + "eval_ORGANIZATION_precision": 0.926829268292683, "eval_ORGANIZATION_recall": 0.9156626506024096, - "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.060152728110551834, - "eval_overall_accuracy": 0.9893004115226337, - "eval_overall_f1": 0.9454094292803972, - "eval_overall_precision": 0.9407407407407408, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.629, - "eval_samples_per_second": 270.252, - "eval_steps_per_second": 4.769, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06767893582582474, + "eval_overall_accuracy": 0.9879286694101509, + "eval_overall_f1": 0.9391304347826087, + "eval_overall_precision": 0.9356435643564357, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2891, + "eval_samples_per_second": 588.107, + "eval_steps_per_second": 10.378, "step": 7392 }, { "epoch": 78.0, - "grad_norm": 0.5667243599891663, + "grad_norm": 0.6260883212089539, "learning_rate": 1.1000000000000001e-05, "loss": 0.0089, "step": 7488 }, { "epoch": 78.0, - "eval_LOCATION_f1": 0.922279792746114, + "eval_LOCATION_f1": 0.9109947643979057, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.89, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9151515151515153, + "eval_LOCATION_precision": 0.8877551020408163, + "eval_LOCATION_recall": 0.9354838709677419, + "eval_ORGANIZATION_f1": 0.906906906906907, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9207317073170732, + "eval_ORGANIZATION_precision": 0.9041916167664671, "eval_ORGANIZATION_recall": 0.9096385542168675, - "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_f1": 0.9681978798586572, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.06323432922363281, - "eval_overall_accuracy": 0.9879286694101509, - "eval_overall_f1": 0.9404466501240694, - "eval_overall_precision": 0.9358024691358025, - "eval_overall_recall": 0.9451371571072319, - "eval_runtime": 0.63, - "eval_samples_per_second": 269.835, - "eval_steps_per_second": 4.762, + "eval_PERSON_precision": 0.9716312056737588, + "eval_PERSON_recall": 0.9647887323943662, + "eval_loss": 0.06810528039932251, + "eval_overall_accuracy": 0.9862825788751715, + "eval_overall_f1": 0.9293680297397771, + "eval_overall_precision": 0.9236453201970444, + "eval_overall_recall": 0.9351620947630923, + "eval_runtime": 0.2872, + "eval_samples_per_second": 591.943, + "eval_steps_per_second": 10.446, "step": 7488 }, { "epoch": 79.0, - "grad_norm": 0.020913848653435707, + "grad_norm": 0.1493467390537262, "learning_rate": 1.05e-05, - "loss": 0.0086, + "loss": 0.0092, "step": 7584 }, { "epoch": 79.0, - "eval_LOCATION_f1": 0.9263157894736843, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9072164948453608, + "eval_LOCATION_precision": 0.8888888888888888, "eval_LOCATION_recall": 0.946236559139785, - "eval_ORGANIZATION_f1": 0.9235474006116209, + "eval_ORGANIZATION_f1": 0.9014925373134328, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.937888198757764, + "eval_ORGANIZATION_precision": 0.893491124260355, "eval_ORGANIZATION_recall": 0.9096385542168675, - "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.05881747603416443, - "eval_overall_accuracy": 0.988477366255144, - "eval_overall_f1": 0.9450000000000001, - "eval_overall_precision": 0.9473684210526315, - "eval_overall_recall": 0.942643391521197, - "eval_runtime": 0.6316, - "eval_samples_per_second": 269.163, - "eval_steps_per_second": 4.75, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06890777498483658, + "eval_overall_accuracy": 0.9862825788751715, + "eval_overall_f1": 0.9320148331273177, + "eval_overall_precision": 0.9240196078431373, + "eval_overall_recall": 0.940149625935162, + "eval_runtime": 0.291, + "eval_samples_per_second": 584.258, + "eval_steps_per_second": 10.31, "step": 7584 }, { "epoch": 80.0, - "grad_norm": 0.7093363404273987, + "grad_norm": 0.6644344329833984, "learning_rate": 1e-05, - "loss": 0.009, + "loss": 0.0069, "step": 7680 }, { "epoch": 80.0, - "eval_LOCATION_f1": 0.9368421052631579, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9175257731958762, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9244712990936556, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9101796407185628, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9272727272727272, - "eval_ORGANIZATION_recall": 0.9216867469879518, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9047619047619048, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.06179361417889595, - "eval_overall_accuracy": 0.988477366255144, - "eval_overall_f1": 0.9477611940298507, - "eval_overall_precision": 0.9454094292803971, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.641, - "eval_samples_per_second": 265.216, - "eval_steps_per_second": 4.68, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.07149485498666763, + "eval_overall_accuracy": 0.9871056241426612, + "eval_overall_f1": 0.9356435643564357, + "eval_overall_precision": 0.9287469287469288, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2881, + "eval_samples_per_second": 590.06, + "eval_steps_per_second": 10.413, "step": 7680 }, { "epoch": 81.0, - "grad_norm": 0.23694337904453278, + "grad_norm": 1.0200001001358032, "learning_rate": 9.5e-06, - "loss": 0.0084, + "loss": 0.0087, "step": 7776 }, { "epoch": 81.0, - "eval_LOCATION_f1": 0.9368421052631579, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9175257731958762, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9123867069486404, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9074626865671641, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9151515151515152, - "eval_ORGANIZATION_recall": 0.9096385542168675, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.8994082840236687, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.06119789183139801, - "eval_overall_accuracy": 0.9882030178326474, - "eval_overall_f1": 0.9427860696517413, - "eval_overall_precision": 0.9404466501240695, - "eval_overall_recall": 0.9451371571072319, - "eval_runtime": 0.6315, - "eval_samples_per_second": 269.203, - "eval_steps_per_second": 4.751, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06909173727035522, + "eval_overall_accuracy": 0.9871056241426612, + "eval_overall_f1": 0.9344870210135972, + "eval_overall_precision": 0.9264705882352942, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2887, + "eval_samples_per_second": 588.812, + "eval_steps_per_second": 10.391, "step": 7776 }, { "epoch": 82.0, - "grad_norm": 0.3362814784049988, + "grad_norm": 0.21058472990989685, "learning_rate": 9e-06, - "loss": 0.0089, + "loss": 0.0081, "step": 7872 }, { "epoch": 82.0, - "eval_LOCATION_f1": 0.9319371727748691, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9081632653061225, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9151515151515153, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.906906906906907, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9207317073170732, + "eval_ORGANIZATION_precision": 0.9041916167664671, "eval_ORGANIZATION_recall": 0.9096385542168675, - "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.059507716447114944, - "eval_overall_accuracy": 0.9890260631001372, - "eval_overall_f1": 0.9427860696517413, - "eval_overall_precision": 0.9404466501240695, - "eval_overall_recall": 0.9451371571072319, - "eval_runtime": 0.6408, - "eval_samples_per_second": 265.312, - "eval_steps_per_second": 4.682, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.07346898317337036, + "eval_overall_accuracy": 0.9860082304526749, + "eval_overall_f1": 0.9343246592317225, + "eval_overall_precision": 0.9285714285714286, + "eval_overall_recall": 0.940149625935162, + "eval_runtime": 0.2908, + "eval_samples_per_second": 584.583, + "eval_steps_per_second": 10.316, "step": 7872 }, { "epoch": 83.0, - "grad_norm": 0.15785475075244904, + "grad_norm": 0.9036238789558411, "learning_rate": 8.500000000000002e-06, - "loss": 0.0089, + "loss": 0.0099, "step": 7968 }, { "epoch": 83.0, - "eval_LOCATION_f1": 0.946808510638298, + "eval_LOCATION_f1": 0.9214659685863874, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9368421052631579, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.924924924924925, + "eval_LOCATION_precision": 0.8979591836734694, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9189189189189191, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9221556886227545, - "eval_ORGANIZATION_recall": 0.927710843373494, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9161676646706587, + "eval_ORGANIZATION_recall": 0.9216867469879518, + "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.05860907584428787, - "eval_overall_accuracy": 0.9890260631001372, - "eval_overall_f1": 0.9502487562189055, - "eval_overall_precision": 0.9478908188585607, - "eval_overall_recall": 0.9526184538653366, - "eval_runtime": 0.6371, - "eval_samples_per_second": 266.844, - "eval_steps_per_second": 4.709, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06314818561077118, + "eval_overall_accuracy": 0.9882030178326474, + "eval_overall_f1": 0.9392812887236679, + "eval_overall_precision": 0.9334975369458128, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.2899, + "eval_samples_per_second": 586.334, + "eval_steps_per_second": 10.347, "step": 7968 }, { "epoch": 84.0, - "grad_norm": 0.4708460569381714, + "grad_norm": 0.02120956964790821, "learning_rate": 8.000000000000001e-06, - "loss": 0.0084, + "loss": 0.0085, "step": 8064 }, { "epoch": 84.0, - "eval_LOCATION_f1": 0.9518716577540107, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9468085106382979, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9244712990936556, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.918429003021148, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9272727272727272, - "eval_ORGANIZATION_recall": 0.9216867469879518, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9212121212121213, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.05669812113046646, - "eval_overall_accuracy": 0.9909465020576131, - "eval_overall_f1": 0.951310861423221, - "eval_overall_precision": 0.9525, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.6365, - "eval_samples_per_second": 267.075, - "eval_steps_per_second": 4.713, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06590625643730164, + "eval_overall_accuracy": 0.9876543209876543, + "eval_overall_f1": 0.9391304347826087, + "eval_overall_precision": 0.9356435643564357, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2882, + "eval_samples_per_second": 589.934, + "eval_steps_per_second": 10.411, "step": 8064 }, { "epoch": 85.0, - "grad_norm": 0.6606996059417725, + "grad_norm": 0.7899487018585205, "learning_rate": 7.5e-06, - "loss": 0.0088, + "loss": 0.0092, "step": 8160 }, { "epoch": 85.0, - "eval_LOCATION_f1": 0.9319371727748691, + "eval_LOCATION_f1": 0.922279792746114, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9081632653061225, + "eval_LOCATION_precision": 0.89, "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.918429003021148, + "eval_ORGANIZATION_f1": 0.9216867469879518, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9212121212121213, - "eval_ORGANIZATION_recall": 0.9156626506024096, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9216867469879518, + "eval_ORGANIZATION_recall": 0.9216867469879518, + "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.06166466698050499, - "eval_overall_accuracy": 0.9890260631001372, - "eval_overall_f1": 0.9440993788819876, - "eval_overall_precision": 0.9405940594059405, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06632930040359497, + "eval_overall_accuracy": 0.9873799725651577, + "eval_overall_f1": 0.9405940594059405, + "eval_overall_precision": 0.9336609336609336, "eval_overall_recall": 0.9476309226932669, - "eval_runtime": 0.6881, - "eval_samples_per_second": 247.07, - "eval_steps_per_second": 4.36, + "eval_runtime": 0.2876, + "eval_samples_per_second": 591.174, + "eval_steps_per_second": 10.432, "step": 8160 }, { "epoch": 86.0, - "grad_norm": 0.701899528503418, + "grad_norm": 1.0977509021759033, "learning_rate": 7.000000000000001e-06, - "loss": 0.0089, + "loss": 0.0076, "step": 8256 }, { "epoch": 86.0, - "eval_LOCATION_f1": 0.9361702127659575, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9263157894736842, + "eval_LOCATION_precision": 0.8888888888888888, "eval_LOCATION_recall": 0.946236559139785, - "eval_ORGANIZATION_f1": 0.9212121212121213, + "eval_ORGANIZATION_f1": 0.906906906906907, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.926829268292683, - "eval_ORGANIZATION_recall": 0.9156626506024096, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9041916167664671, + "eval_ORGANIZATION_recall": 0.9096385542168675, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.05820832401514053, - "eval_overall_accuracy": 0.9895747599451303, - "eval_overall_f1": 0.9463171036204744, - "eval_overall_precision": 0.9475, - "eval_overall_recall": 0.9451371571072319, - "eval_runtime": 0.6343, - "eval_samples_per_second": 268.032, - "eval_steps_per_second": 4.73, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06624973565340042, + "eval_overall_accuracy": 0.9873799725651577, + "eval_overall_f1": 0.9343246592317225, + "eval_overall_precision": 0.9285714285714286, + "eval_overall_recall": 0.940149625935162, + "eval_runtime": 0.2903, + "eval_samples_per_second": 585.654, + "eval_steps_per_second": 10.335, "step": 8256 }, { "epoch": 87.0, - "grad_norm": 0.12563535571098328, + "grad_norm": 0.06188450753688812, "learning_rate": 6.5000000000000004e-06, - "loss": 0.0066, + "loss": 0.0073, "step": 8352 }, { "epoch": 87.0, - "eval_LOCATION_f1": 0.946808510638298, + "eval_LOCATION_f1": 0.9326424870466321, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9368421052631579, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.927710843373494, + "eval_LOCATION_precision": 0.9, + "eval_LOCATION_recall": 0.967741935483871, + "eval_ORGANIZATION_f1": 0.918429003021148, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.927710843373494, - "eval_ORGANIZATION_recall": 0.927710843373494, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9212121212121213, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.061063703149557114, - "eval_overall_accuracy": 0.9893004115226337, - "eval_overall_f1": 0.9514321295143213, - "eval_overall_precision": 0.9502487562189055, - "eval_overall_recall": 0.9526184538653366, - "eval_runtime": 0.6323, - "eval_samples_per_second": 268.857, - "eval_steps_per_second": 4.745, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06855077296495438, + "eval_overall_accuracy": 0.9873799725651577, + "eval_overall_f1": 0.9429280397022332, + "eval_overall_precision": 0.9382716049382716, + "eval_overall_recall": 0.9476309226932669, + "eval_runtime": 0.2878, + "eval_samples_per_second": 590.656, + "eval_steps_per_second": 10.423, "step": 8352 }, { "epoch": 88.0, - "grad_norm": 0.18005654215812683, + "grad_norm": 0.21970857679843903, "learning_rate": 6e-06, - "loss": 0.0088, + "loss": 0.0068, "step": 8448 }, { "epoch": 88.0, - "eval_LOCATION_f1": 0.9368421052631579, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9175257731958762, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9296636085626911, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9123867069486404, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9440993788819876, - "eval_ORGANIZATION_recall": 0.9156626506024096, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9151515151515152, + "eval_ORGANIZATION_recall": 0.9096385542168675, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.058633558452129364, - "eval_overall_accuracy": 0.9893004115226337, - "eval_overall_f1": 0.95, - "eval_overall_precision": 0.9523809523809523, - "eval_overall_recall": 0.9476309226932669, - "eval_runtime": 0.6336, - "eval_samples_per_second": 268.325, - "eval_steps_per_second": 4.735, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06893233209848404, + "eval_overall_accuracy": 0.9871056241426612, + "eval_overall_f1": 0.9366459627329192, + "eval_overall_precision": 0.9331683168316832, + "eval_overall_recall": 0.940149625935162, + "eval_runtime": 0.2916, + "eval_samples_per_second": 582.955, + "eval_steps_per_second": 10.287, "step": 8448 }, { "epoch": 89.0, - "grad_norm": 0.44412586092948914, + "grad_norm": 0.05854140594601631, "learning_rate": 5.500000000000001e-06, - "loss": 0.007, + "loss": 0.0076, "step": 8544 }, { "epoch": 89.0, - "eval_LOCATION_f1": 0.9417989417989417, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9270833333333334, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9151515151515153, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9156626506024096, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9207317073170732, - "eval_ORGANIZATION_recall": 0.9096385542168675, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9156626506024096, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.971830985915493, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.06018080934882164, - "eval_overall_accuracy": 0.9893004115226337, - "eval_overall_f1": 0.9451371571072319, - "eval_overall_precision": 0.9451371571072319, - "eval_overall_recall": 0.9451371571072319, - "eval_runtime": 0.6276, - "eval_samples_per_second": 270.894, - "eval_steps_per_second": 4.78, + "eval_PERSON_precision": 0.971830985915493, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06832458078861237, + "eval_overall_accuracy": 0.9868312757201646, + "eval_overall_f1": 0.9356435643564357, + "eval_overall_precision": 0.9287469287469288, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2879, + "eval_samples_per_second": 590.424, + "eval_steps_per_second": 10.419, "step": 8544 }, { "epoch": 90.0, - "grad_norm": 0.5702086091041565, + "grad_norm": 0.20013177394866943, "learning_rate": 5e-06, - "loss": 0.0083, + "loss": 0.0071, "step": 8640 }, { "epoch": 90.0, - "eval_LOCATION_f1": 0.9518716577540107, + "eval_LOCATION_f1": 0.9270833333333334, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9468085106382979, + "eval_LOCATION_precision": 0.898989898989899, "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9305135951661632, + "eval_ORGANIZATION_f1": 0.9189189189189191, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9333333333333333, - "eval_ORGANIZATION_recall": 0.927710843373494, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9161676646706587, + "eval_ORGANIZATION_recall": 0.9216867469879518, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.057951029390096664, - "eval_overall_accuracy": 0.9901234567901235, - "eval_overall_f1": 0.9538077403245941, - "eval_overall_precision": 0.955, - "eval_overall_recall": 0.9526184538653366, - "eval_runtime": 0.6268, - "eval_samples_per_second": 271.204, - "eval_steps_per_second": 4.786, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06415162235498428, + "eval_overall_accuracy": 0.9882030178326474, + "eval_overall_f1": 0.9417596034696407, + "eval_overall_precision": 0.9359605911330049, + "eval_overall_recall": 0.9476309226932669, + "eval_runtime": 0.2886, + "eval_samples_per_second": 589.092, + "eval_steps_per_second": 10.396, "step": 8640 }, { "epoch": 91.0, - "grad_norm": 0.6796770691871643, + "grad_norm": 0.0871826782822609, "learning_rate": 4.5e-06, - "loss": 0.0077, + "loss": 0.0069, "step": 8736 }, { "epoch": 91.0, - "eval_LOCATION_f1": 0.946808510638298, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9368421052631579, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9161676646706587, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9101796407185628, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9107142857142857, - "eval_ORGANIZATION_recall": 0.9216867469879518, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9047619047619048, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9787234042553192, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.059122197329998016, - "eval_overall_accuracy": 0.9890260631001372, - "eval_overall_f1": 0.946583850931677, - "eval_overall_precision": 0.943069306930693, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.6297, - "eval_samples_per_second": 269.99, - "eval_steps_per_second": 4.765, + "eval_PERSON_precision": 0.9857142857142858, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.07019613683223724, + "eval_overall_accuracy": 0.9871056241426612, + "eval_overall_f1": 0.9356435643564357, + "eval_overall_precision": 0.9287469287469288, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2912, + "eval_samples_per_second": 583.754, + "eval_steps_per_second": 10.302, "step": 8736 }, { "epoch": 92.0, - "grad_norm": 0.009496918879449368, + "grad_norm": 0.16851916909217834, "learning_rate": 4.000000000000001e-06, - "loss": 0.0078, + "loss": 0.0073, "step": 8832 }, { "epoch": 92.0, - "eval_LOCATION_f1": 0.9417989417989417, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9270833333333334, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9189189189189191, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9156626506024096, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9161676646706587, - "eval_ORGANIZATION_recall": 0.9216867469879518, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9156626506024096, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.06082513928413391, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06724745780229568, "eval_overall_accuracy": 0.9879286694101509, - "eval_overall_f1": 0.946583850931677, - "eval_overall_precision": 0.943069306930693, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.6215, - "eval_samples_per_second": 273.552, - "eval_steps_per_second": 4.827, + "eval_overall_f1": 0.9368029739776952, + "eval_overall_precision": 0.9310344827586207, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2954, + "eval_samples_per_second": 575.515, + "eval_steps_per_second": 10.156, "step": 8832 }, { "epoch": 93.0, - "grad_norm": 0.6363722085952759, + "grad_norm": 0.19750891625881195, "learning_rate": 3.5000000000000004e-06, - "loss": 0.0068, + "loss": 0.007, "step": 8928 }, { "epoch": 93.0, - "eval_LOCATION_f1": 0.9518716577540107, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9468085106382979, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9101796407185628, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9156626506024096, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9047619047619048, + "eval_ORGANIZATION_precision": 0.9156626506024096, "eval_ORGANIZATION_recall": 0.9156626506024096, - "eval_PERSON_f1": 0.9823321554770318, + "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.05810477212071419, - "eval_overall_accuracy": 0.9890260631001372, - "eval_overall_f1": 0.945273631840796, - "eval_overall_precision": 0.9429280397022333, - "eval_overall_recall": 0.9476309226932669, - "eval_runtime": 0.6304, - "eval_samples_per_second": 269.655, - "eval_steps_per_second": 4.759, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06532972306013107, + "eval_overall_accuracy": 0.9879286694101509, + "eval_overall_f1": 0.9368029739776952, + "eval_overall_precision": 0.9310344827586207, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2896, + "eval_samples_per_second": 587.055, + "eval_steps_per_second": 10.36, "step": 8928 }, { "epoch": 94.0, - "grad_norm": 0.42099490761756897, + "grad_norm": 0.07180120795965195, "learning_rate": 3e-06, - "loss": 0.0079, + "loss": 0.007, "step": 9024 }, { "epoch": 94.0, - "eval_LOCATION_f1": 0.946808510638298, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9368421052631579, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9161676646706587, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9156626506024096, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9107142857142857, - "eval_ORGANIZATION_recall": 0.9216867469879518, + "eval_ORGANIZATION_precision": 0.9156626506024096, + "eval_ORGANIZATION_recall": 0.9156626506024096, "eval_PERSON_f1": 0.9823321554770318, "eval_PERSON_number": 142, "eval_PERSON_precision": 0.9858156028368794, "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.0605609156191349, - "eval_overall_accuracy": 0.988477366255144, - "eval_overall_f1": 0.946583850931677, - "eval_overall_precision": 0.943069306930693, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.6284, - "eval_samples_per_second": 270.534, - "eval_steps_per_second": 4.774, + "eval_loss": 0.06773727387189865, + "eval_overall_accuracy": 0.9873799725651577, + "eval_overall_f1": 0.9392812887236679, + "eval_overall_precision": 0.9334975369458128, + "eval_overall_recall": 0.9451371571072319, + "eval_runtime": 0.288, + "eval_samples_per_second": 590.323, + "eval_steps_per_second": 10.417, "step": 9024 }, { "epoch": 95.0, - "grad_norm": 0.0327790230512619, + "grad_norm": 0.31866025924682617, "learning_rate": 2.5e-06, - "loss": 0.0072, + "loss": 0.0077, "step": 9120 }, { "epoch": 95.0, - "eval_LOCATION_f1": 0.9417989417989417, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9270833333333334, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9189189189189191, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9129129129129129, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9161676646706587, - "eval_ORGANIZATION_recall": 0.9216867469879518, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9101796407185628, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.971830985915493, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.06001435965299606, - "eval_overall_accuracy": 0.9895747599451303, - "eval_overall_f1": 0.946583850931677, - "eval_overall_precision": 0.943069306930693, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.6347, - "eval_samples_per_second": 267.858, - "eval_steps_per_second": 4.727, + "eval_PERSON_precision": 0.971830985915493, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.0693470910191536, + "eval_overall_accuracy": 0.9868312757201646, + "eval_overall_f1": 0.9344870210135972, + "eval_overall_precision": 0.9264705882352942, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.3012, + "eval_samples_per_second": 564.461, + "eval_steps_per_second": 9.961, "step": 9120 }, { "epoch": 96.0, - "grad_norm": 0.06773529201745987, + "grad_norm": 0.8485398292541504, "learning_rate": 2.0000000000000003e-06, - "loss": 0.007, + "loss": 0.0071, "step": 9216 }, { "epoch": 96.0, - "eval_LOCATION_f1": 0.9368421052631579, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9175257731958762, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9161676646706587, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9101796407185628, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9107142857142857, - "eval_ORGANIZATION_recall": 0.9216867469879518, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9047619047619048, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.971830985915493, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.05993801727890968, - "eval_overall_accuracy": 0.988477366255144, - "eval_overall_f1": 0.9442379182156134, - "eval_overall_precision": 0.9384236453201971, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.6261, - "eval_samples_per_second": 271.504, - "eval_steps_per_second": 4.791, + "eval_PERSON_precision": 0.971830985915493, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.07040981203317642, + "eval_overall_accuracy": 0.9871056241426612, + "eval_overall_f1": 0.9333333333333333, + "eval_overall_precision": 0.9242053789731052, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2916, + "eval_samples_per_second": 583.084, + "eval_steps_per_second": 10.29, "step": 9216 }, { "epoch": 97.0, - "grad_norm": 0.022277064621448517, + "grad_norm": 0.03155836462974548, "learning_rate": 1.5e-06, - "loss": 0.0082, + "loss": 0.007, "step": 9312 }, { "epoch": 97.0, - "eval_LOCATION_f1": 0.9368421052631579, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9175257731958762, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9216867469879518, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9129129129129129, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9216867469879518, - "eval_ORGANIZATION_recall": 0.9216867469879518, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9101796407185628, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.060069117695093155, - "eval_overall_accuracy": 0.988477366255144, - "eval_overall_f1": 0.946583850931677, - "eval_overall_precision": 0.943069306930693, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.6293, - "eval_samples_per_second": 270.143, - "eval_steps_per_second": 4.767, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.069269560277462, + "eval_overall_accuracy": 0.9876543209876543, + "eval_overall_f1": 0.9356435643564357, + "eval_overall_precision": 0.9287469287469288, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2886, + "eval_samples_per_second": 589.08, + "eval_steps_per_second": 10.396, "step": 9312 }, { "epoch": 98.0, - "grad_norm": 0.3417333662509918, + "grad_norm": 0.1880914568901062, "learning_rate": 1.0000000000000002e-06, - "loss": 0.0065, + "loss": 0.0062, "step": 9408 }, { "epoch": 98.0, - "eval_LOCATION_f1": 0.9368421052631579, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9175257731958762, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9216867469879518, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9156626506024096, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9216867469879518, - "eval_ORGANIZATION_recall": 0.9216867469879518, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9156626506024096, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.06064934283494949, - "eval_overall_accuracy": 0.988477366255144, - "eval_overall_f1": 0.946583850931677, - "eval_overall_precision": 0.943069306930693, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.6301, - "eval_samples_per_second": 269.806, - "eval_steps_per_second": 4.761, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06927915662527084, + "eval_overall_accuracy": 0.9873799725651577, + "eval_overall_f1": 0.9368029739776952, + "eval_overall_precision": 0.9310344827586207, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2886, + "eval_samples_per_second": 589.05, + "eval_steps_per_second": 10.395, "step": 9408 }, { "epoch": 99.0, - "grad_norm": 0.2305583655834198, + "grad_norm": 0.29024115204811096, "learning_rate": 5.000000000000001e-07, - "loss": 0.0075, + "loss": 0.0062, "step": 9504 }, { "epoch": 99.0, - "eval_LOCATION_f1": 0.9368421052631579, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9175257731958762, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9189189189189191, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9156626506024096, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9161676646706587, - "eval_ORGANIZATION_recall": 0.9216867469879518, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9156626506024096, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.059974148869514465, - "eval_overall_accuracy": 0.9882030178326474, - "eval_overall_f1": 0.9454094292803972, - "eval_overall_precision": 0.9407407407407408, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.6422, - "eval_samples_per_second": 264.724, - "eval_steps_per_second": 4.672, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06901175528764725, + "eval_overall_accuracy": 0.9873799725651577, + "eval_overall_f1": 0.9368029739776952, + "eval_overall_precision": 0.9310344827586207, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2901, + "eval_samples_per_second": 585.92, + "eval_steps_per_second": 10.34, "step": 9504 }, { "epoch": 100.0, - "grad_norm": 0.5298776030540466, + "grad_norm": 0.5472394824028015, "learning_rate": 0.0, - "loss": 0.0057, + "loss": 0.0068, "step": 9600 }, { "epoch": 100.0, - "eval_LOCATION_f1": 0.9368421052631579, + "eval_LOCATION_f1": 0.9166666666666667, "eval_LOCATION_number": 93, - "eval_LOCATION_precision": 0.9175257731958762, - "eval_LOCATION_recall": 0.956989247311828, - "eval_ORGANIZATION_f1": 0.9189189189189191, + "eval_LOCATION_precision": 0.8888888888888888, + "eval_LOCATION_recall": 0.946236559139785, + "eval_ORGANIZATION_f1": 0.9156626506024096, "eval_ORGANIZATION_number": 166, - "eval_ORGANIZATION_precision": 0.9161676646706587, - "eval_ORGANIZATION_recall": 0.9216867469879518, - "eval_PERSON_f1": 0.9823321554770318, + "eval_ORGANIZATION_precision": 0.9156626506024096, + "eval_ORGANIZATION_recall": 0.9156626506024096, + "eval_PERSON_f1": 0.9752650176678446, "eval_PERSON_number": 142, - "eval_PERSON_precision": 0.9858156028368794, - "eval_PERSON_recall": 0.9788732394366197, - "eval_loss": 0.05988968536257744, - "eval_overall_accuracy": 0.9882030178326474, - "eval_overall_f1": 0.9454094292803972, - "eval_overall_precision": 0.9407407407407408, - "eval_overall_recall": 0.9501246882793017, - "eval_runtime": 0.6423, - "eval_samples_per_second": 264.661, - "eval_steps_per_second": 4.67, + "eval_PERSON_precision": 0.9787234042553191, + "eval_PERSON_recall": 0.971830985915493, + "eval_loss": 0.06890885531902313, + "eval_overall_accuracy": 0.9876543209876543, + "eval_overall_f1": 0.9368029739776952, + "eval_overall_precision": 0.9310344827586207, + "eval_overall_recall": 0.942643391521197, + "eval_runtime": 0.2901, + "eval_samples_per_second": 586.027, + "eval_steps_per_second": 10.342, "step": 9600 }, { "epoch": 100.0, "step": 9600, "total_flos": 4208466347875278.0, - "train_loss": 0.037790342985341945, - "train_runtime": 1071.0881, - "train_samples_per_second": 142.939, - "train_steps_per_second": 8.963 + "train_loss": 0.03755435147012273, + "train_runtime": 554.4341, + "train_samples_per_second": 276.137, + "train_steps_per_second": 17.315 } ], "logging_steps": 500,