{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 500, "global_step": 9600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.3216755390167236, "learning_rate": 4.9500000000000004e-05, "loss": 0.834, "step": 96 }, { "epoch": 1.0, "eval_LOCATION_f1": 0.0, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.0, "eval_LOCATION_recall": 0.0, "eval_ORGANIZATION_f1": 0.021739130434782608, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.3333333333333333, "eval_ORGANIZATION_recall": 0.011235955056179775, "eval_PERSON_f1": 0.0, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.0, "eval_PERSON_recall": 0.0, "eval_loss": 0.519976794719696, "eval_overall_accuracy": 0.8437246963562753, "eval_overall_f1": 0.009999999999999998, "eval_overall_precision": 0.25, "eval_overall_recall": 0.00510204081632653, "eval_runtime": 0.2471, "eval_samples_per_second": 687.937, "eval_steps_per_second": 12.14, "step": 96 }, { "epoch": 2.0, "grad_norm": 0.9635790586471558, "learning_rate": 4.9e-05, "loss": 0.4501, "step": 192 }, { "epoch": 2.0, "eval_LOCATION_f1": 0.296875, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.4523809523809524, "eval_LOCATION_recall": 0.22093023255813954, "eval_ORGANIZATION_f1": 0.4215686274509804, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.3739130434782609, "eval_ORGANIZATION_recall": 0.48314606741573035, "eval_PERSON_f1": 0.427299703264095, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.3444976076555024, "eval_PERSON_recall": 0.5625, "eval_loss": 0.30531638860702515, "eval_overall_accuracy": 0.9041835357624831, "eval_overall_f1": 0.4054982817869416, "eval_overall_precision": 0.367983367983368, "eval_overall_recall": 0.45153061224489793, "eval_runtime": 0.2475, "eval_samples_per_second": 686.861, "eval_steps_per_second": 12.121, "step": 192 }, { "epoch": 3.0, "grad_norm": 1.0305564403533936, "learning_rate": 4.85e-05, "loss": 0.309, "step": 288 }, { "epoch": 3.0, "eval_LOCATION_f1": 0.5398773006134969, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.5714285714285714, "eval_LOCATION_recall": 0.5116279069767442, "eval_ORGANIZATION_f1": 0.6542553191489362, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.6212121212121212, "eval_ORGANIZATION_recall": 0.6910112359550562, "eval_PERSON_f1": 0.7342657342657343, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.6645569620253164, "eval_PERSON_recall": 0.8203125, "eval_loss": 0.21796205639839172, "eval_overall_accuracy": 0.9425101214574899, "eval_overall_f1": 0.6593939393939394, "eval_overall_precision": 0.628175519630485, "eval_overall_recall": 0.6938775510204082, "eval_runtime": 0.2476, "eval_samples_per_second": 686.487, "eval_steps_per_second": 12.114, "step": 288 }, { "epoch": 4.0, "grad_norm": 0.8546921610832214, "learning_rate": 4.8e-05, "loss": 0.2169, "step": 384 }, { "epoch": 4.0, "eval_LOCATION_f1": 0.7065217391304347, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.6632653061224489, "eval_LOCATION_recall": 0.7558139534883721, "eval_ORGANIZATION_f1": 0.7457627118644068, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.75, "eval_ORGANIZATION_recall": 0.7415730337078652, "eval_PERSON_f1": 0.9022556390977444, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.8695652173913043, "eval_PERSON_recall": 0.9375, "eval_loss": 0.1386880725622177, "eval_overall_accuracy": 0.9624831309041836, "eval_overall_f1": 0.7885572139303483, "eval_overall_precision": 0.7694174757281553, "eval_overall_recall": 0.8086734693877551, "eval_runtime": 0.2495, "eval_samples_per_second": 681.233, "eval_steps_per_second": 12.022, "step": 384 }, { "epoch": 5.0, "grad_norm": 0.6915740966796875, "learning_rate": 4.75e-05, "loss": 0.145, "step": 480 }, { "epoch": 5.0, "eval_LOCATION_f1": 0.7807486631016043, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.7227722772277227, "eval_LOCATION_recall": 0.8488372093023255, "eval_ORGANIZATION_f1": 0.7819148936170214, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.7424242424242424, "eval_ORGANIZATION_recall": 0.8258426966292135, "eval_PERSON_f1": 0.9389312977099238, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.917910447761194, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.10832899063825607, "eval_overall_accuracy": 0.9670715249662618, "eval_overall_f1": 0.8315151515151514, "eval_overall_precision": 0.792147806004619, "eval_overall_recall": 0.875, "eval_runtime": 0.2512, "eval_samples_per_second": 676.782, "eval_steps_per_second": 11.943, "step": 480 }, { "epoch": 6.0, "grad_norm": 0.8912493586540222, "learning_rate": 4.7e-05, "loss": 0.1191, "step": 576 }, { "epoch": 6.0, "eval_LOCATION_f1": 0.774869109947644, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.7047619047619048, "eval_LOCATION_recall": 0.8604651162790697, "eval_ORGANIZATION_f1": 0.8225352112676056, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8248587570621468, "eval_ORGANIZATION_recall": 0.8202247191011236, "eval_PERSON_f1": 0.9461538461538462, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9318181818181818, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.09309709072113037, "eval_overall_accuracy": 0.970310391363023, "eval_overall_f1": 0.8511166253101737, "eval_overall_precision": 0.8285024154589372, "eval_overall_recall": 0.875, "eval_runtime": 0.2486, "eval_samples_per_second": 683.849, "eval_steps_per_second": 12.068, "step": 576 }, { "epoch": 7.0, "grad_norm": 1.663607120513916, "learning_rate": 4.6500000000000005e-05, "loss": 0.1063, "step": 672 }, { "epoch": 7.0, "eval_LOCATION_f1": 0.7938144329896906, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.7129629629629629, "eval_LOCATION_recall": 0.8953488372093024, "eval_ORGANIZATION_f1": 0.8228882833787466, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.798941798941799, "eval_ORGANIZATION_recall": 0.848314606741573, "eval_PERSON_f1": 0.9545454545454545, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9264705882352942, "eval_PERSON_recall": 0.984375, "eval_loss": 0.0903930515050888, "eval_overall_accuracy": 0.9697705802968961, "eval_overall_f1": 0.8581818181818182, "eval_overall_precision": 0.8175519630484989, "eval_overall_recall": 0.9030612244897959, "eval_runtime": 0.2477, "eval_samples_per_second": 686.203, "eval_steps_per_second": 12.109, "step": 672 }, { "epoch": 8.0, "grad_norm": 0.6487865447998047, "learning_rate": 4.600000000000001e-05, "loss": 0.0947, "step": 768 }, { "epoch": 8.0, "eval_LOCATION_f1": 0.823529411764706, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.7623762376237624, "eval_LOCATION_recall": 0.8953488372093024, "eval_ORGANIZATION_f1": 0.8333333333333334, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8241758241758241, "eval_ORGANIZATION_recall": 0.8426966292134831, "eval_PERSON_f1": 0.9571984435797667, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9534883720930233, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.0733816996216774, "eval_overall_accuracy": 0.9767881241565453, "eval_overall_f1": 0.8706467661691543, "eval_overall_precision": 0.8495145631067961, "eval_overall_recall": 0.8928571428571429, "eval_runtime": 0.2484, "eval_samples_per_second": 684.488, "eval_steps_per_second": 12.079, "step": 768 }, { "epoch": 9.0, "grad_norm": 0.4733569920063019, "learning_rate": 4.55e-05, "loss": 0.085, "step": 864 }, { "epoch": 9.0, "eval_LOCATION_f1": 0.8478260869565216, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.7959183673469388, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.8657534246575344, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8449197860962567, "eval_ORGANIZATION_recall": 0.8876404494382022, "eval_PERSON_f1": 0.9575289575289575, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9465648854961832, "eval_PERSON_recall": 0.96875, "eval_loss": 0.06920091062784195, "eval_overall_accuracy": 0.9789473684210527, "eval_overall_f1": 0.8910891089108911, "eval_overall_precision": 0.8653846153846154, "eval_overall_recall": 0.9183673469387755, "eval_runtime": 0.2503, "eval_samples_per_second": 679.314, "eval_steps_per_second": 11.988, "step": 864 }, { "epoch": 10.0, "grad_norm": 1.6128334999084473, "learning_rate": 4.5e-05, "loss": 0.082, "step": 960 }, { "epoch": 10.0, "eval_LOCATION_f1": 0.8478260869565216, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.7959183673469388, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.8698060941828255, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8579234972677595, "eval_ORGANIZATION_recall": 0.8820224719101124, "eval_PERSON_f1": 0.9689922480620154, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9615384615384616, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.06693387776613235, "eval_overall_accuracy": 0.9789473684210527, "eval_overall_f1": 0.8966376089663761, "eval_overall_precision": 0.8759124087591241, "eval_overall_recall": 0.9183673469387755, "eval_runtime": 0.244, "eval_samples_per_second": 696.829, "eval_steps_per_second": 12.297, "step": 960 }, { "epoch": 11.0, "grad_norm": 0.9450006484985352, "learning_rate": 4.4500000000000004e-05, "loss": 0.0769, "step": 1056 }, { "epoch": 11.0, "eval_LOCATION_f1": 0.8756756756756757, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8181818181818182, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.8736263736263736, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8548387096774194, "eval_ORGANIZATION_recall": 0.8932584269662921, "eval_PERSON_f1": 0.9649805447470817, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9612403100775194, "eval_PERSON_recall": 0.96875, "eval_loss": 0.06316138803958893, "eval_overall_accuracy": 0.9802968960863697, "eval_overall_f1": 0.9032258064516129, "eval_overall_precision": 0.8792270531400966, "eval_overall_recall": 0.9285714285714286, "eval_runtime": 0.2433, "eval_samples_per_second": 698.829, "eval_steps_per_second": 12.332, "step": 1056 }, { "epoch": 12.0, "grad_norm": 1.495387315750122, "learning_rate": 4.4000000000000006e-05, "loss": 0.0749, "step": 1152 }, { "epoch": 12.0, "eval_LOCATION_f1": 0.8839779005524862, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8421052631578947, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.893150684931507, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8716577540106952, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.060925450176000595, "eval_overall_accuracy": 0.9819163292847504, "eval_overall_f1": 0.916562889165629, "eval_overall_precision": 0.8953771289537713, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2418, "eval_samples_per_second": 703.179, "eval_steps_per_second": 12.409, "step": 1152 }, { "epoch": 13.0, "grad_norm": 1.075753092765808, "learning_rate": 4.35e-05, "loss": 0.0648, "step": 1248 }, { "epoch": 13.0, "eval_LOCATION_f1": 0.8729281767955801, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8315789473684211, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.8833333333333334, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8736263736263736, "eval_ORGANIZATION_recall": 0.8932584269662921, "eval_PERSON_f1": 0.9649805447470817, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9612403100775194, "eval_PERSON_recall": 0.96875, "eval_loss": 0.05693093314766884, "eval_overall_accuracy": 0.9811066126855601, "eval_overall_f1": 0.9072681704260652, "eval_overall_precision": 0.8916256157635468, "eval_overall_recall": 0.923469387755102, "eval_runtime": 0.2491, "eval_samples_per_second": 682.437, "eval_steps_per_second": 12.043, "step": 1248 }, { "epoch": 14.0, "grad_norm": 0.4505135715007782, "learning_rate": 4.3e-05, "loss": 0.0639, "step": 1344 }, { "epoch": 14.0, "eval_LOCATION_f1": 0.861878453038674, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8210526315789474, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.8857142857142857, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9011627906976745, "eval_ORGANIZATION_recall": 0.8707865168539326, "eval_PERSON_f1": 0.9649805447470817, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9612403100775194, "eval_PERSON_recall": 0.96875, "eval_loss": 0.059763651341199875, "eval_overall_accuracy": 0.9811066126855601, "eval_overall_f1": 0.9060913705583755, "eval_overall_precision": 0.9015151515151515, "eval_overall_recall": 0.9107142857142857, "eval_runtime": 0.2481, "eval_samples_per_second": 685.305, "eval_steps_per_second": 12.094, "step": 1344 }, { "epoch": 15.0, "grad_norm": 0.8750821352005005, "learning_rate": 4.25e-05, "loss": 0.0627, "step": 1440 }, { "epoch": 15.0, "eval_LOCATION_f1": 0.88268156424581, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8494623655913979, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.8925619834710743, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8756756756756757, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.053729888051748276, "eval_overall_accuracy": 0.9819163292847504, "eval_overall_f1": 0.918444165621079, "eval_overall_precision": 0.9037037037037037, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2483, "eval_samples_per_second": 684.761, "eval_steps_per_second": 12.084, "step": 1440 }, { "epoch": 16.0, "grad_norm": 0.6922814846038818, "learning_rate": 4.2e-05, "loss": 0.0567, "step": 1536 }, { "epoch": 16.0, "eval_LOCATION_f1": 0.8743169398907104, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8247422680412371, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.8846153846153846, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8655913978494624, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9689922480620154, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9615384615384616, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05485454946756363, "eval_overall_accuracy": 0.9792172739541161, "eval_overall_f1": 0.9093167701863355, "eval_overall_precision": 0.8861985472154964, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2497, "eval_samples_per_second": 680.691, "eval_steps_per_second": 12.012, "step": 1536 }, { "epoch": 17.0, "grad_norm": 0.5154015421867371, "learning_rate": 4.15e-05, "loss": 0.0546, "step": 1632 }, { "epoch": 17.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.8925619834710743, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8756756756756757, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05112185701727867, "eval_overall_accuracy": 0.982995951417004, "eval_overall_f1": 0.922110552763819, "eval_overall_precision": 0.9084158415841584, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2489, "eval_samples_per_second": 683.073, "eval_steps_per_second": 12.054, "step": 1632 }, { "epoch": 18.0, "grad_norm": 0.3619915544986725, "learning_rate": 4.1e-05, "loss": 0.0527, "step": 1728 }, { "epoch": 18.0, "eval_LOCATION_f1": 0.8961748633879781, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.845360824742268, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.895184135977337, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9028571428571428, "eval_ORGANIZATION_recall": 0.8876404494382022, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.05289569869637489, "eval_overall_accuracy": 0.9827260458839406, "eval_overall_f1": 0.9191919191919193, "eval_overall_precision": 0.91, "eval_overall_recall": 0.9285714285714286, "eval_runtime": 0.242, "eval_samples_per_second": 702.406, "eval_steps_per_second": 12.395, "step": 1728 }, { "epoch": 19.0, "grad_norm": 0.6516693234443665, "learning_rate": 4.05e-05, "loss": 0.0492, "step": 1824 }, { "epoch": 19.0, "eval_LOCATION_f1": 0.893854748603352, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8602150537634409, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.8926553672316383, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8977272727272727, "eval_ORGANIZATION_recall": 0.8876404494382022, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.05347622185945511, "eval_overall_accuracy": 0.9821862348178138, "eval_overall_f1": 0.917617237008872, "eval_overall_precision": 0.9118387909319899, "eval_overall_recall": 0.923469387755102, "eval_runtime": 0.2422, "eval_samples_per_second": 701.922, "eval_steps_per_second": 12.387, "step": 1824 }, { "epoch": 20.0, "grad_norm": 0.7471908330917358, "learning_rate": 4e-05, "loss": 0.0474, "step": 1920 }, { "epoch": 20.0, "eval_LOCATION_f1": 0.8999999999999999, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8617021276595744, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.913649025069638, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9060773480662984, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05013725161552429, "eval_overall_accuracy": 0.9827260458839406, "eval_overall_f1": 0.9319899244332494, "eval_overall_precision": 0.9203980099502488, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.243, "eval_samples_per_second": 699.537, "eval_steps_per_second": 12.345, "step": 1920 }, { "epoch": 21.0, "grad_norm": 1.2585930824279785, "learning_rate": 3.9500000000000005e-05, "loss": 0.0447, "step": 2016 }, { "epoch": 21.0, "eval_LOCATION_f1": 0.9180327868852459, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.865979381443299, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9044943820224719, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9044943820224719, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05279025062918663, "eval_overall_accuracy": 0.9835357624831309, "eval_overall_f1": 0.9319899244332494, "eval_overall_precision": 0.9203980099502488, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2473, "eval_samples_per_second": 687.519, "eval_steps_per_second": 12.133, "step": 2016 }, { "epoch": 22.0, "grad_norm": 0.8391615748405457, "learning_rate": 3.9000000000000006e-05, "loss": 0.0453, "step": 2112 }, { "epoch": 22.0, "eval_LOCATION_f1": 0.8829787234042553, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8137254901960784, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9106145251396648, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9055555555555556, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05221620574593544, "eval_overall_accuracy": 0.9819163292847504, "eval_overall_f1": 0.9263420724094882, "eval_overall_precision": 0.9070904645476773, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2477, "eval_samples_per_second": 686.445, "eval_steps_per_second": 12.114, "step": 2112 }, { "epoch": 23.0, "grad_norm": 0.5329034924507141, "learning_rate": 3.85e-05, "loss": 0.039, "step": 2208 }, { "epoch": 23.0, "eval_LOCATION_f1": 0.8972972972972972, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8383838383838383, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9121813031161473, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.92, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04938729107379913, "eval_overall_accuracy": 0.9835357624831309, "eval_overall_f1": 0.929471032745592, "eval_overall_precision": 0.917910447761194, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2413, "eval_samples_per_second": 704.628, "eval_steps_per_second": 12.435, "step": 2208 }, { "epoch": 24.0, "grad_norm": 0.5503416657447815, "learning_rate": 3.8e-05, "loss": 0.0414, "step": 2304 }, { "epoch": 24.0, "eval_LOCATION_f1": 0.8924731182795699, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.83, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.906515580736544, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9142857142857143, "eval_ORGANIZATION_recall": 0.898876404494382, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0536591075360775, "eval_overall_accuracy": 0.9819163292847504, "eval_overall_f1": 0.9257861635220126, "eval_overall_precision": 0.913151364764268, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.248, "eval_samples_per_second": 685.464, "eval_steps_per_second": 12.096, "step": 2304 }, { "epoch": 25.0, "grad_norm": 0.6665748953819275, "learning_rate": 3.7500000000000003e-05, "loss": 0.038, "step": 2400 }, { "epoch": 25.0, "eval_LOCATION_f1": 0.9060773480662985, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8631578947368421, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.905982905982906, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9190751445086706, "eval_ORGANIZATION_recall": 0.8932584269662921, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05137418955564499, "eval_overall_accuracy": 0.9838056680161943, "eval_overall_f1": 0.9301143583227446, "eval_overall_precision": 0.9265822784810127, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.248, "eval_samples_per_second": 685.611, "eval_steps_per_second": 12.099, "step": 2400 }, { "epoch": 26.0, "grad_norm": 0.4098527729511261, "learning_rate": 3.7e-05, "loss": 0.038, "step": 2496 }, { "epoch": 26.0, "eval_LOCATION_f1": 0.923076923076923, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.875, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9085714285714286, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9244186046511628, "eval_ORGANIZATION_recall": 0.8932584269662921, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0504906140267849, "eval_overall_accuracy": 0.9832658569500675, "eval_overall_f1": 0.934010152284264, "eval_overall_precision": 0.9292929292929293, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2482, "eval_samples_per_second": 685.063, "eval_steps_per_second": 12.089, "step": 2496 }, { "epoch": 27.0, "grad_norm": 0.31108349561691284, "learning_rate": 3.65e-05, "loss": 0.037, "step": 2592 }, { "epoch": 27.0, "eval_LOCATION_f1": 0.9021739130434784, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8469387755102041, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9090909090909091, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9195402298850575, "eval_ORGANIZATION_recall": 0.898876404494382, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05084557086229324, "eval_overall_accuracy": 0.9824561403508771, "eval_overall_f1": 0.9304677623261693, "eval_overall_precision": 0.9223057644110275, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2421, "eval_samples_per_second": 702.05, "eval_steps_per_second": 12.389, "step": 2592 }, { "epoch": 28.0, "grad_norm": 1.1318433284759521, "learning_rate": 3.6e-05, "loss": 0.0375, "step": 2688 }, { "epoch": 28.0, "eval_LOCATION_f1": 0.923076923076923, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.875, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9101123595505618, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9101123595505618, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04823584854602814, "eval_overall_accuracy": 0.9840755735492578, "eval_overall_f1": 0.935687263556116, "eval_overall_precision": 0.9251870324189526, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2486, "eval_samples_per_second": 683.7, "eval_steps_per_second": 12.065, "step": 2688 }, { "epoch": 29.0, "grad_norm": 0.8521238565444946, "learning_rate": 3.55e-05, "loss": 0.0342, "step": 2784 }, { "epoch": 29.0, "eval_LOCATION_f1": 0.9222222222222223, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8829787234042553, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9070422535211267, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9096045197740112, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04844392463564873, "eval_overall_accuracy": 0.9832658569500675, "eval_overall_f1": 0.9329962073324906, "eval_overall_precision": 0.924812030075188, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2486, "eval_samples_per_second": 683.711, "eval_steps_per_second": 12.065, "step": 2784 }, { "epoch": 30.0, "grad_norm": 0.2059481143951416, "learning_rate": 3.5e-05, "loss": 0.0314, "step": 2880 }, { "epoch": 30.0, "eval_LOCATION_f1": 0.907103825136612, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8556701030927835, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9060773480662982, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8913043478260869, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.980544747081712, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9767441860465116, "eval_PERSON_recall": 0.984375, "eval_loss": 0.048888128250837326, "eval_overall_accuracy": 0.9840755735492578, "eval_overall_f1": 0.9301745635910225, "eval_overall_precision": 0.9097560975609756, "eval_overall_recall": 0.951530612244898, "eval_runtime": 0.2412, "eval_samples_per_second": 704.721, "eval_steps_per_second": 12.436, "step": 2880 }, { "epoch": 31.0, "grad_norm": 0.7870827317237854, "learning_rate": 3.45e-05, "loss": 0.0308, "step": 2976 }, { "epoch": 31.0, "eval_LOCATION_f1": 0.9180327868852459, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.865979381443299, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.895184135977337, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9028571428571428, "eval_ORGANIZATION_recall": 0.8876404494382022, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.052150849252939224, "eval_overall_accuracy": 0.9824561403508771, "eval_overall_f1": 0.9267676767676768, "eval_overall_precision": 0.9175, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2424, "eval_samples_per_second": 701.228, "eval_steps_per_second": 12.375, "step": 2976 }, { "epoch": 32.0, "grad_norm": 0.24624638259410858, "learning_rate": 3.4000000000000007e-05, "loss": 0.0284, "step": 3072 }, { "epoch": 32.0, "eval_LOCATION_f1": 0.923076923076923, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.875, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.906515580736544, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9142857142857143, "eval_ORGANIZATION_recall": 0.898876404494382, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0507497675716877, "eval_overall_accuracy": 0.9832658569500675, "eval_overall_f1": 0.9329962073324906, "eval_overall_precision": 0.924812030075188, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2466, "eval_samples_per_second": 689.376, "eval_steps_per_second": 12.165, "step": 3072 }, { "epoch": 33.0, "grad_norm": 0.924070417881012, "learning_rate": 3.35e-05, "loss": 0.029, "step": 3168 }, { "epoch": 33.0, "eval_LOCATION_f1": 0.908108108108108, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8484848484848485, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9002849002849002, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9132947976878613, "eval_ORGANIZATION_recall": 0.8876404494382022, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05186566710472107, "eval_overall_accuracy": 0.9819163292847504, "eval_overall_f1": 0.9267676767676768, "eval_overall_precision": 0.9175, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2479, "eval_samples_per_second": 685.778, "eval_steps_per_second": 12.102, "step": 3168 }, { "epoch": 34.0, "grad_norm": 0.6430802345275879, "learning_rate": 3.3e-05, "loss": 0.0273, "step": 3264 }, { "epoch": 34.0, "eval_LOCATION_f1": 0.9180327868852459, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.865979381443299, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9121813031161473, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.92, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05143400654196739, "eval_overall_accuracy": 0.9827260458839406, "eval_overall_f1": 0.9343434343434343, "eval_overall_precision": 0.925, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2483, "eval_samples_per_second": 684.535, "eval_steps_per_second": 12.08, "step": 3264 }, { "epoch": 35.0, "grad_norm": 0.4695442020893097, "learning_rate": 3.2500000000000004e-05, "loss": 0.0273, "step": 3360 }, { "epoch": 35.0, "eval_LOCATION_f1": 0.9180327868852459, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.865979381443299, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9002849002849002, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9132947976878613, "eval_ORGANIZATION_recall": 0.8876404494382022, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05347920209169388, "eval_overall_accuracy": 0.9827260458839406, "eval_overall_f1": 0.929113924050633, "eval_overall_precision": 0.9221105527638191, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2467, "eval_samples_per_second": 689.161, "eval_steps_per_second": 12.162, "step": 3360 }, { "epoch": 36.0, "grad_norm": 0.5556744933128357, "learning_rate": 3.2000000000000005e-05, "loss": 0.0275, "step": 3456 }, { "epoch": 36.0, "eval_LOCATION_f1": 0.8972972972972972, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8383838383838383, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.8969359331476323, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8895027624309392, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9689922480620154, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9615384615384616, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05185906961560249, "eval_overall_accuracy": 0.9813765182186235, "eval_overall_f1": 0.9201995012468829, "eval_overall_precision": 0.9, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2496, "eval_samples_per_second": 681.016, "eval_steps_per_second": 12.018, "step": 3456 }, { "epoch": 37.0, "grad_norm": 1.0498524904251099, "learning_rate": 3.15e-05, "loss": 0.0244, "step": 3552 }, { "epoch": 37.0, "eval_LOCATION_f1": 0.8901098901098902, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.84375, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.898876404494382, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.898876404494382, "eval_ORGANIZATION_recall": 0.898876404494382, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05113949626684189, "eval_overall_accuracy": 0.9819163292847504, "eval_overall_f1": 0.9219143576826196, "eval_overall_precision": 0.9104477611940298, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2473, "eval_samples_per_second": 687.363, "eval_steps_per_second": 12.13, "step": 3552 }, { "epoch": 38.0, "grad_norm": 0.8411348462104797, "learning_rate": 3.1e-05, "loss": 0.0272, "step": 3648 }, { "epoch": 38.0, "eval_LOCATION_f1": 0.9180327868852459, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.865979381443299, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.895184135977337, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9028571428571428, "eval_ORGANIZATION_recall": 0.8876404494382022, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05628972873091698, "eval_overall_accuracy": 0.9827260458839406, "eval_overall_f1": 0.9267676767676768, "eval_overall_precision": 0.9175, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2479, "eval_samples_per_second": 685.727, "eval_steps_per_second": 12.101, "step": 3648 }, { "epoch": 39.0, "grad_norm": 0.41838157176971436, "learning_rate": 3.05e-05, "loss": 0.028, "step": 3744 }, { "epoch": 39.0, "eval_LOCATION_f1": 0.923076923076923, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.875, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9101123595505618, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9101123595505618, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.051703643053770065, "eval_overall_accuracy": 0.9835357624831309, "eval_overall_f1": 0.9345088161209069, "eval_overall_precision": 0.9228855721393034, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2477, "eval_samples_per_second": 686.448, "eval_steps_per_second": 12.114, "step": 3744 }, { "epoch": 40.0, "grad_norm": 0.9456387758255005, "learning_rate": 3e-05, "loss": 0.0236, "step": 3840 }, { "epoch": 40.0, "eval_LOCATION_f1": 0.9111111111111112, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8723404255319149, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9106145251396648, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9055555555555556, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05246607959270477, "eval_overall_accuracy": 0.9832658569500675, "eval_overall_f1": 0.9319899244332494, "eval_overall_precision": 0.9203980099502488, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2488, "eval_samples_per_second": 683.399, "eval_steps_per_second": 12.06, "step": 3840 }, { "epoch": 41.0, "grad_norm": 0.31706151366233826, "learning_rate": 2.95e-05, "loss": 0.0223, "step": 3936 }, { "epoch": 41.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9157303370786517, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9157303370786517, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.054678838700056076, "eval_overall_accuracy": 0.9835357624831309, "eval_overall_f1": 0.935361216730038, "eval_overall_precision": 0.929471032745592, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2485, "eval_samples_per_second": 684.014, "eval_steps_per_second": 12.071, "step": 3936 }, { "epoch": 42.0, "grad_norm": 0.6095432639122009, "learning_rate": 2.9e-05, "loss": 0.0228, "step": 4032 }, { "epoch": 42.0, "eval_LOCATION_f1": 0.9101123595505618, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8804347826086957, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9111111111111112, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9010989010989011, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05048713833093643, "eval_overall_accuracy": 0.9840755735492578, "eval_overall_f1": 0.9319899244332494, "eval_overall_precision": 0.9203980099502488, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2475, "eval_samples_per_second": 686.767, "eval_steps_per_second": 12.119, "step": 4032 }, { "epoch": 43.0, "grad_norm": 0.06603565812110901, "learning_rate": 2.8499999999999998e-05, "loss": 0.0233, "step": 4128 }, { "epoch": 43.0, "eval_LOCATION_f1": 0.9120879120879122, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8645833333333334, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.913649025069638, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9060773480662984, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0556485578417778, "eval_overall_accuracy": 0.9835357624831309, "eval_overall_f1": 0.9335006273525721, "eval_overall_precision": 0.9185185185185185, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2475, "eval_samples_per_second": 686.995, "eval_steps_per_second": 12.123, "step": 4128 }, { "epoch": 44.0, "grad_norm": 0.4065522253513336, "learning_rate": 2.8000000000000003e-05, "loss": 0.0219, "step": 4224 }, { "epoch": 44.0, "eval_LOCATION_f1": 0.9130434782608695, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8571428571428571, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9035812672176309, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8864864864864865, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0528954342007637, "eval_overall_accuracy": 0.9835357624831309, "eval_overall_f1": 0.927860696517413, "eval_overall_precision": 0.9053398058252428, "eval_overall_recall": 0.951530612244898, "eval_runtime": 0.249, "eval_samples_per_second": 682.73, "eval_steps_per_second": 12.048, "step": 4224 }, { "epoch": 45.0, "grad_norm": 0.8652843832969666, "learning_rate": 2.7500000000000004e-05, "loss": 0.0194, "step": 4320 }, { "epoch": 45.0, "eval_LOCATION_f1": 0.9180327868852459, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.865979381443299, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9090909090909091, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9195402298850575, "eval_ORGANIZATION_recall": 0.898876404494382, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05724570155143738, "eval_overall_accuracy": 0.9819163292847504, "eval_overall_f1": 0.9329962073324906, "eval_overall_precision": 0.924812030075188, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.248, "eval_samples_per_second": 685.477, "eval_steps_per_second": 12.097, "step": 4320 }, { "epoch": 46.0, "grad_norm": 0.5592328906059265, "learning_rate": 2.7000000000000002e-05, "loss": 0.0212, "step": 4416 }, { "epoch": 46.0, "eval_LOCATION_f1": 0.8961748633879781, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.845360824742268, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9090909090909092, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8918918918918919, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9689922480620154, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9615384615384616, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05504947155714035, "eval_overall_accuracy": 0.9827260458839406, "eval_overall_f1": 0.9253731343283583, "eval_overall_precision": 0.9029126213592233, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2477, "eval_samples_per_second": 686.443, "eval_steps_per_second": 12.114, "step": 4416 }, { "epoch": 47.0, "grad_norm": 0.38780418038368225, "learning_rate": 2.6500000000000004e-05, "loss": 0.019, "step": 4512 }, { "epoch": 47.0, "eval_LOCATION_f1": 0.888888888888889, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.851063829787234, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9116022099447514, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8967391304347826, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.051470689475536346, "eval_overall_accuracy": 0.9843454790823212, "eval_overall_f1": 0.9261576971214017, "eval_overall_precision": 0.9090909090909091, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2483, "eval_samples_per_second": 684.586, "eval_steps_per_second": 12.081, "step": 4512 }, { "epoch": 48.0, "grad_norm": 0.4960862994194031, "learning_rate": 2.6000000000000002e-05, "loss": 0.0182, "step": 4608 }, { "epoch": 48.0, "eval_LOCATION_f1": 0.9060773480662985, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8631578947368421, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9080779944289694, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9005524861878453, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05240018665790558, "eval_overall_accuracy": 0.9840755735492578, "eval_overall_f1": 0.9296482412060302, "eval_overall_precision": 0.9158415841584159, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2469, "eval_samples_per_second": 688.445, "eval_steps_per_second": 12.149, "step": 4608 }, { "epoch": 49.0, "grad_norm": 0.2815532386302948, "learning_rate": 2.5500000000000003e-05, "loss": 0.0185, "step": 4704 }, { "epoch": 49.0, "eval_LOCATION_f1": 0.9222222222222223, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8829787234042553, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9096045197740114, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9147727272727273, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.052264876663684845, "eval_overall_accuracy": 0.9843454790823212, "eval_overall_f1": 0.9341772151898734, "eval_overall_precision": 0.9271356783919598, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.241, "eval_samples_per_second": 705.288, "eval_steps_per_second": 12.446, "step": 4704 }, { "epoch": 50.0, "grad_norm": 0.5738477110862732, "learning_rate": 2.5e-05, "loss": 0.0181, "step": 4800 }, { "epoch": 50.0, "eval_LOCATION_f1": 0.923076923076923, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.875, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9050279329608939, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.053917378187179565, "eval_overall_accuracy": 0.9838056680161943, "eval_overall_f1": 0.9321608040201005, "eval_overall_precision": 0.9183168316831684, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2469, "eval_samples_per_second": 688.451, "eval_steps_per_second": 12.149, "step": 4800 }, { "epoch": 51.0, "grad_norm": 0.5479133129119873, "learning_rate": 2.45e-05, "loss": 0.0181, "step": 4896 }, { "epoch": 51.0, "eval_LOCATION_f1": 0.9273743016759777, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8924731182795699, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9152542372881356, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9204545454545454, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05642145872116089, "eval_overall_accuracy": 0.9840755735492578, "eval_overall_f1": 0.9378960709759189, "eval_overall_precision": 0.9319899244332494, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2486, "eval_samples_per_second": 683.917, "eval_steps_per_second": 12.069, "step": 4896 }, { "epoch": 52.0, "grad_norm": 0.19654129445552826, "learning_rate": 2.4e-05, "loss": 0.0177, "step": 4992 }, { "epoch": 52.0, "eval_LOCATION_f1": 0.9222222222222223, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8829787234042553, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9235127478753541, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9314285714285714, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.058501098304986954, "eval_overall_accuracy": 0.9835357624831309, "eval_overall_f1": 0.9404309252217997, "eval_overall_precision": 0.9345088161209067, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2483, "eval_samples_per_second": 684.607, "eval_steps_per_second": 12.081, "step": 4992 }, { "epoch": 53.0, "grad_norm": 1.0167557001113892, "learning_rate": 2.35e-05, "loss": 0.0168, "step": 5088 }, { "epoch": 53.0, "eval_LOCATION_f1": 0.8791208791208791, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8333333333333334, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9095890410958904, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8877005347593583, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.054052095860242844, "eval_overall_accuracy": 0.9835357624831309, "eval_overall_f1": 0.9228855721393036, "eval_overall_precision": 0.9004854368932039, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.247, "eval_samples_per_second": 688.128, "eval_steps_per_second": 12.143, "step": 5088 }, { "epoch": 54.0, "grad_norm": 1.0977023839950562, "learning_rate": 2.3000000000000003e-05, "loss": 0.0172, "step": 5184 }, { "epoch": 54.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.915068493150685, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.893048128342246, "eval_ORGANIZATION_recall": 0.9382022471910112, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.051402755081653595, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9300000000000002, "eval_overall_precision": 0.9117647058823529, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2481, "eval_samples_per_second": 685.29, "eval_steps_per_second": 12.093, "step": 5184 }, { "epoch": 55.0, "grad_norm": 0.5542155504226685, "learning_rate": 2.25e-05, "loss": 0.0165, "step": 5280 }, { "epoch": 55.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9162011173184358, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9111111111111111, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05044760927557945, "eval_overall_accuracy": 0.9854251012145749, "eval_overall_f1": 0.9405815423514539, "eval_overall_precision": 0.9323308270676691, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2414, "eval_samples_per_second": 704.273, "eval_steps_per_second": 12.428, "step": 5280 }, { "epoch": 56.0, "grad_norm": 0.8049263954162598, "learning_rate": 2.2000000000000003e-05, "loss": 0.017, "step": 5376 }, { "epoch": 56.0, "eval_LOCATION_f1": 0.9222222222222223, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8829787234042553, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9256198347107438, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9081081081081082, "eval_ORGANIZATION_recall": 0.9438202247191011, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.051421601325273514, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.9411764705882352, "eval_overall_precision": 0.9238329238329238, "eval_overall_recall": 0.9591836734693877, "eval_runtime": 0.2479, "eval_samples_per_second": 685.841, "eval_steps_per_second": 12.103, "step": 5376 }, { "epoch": 57.0, "grad_norm": 0.5076385736465454, "learning_rate": 2.15e-05, "loss": 0.0153, "step": 5472 }, { "epoch": 57.0, "eval_LOCATION_f1": 0.9171270718232045, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8736842105263158, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.901639344262295, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8776595744680851, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05126916617155075, "eval_overall_accuracy": 0.9865047233468286, "eval_overall_f1": 0.9290161892901618, "eval_overall_precision": 0.9075425790754258, "eval_overall_recall": 0.951530612244898, "eval_runtime": 0.2476, "eval_samples_per_second": 686.651, "eval_steps_per_second": 12.117, "step": 5472 }, { "epoch": 58.0, "grad_norm": 0.2657437324523926, "learning_rate": 2.1e-05, "loss": 0.0157, "step": 5568 }, { "epoch": 58.0, "eval_LOCATION_f1": 0.9333333333333332, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8936170212765957, "eval_LOCATION_recall": 0.9767441860465116, "eval_ORGANIZATION_f1": 0.9090909090909091, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9195402298850575, "eval_ORGANIZATION_recall": 0.898876404494382, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05684962496161461, "eval_overall_accuracy": 0.9840755735492578, "eval_overall_f1": 0.9365482233502538, "eval_overall_precision": 0.9318181818181818, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2471, "eval_samples_per_second": 688.109, "eval_steps_per_second": 12.143, "step": 5568 }, { "epoch": 59.0, "grad_norm": 0.2525281310081482, "learning_rate": 2.05e-05, "loss": 0.0139, "step": 5664 }, { "epoch": 59.0, "eval_LOCATION_f1": 0.9257142857142857, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9101123595505618, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.8994413407821229, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8944444444444445, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05300280079245567, "eval_overall_accuracy": 0.9838056680161943, "eval_overall_f1": 0.9302915082382762, "eval_overall_precision": 0.924433249370277, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2478, "eval_samples_per_second": 686.144, "eval_steps_per_second": 12.108, "step": 5664 }, { "epoch": 60.0, "grad_norm": 0.33274856209754944, "learning_rate": 2e-05, "loss": 0.0156, "step": 5760 }, { "epoch": 60.0, "eval_LOCATION_f1": 0.9120879120879122, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8645833333333334, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.8950276243093923, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8804347826086957, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05184905230998993, "eval_overall_accuracy": 0.9840755735492578, "eval_overall_f1": 0.9238451935081149, "eval_overall_precision": 0.9046454767726161, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2473, "eval_samples_per_second": 687.413, "eval_steps_per_second": 12.131, "step": 5760 }, { "epoch": 61.0, "grad_norm": 0.16549794375896454, "learning_rate": 1.9500000000000003e-05, "loss": 0.0134, "step": 5856 }, { "epoch": 61.0, "eval_LOCATION_f1": 0.9257142857142857, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9101123595505618, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9201101928374654, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9027027027027027, "eval_ORGANIZATION_recall": 0.9382022471910112, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0490739643573761, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.9383647798742139, "eval_overall_precision": 0.9255583126550868, "eval_overall_recall": 0.951530612244898, "eval_runtime": 0.249, "eval_samples_per_second": 682.822, "eval_steps_per_second": 12.05, "step": 5856 }, { "epoch": 62.0, "grad_norm": 0.5557262897491455, "learning_rate": 1.9e-05, "loss": 0.0128, "step": 5952 }, { "epoch": 62.0, "eval_LOCATION_f1": 0.9213483146067417, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8913043478260869, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.8986301369863013, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8770053475935828, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.051324497908353806, "eval_overall_accuracy": 0.9843454790823212, "eval_overall_f1": 0.9275, "eval_overall_precision": 0.9093137254901961, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2482, "eval_samples_per_second": 684.886, "eval_steps_per_second": 12.086, "step": 5952 }, { "epoch": 63.0, "grad_norm": 1.3744993209838867, "learning_rate": 1.85e-05, "loss": 0.0126, "step": 6048 }, { "epoch": 63.0, "eval_LOCATION_f1": 0.9378531073446328, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9120879120879121, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9090909090909092, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8918918918918919, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05115790665149689, "eval_overall_accuracy": 0.9854251012145749, "eval_overall_f1": 0.9360100376411542, "eval_overall_precision": 0.9209876543209876, "eval_overall_recall": 0.951530612244898, "eval_runtime": 0.2472, "eval_samples_per_second": 687.609, "eval_steps_per_second": 12.134, "step": 6048 }, { "epoch": 64.0, "grad_norm": 0.0732036605477333, "learning_rate": 1.8e-05, "loss": 0.0134, "step": 6144 }, { "epoch": 64.0, "eval_LOCATION_f1": 0.9265536723163842, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9010989010989011, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9055555555555556, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8956043956043956, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05195976048707962, "eval_overall_accuracy": 0.9843454790823212, "eval_overall_f1": 0.9331651954602775, "eval_overall_precision": 0.9226932668329177, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2481, "eval_samples_per_second": 685.08, "eval_steps_per_second": 12.09, "step": 6144 }, { "epoch": 65.0, "grad_norm": 0.16457298398017883, "learning_rate": 1.75e-05, "loss": 0.0128, "step": 6240 }, { "epoch": 65.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9046321525885558, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8783068783068783, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04940816015005112, "eval_overall_accuracy": 0.9859649122807017, "eval_overall_f1": 0.9326683291770573, "eval_overall_precision": 0.9121951219512195, "eval_overall_recall": 0.9540816326530612, "eval_runtime": 0.248, "eval_samples_per_second": 685.437, "eval_steps_per_second": 12.096, "step": 6240 }, { "epoch": 66.0, "grad_norm": 0.8374859690666199, "learning_rate": 1.7000000000000003e-05, "loss": 0.0119, "step": 6336 }, { "epoch": 66.0, "eval_LOCATION_f1": 0.9222222222222223, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8829787234042553, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.8980716253443526, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8810810810810811, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05106525495648384, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.9275, "eval_overall_precision": 0.9093137254901961, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2476, "eval_samples_per_second": 686.465, "eval_steps_per_second": 12.114, "step": 6336 }, { "epoch": 67.0, "grad_norm": 0.22659751772880554, "learning_rate": 1.65e-05, "loss": 0.012, "step": 6432 }, { "epoch": 67.0, "eval_LOCATION_f1": 0.9222222222222223, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8829787234042553, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9121813031161473, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.92, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05494025722146034, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.935361216730038, "eval_overall_precision": 0.929471032745592, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2482, "eval_samples_per_second": 684.843, "eval_steps_per_second": 12.085, "step": 6432 }, { "epoch": 68.0, "grad_norm": 0.7559866905212402, "learning_rate": 1.6000000000000003e-05, "loss": 0.0124, "step": 6528 }, { "epoch": 68.0, "eval_LOCATION_f1": 0.9213483146067417, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8913043478260869, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9035812672176309, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8864864864864865, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.052313096821308136, "eval_overall_accuracy": 0.9838056680161943, "eval_overall_f1": 0.9298245614035087, "eval_overall_precision": 0.9137931034482759, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2481, "eval_samples_per_second": 685.296, "eval_steps_per_second": 12.093, "step": 6528 }, { "epoch": 69.0, "grad_norm": 0.5614367723464966, "learning_rate": 1.55e-05, "loss": 0.0139, "step": 6624 }, { "epoch": 69.0, "eval_LOCATION_f1": 0.9162011173184358, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8817204301075269, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9070422535211267, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9096045197740112, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.055041369050741196, "eval_overall_accuracy": 0.9840755735492578, "eval_overall_f1": 0.9304677623261693, "eval_overall_precision": 0.9223057644110275, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2439, "eval_samples_per_second": 696.924, "eval_steps_per_second": 12.299, "step": 6624 }, { "epoch": 70.0, "grad_norm": 0.11021454632282257, "learning_rate": 1.5e-05, "loss": 0.0121, "step": 6720 }, { "epoch": 70.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9065934065934066, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8870967741935484, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05283116549253464, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.9336670838548186, "eval_overall_precision": 0.9164619164619164, "eval_overall_recall": 0.951530612244898, "eval_runtime": 0.2468, "eval_samples_per_second": 688.687, "eval_steps_per_second": 12.153, "step": 6720 }, { "epoch": 71.0, "grad_norm": 0.6227138638496399, "learning_rate": 1.45e-05, "loss": 0.0113, "step": 6816 }, { "epoch": 71.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.8991825613079019, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.873015873015873, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04995880275964737, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9263420724094882, "eval_overall_precision": 0.9070904645476773, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2476, "eval_samples_per_second": 686.613, "eval_steps_per_second": 12.117, "step": 6816 }, { "epoch": 72.0, "grad_norm": 0.25081753730773926, "learning_rate": 1.4000000000000001e-05, "loss": 0.0114, "step": 6912 }, { "epoch": 72.0, "eval_LOCATION_f1": 0.9101123595505618, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8804347826086957, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.8980716253443526, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8810810810810811, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05604882910847664, "eval_overall_accuracy": 0.9843454790823212, "eval_overall_f1": 0.924812030075188, "eval_overall_precision": 0.9088669950738916, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2479, "eval_samples_per_second": 685.886, "eval_steps_per_second": 12.104, "step": 6912 }, { "epoch": 73.0, "grad_norm": 0.3885822296142578, "learning_rate": 1.3500000000000001e-05, "loss": 0.01, "step": 7008 }, { "epoch": 73.0, "eval_LOCATION_f1": 0.9171270718232045, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8736842105263158, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9065934065934066, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8870967741935484, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05322219058871269, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9301745635910225, "eval_overall_precision": 0.9097560975609756, "eval_overall_recall": 0.951530612244898, "eval_runtime": 0.2471, "eval_samples_per_second": 687.924, "eval_steps_per_second": 12.14, "step": 7008 }, { "epoch": 74.0, "grad_norm": 0.020958004519343376, "learning_rate": 1.3000000000000001e-05, "loss": 0.0138, "step": 7104 }, { "epoch": 74.0, "eval_LOCATION_f1": 0.9273743016759777, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8924731182795699, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.901098901098901, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8817204301075269, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.052465103566646576, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.9300000000000002, "eval_overall_precision": 0.9117647058823529, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2474, "eval_samples_per_second": 687.252, "eval_steps_per_second": 12.128, "step": 7104 }, { "epoch": 75.0, "grad_norm": 0.15891754627227783, "learning_rate": 1.25e-05, "loss": 0.0108, "step": 7200 }, { "epoch": 75.0, "eval_LOCATION_f1": 0.9120879120879122, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8645833333333334, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9014084507042253, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.903954802259887, "eval_ORGANIZATION_recall": 0.898876404494382, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0572768896818161, "eval_overall_accuracy": 0.9827260458839406, "eval_overall_f1": 0.9269521410579346, "eval_overall_precision": 0.9154228855721394, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2477, "eval_samples_per_second": 686.447, "eval_steps_per_second": 12.114, "step": 7200 }, { "epoch": 76.0, "grad_norm": 0.42831161618232727, "learning_rate": 1.2e-05, "loss": 0.0113, "step": 7296 }, { "epoch": 76.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.902506963788301, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8950276243093923, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.054985810071229935, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.9319899244332494, "eval_overall_precision": 0.9203980099502488, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2472, "eval_samples_per_second": 687.566, "eval_steps_per_second": 12.134, "step": 7296 }, { "epoch": 77.0, "grad_norm": 0.14717106521129608, "learning_rate": 1.1500000000000002e-05, "loss": 0.0111, "step": 7392 }, { "epoch": 77.0, "eval_LOCATION_f1": 0.9378531073446328, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9120879120879121, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9157303370786517, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9157303370786517, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.056410305202007294, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9404309252217997, "eval_overall_precision": 0.9345088161209067, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2478, "eval_samples_per_second": 686.043, "eval_steps_per_second": 12.107, "step": 7392 }, { "epoch": 78.0, "grad_norm": 0.7005665898323059, "learning_rate": 1.1000000000000001e-05, "loss": 0.0114, "step": 7488 }, { "epoch": 78.0, "eval_LOCATION_f1": 0.9171270718232045, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8736842105263158, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.902506963788301, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8950276243093923, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05650125443935394, "eval_overall_accuracy": 0.9832658569500675, "eval_overall_f1": 0.9296482412060302, "eval_overall_precision": 0.9158415841584159, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2411, "eval_samples_per_second": 705.234, "eval_steps_per_second": 12.445, "step": 7488 }, { "epoch": 79.0, "grad_norm": 0.07694126665592194, "learning_rate": 1.05e-05, "loss": 0.0101, "step": 7584 }, { "epoch": 79.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9162011173184358, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9111111111111111, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05472679063677788, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9382093316519546, "eval_overall_precision": 0.9276807980049875, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2491, "eval_samples_per_second": 682.486, "eval_steps_per_second": 12.044, "step": 7584 }, { "epoch": 80.0, "grad_norm": 0.6124340295791626, "learning_rate": 1e-05, "loss": 0.0108, "step": 7680 }, { "epoch": 80.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9035812672176309, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8864864864864865, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05269638076424599, "eval_overall_accuracy": 0.9854251012145749, "eval_overall_f1": 0.9323308270676692, "eval_overall_precision": 0.916256157635468, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2488, "eval_samples_per_second": 683.407, "eval_steps_per_second": 12.06, "step": 7680 }, { "epoch": 81.0, "grad_norm": 0.09941994398832321, "learning_rate": 9.5e-06, "loss": 0.0108, "step": 7776 }, { "epoch": 81.0, "eval_LOCATION_f1": 0.9273743016759777, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8924731182795699, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9095890410958904, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8877005347593583, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05239494517445564, "eval_overall_accuracy": 0.9859649122807017, "eval_overall_f1": 0.933832709113608, "eval_overall_precision": 0.9144254278728606, "eval_overall_recall": 0.9540816326530612, "eval_runtime": 0.2481, "eval_samples_per_second": 685.107, "eval_steps_per_second": 12.09, "step": 7776 }, { "epoch": 82.0, "grad_norm": 0.25073766708374023, "learning_rate": 9e-06, "loss": 0.0108, "step": 7872 }, { "epoch": 82.0, "eval_LOCATION_f1": 0.9222222222222223, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8829787234042553, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.8999999999999999, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8901098901098901, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05534841865301132, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9296482412060302, "eval_overall_precision": 0.9158415841584159, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2416, "eval_samples_per_second": 703.691, "eval_steps_per_second": 12.418, "step": 7872 }, { "epoch": 83.0, "grad_norm": 0.9179547429084778, "learning_rate": 8.500000000000002e-06, "loss": 0.0109, "step": 7968 }, { "epoch": 83.0, "eval_LOCATION_f1": 0.9273743016759777, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8924731182795699, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9030470914127424, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8907103825136612, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0529344268143177, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.9309912170639899, "eval_overall_precision": 0.9160493827160494, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.251, "eval_samples_per_second": 677.348, "eval_steps_per_second": 11.953, "step": 7968 }, { "epoch": 84.0, "grad_norm": 0.5180923938751221, "learning_rate": 8.000000000000001e-06, "loss": 0.0095, "step": 8064 }, { "epoch": 84.0, "eval_LOCATION_f1": 0.9273743016759777, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8924731182795699, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9050279329608939, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05454019829630852, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9331651954602775, "eval_overall_precision": 0.9226932668329177, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.249, "eval_samples_per_second": 682.731, "eval_steps_per_second": 12.048, "step": 8064 }, { "epoch": 85.0, "grad_norm": 0.7953592538833618, "learning_rate": 7.5e-06, "loss": 0.0096, "step": 8160 }, { "epoch": 85.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9101123595505618, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9101123595505618, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05559179559350014, "eval_overall_accuracy": 0.9840755735492578, "eval_overall_f1": 0.9367088607594937, "eval_overall_precision": 0.9296482412060302, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2483, "eval_samples_per_second": 684.666, "eval_steps_per_second": 12.082, "step": 8160 }, { "epoch": 86.0, "grad_norm": 0.07799229770898819, "learning_rate": 7.000000000000001e-06, "loss": 0.0098, "step": 8256 }, { "epoch": 86.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9171270718232043, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9021739130434783, "eval_ORGANIZATION_recall": 0.9325842696629213, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05340668186545372, "eval_overall_accuracy": 0.9854251012145749, "eval_overall_f1": 0.9385194479297364, "eval_overall_precision": 0.9234567901234568, "eval_overall_recall": 0.9540816326530612, "eval_runtime": 0.2486, "eval_samples_per_second": 683.761, "eval_steps_per_second": 12.066, "step": 8256 }, { "epoch": 87.0, "grad_norm": 0.04336532950401306, "learning_rate": 6.5000000000000004e-06, "loss": 0.0103, "step": 8352 }, { "epoch": 87.0, "eval_LOCATION_f1": 0.9378531073446328, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9120879120879121, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9106145251396648, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9055555555555556, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05287463217973709, "eval_overall_accuracy": 0.9854251012145749, "eval_overall_f1": 0.9368686868686869, "eval_overall_precision": 0.9275, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2473, "eval_samples_per_second": 687.307, "eval_steps_per_second": 12.129, "step": 8352 }, { "epoch": 88.0, "grad_norm": 1.0946106910705566, "learning_rate": 6e-06, "loss": 0.0113, "step": 8448 }, { "epoch": 88.0, "eval_LOCATION_f1": 0.9273743016759777, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8924731182795699, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9141274238227147, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9016393442622951, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05374481528997421, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9371859296482412, "eval_overall_precision": 0.9232673267326733, "eval_overall_recall": 0.951530612244898, "eval_runtime": 0.2475, "eval_samples_per_second": 686.787, "eval_steps_per_second": 12.12, "step": 8448 }, { "epoch": 89.0, "grad_norm": 0.055148638784885406, "learning_rate": 5.500000000000001e-06, "loss": 0.0103, "step": 8544 }, { "epoch": 89.0, "eval_LOCATION_f1": 0.9213483146067417, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8913043478260869, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9060773480662982, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8913043478260869, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05380658432841301, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.9309912170639899, "eval_overall_precision": 0.9160493827160494, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2484, "eval_samples_per_second": 684.349, "eval_steps_per_second": 12.077, "step": 8544 }, { "epoch": 90.0, "grad_norm": 0.522067666053772, "learning_rate": 5e-06, "loss": 0.0101, "step": 8640 }, { "epoch": 90.0, "eval_LOCATION_f1": 0.9213483146067417, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8913043478260869, "eval_LOCATION_recall": 0.9534883720930233, "eval_ORGANIZATION_f1": 0.9035812672176309, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8864864864864865, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05221777409315109, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.9298245614035087, "eval_overall_precision": 0.9137931034482759, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2491, "eval_samples_per_second": 682.544, "eval_steps_per_second": 12.045, "step": 8640 }, { "epoch": 91.0, "grad_norm": 0.78941410779953, "learning_rate": 4.5e-06, "loss": 0.0089, "step": 8736 }, { "epoch": 91.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9005524861878453, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8858695652173914, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.052769750356674194, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.9309912170639899, "eval_overall_precision": 0.9160493827160494, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2473, "eval_samples_per_second": 687.479, "eval_steps_per_second": 12.132, "step": 8736 }, { "epoch": 92.0, "grad_norm": 0.3154488503932953, "learning_rate": 4.000000000000001e-06, "loss": 0.0088, "step": 8832 }, { "epoch": 92.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9055555555555556, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8956043956043956, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05468715354800224, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.9333333333333335, "eval_overall_precision": 0.9205955334987593, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2486, "eval_samples_per_second": 683.728, "eval_steps_per_second": 12.066, "step": 8832 }, { "epoch": 93.0, "grad_norm": 0.6052812933921814, "learning_rate": 3.5000000000000004e-06, "loss": 0.0099, "step": 8928 }, { "epoch": 93.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9030470914127424, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8907103825136612, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.054242152720689774, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.9321608040201005, "eval_overall_precision": 0.9183168316831684, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2487, "eval_samples_per_second": 683.45, "eval_steps_per_second": 12.061, "step": 8928 }, { "epoch": 94.0, "grad_norm": 0.10974699258804321, "learning_rate": 3e-06, "loss": 0.0095, "step": 9024 }, { "epoch": 94.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9030470914127424, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8907103825136612, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05423642322421074, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.9321608040201005, "eval_overall_precision": 0.9183168316831684, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2486, "eval_samples_per_second": 683.781, "eval_steps_per_second": 12.067, "step": 9024 }, { "epoch": 95.0, "grad_norm": 0.3483632206916809, "learning_rate": 2.5e-06, "loss": 0.0094, "step": 9120 }, { "epoch": 95.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9030470914127424, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8907103825136612, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05356050282716751, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9333333333333335, "eval_overall_precision": 0.9205955334987593, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2433, "eval_samples_per_second": 698.606, "eval_steps_per_second": 12.328, "step": 9120 }, { "epoch": 96.0, "grad_norm": 0.06140005588531494, "learning_rate": 2.0000000000000003e-06, "loss": 0.0093, "step": 9216 }, { "epoch": 96.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9060773480662982, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8913043478260869, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05313665792346001, "eval_overall_accuracy": 0.9862348178137652, "eval_overall_f1": 0.934673366834171, "eval_overall_precision": 0.9207920792079208, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.2485, "eval_samples_per_second": 684.112, "eval_steps_per_second": 12.073, "step": 9216 }, { "epoch": 97.0, "grad_norm": 0.0674736350774765, "learning_rate": 1.5e-06, "loss": 0.009, "step": 9312 }, { "epoch": 97.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9035812672176309, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8864864864864865, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.053574338555336, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9323308270676692, "eval_overall_precision": 0.916256157635468, "eval_overall_recall": 0.9489795918367347, "eval_runtime": 0.248, "eval_samples_per_second": 685.414, "eval_steps_per_second": 12.096, "step": 9312 }, { "epoch": 98.0, "grad_norm": 1.0472229719161987, "learning_rate": 1.0000000000000002e-06, "loss": 0.0099, "step": 9408 }, { "epoch": 98.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9055555555555556, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8956043956043956, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05379428341984749, "eval_overall_accuracy": 0.9854251012145749, "eval_overall_f1": 0.9345088161209069, "eval_overall_precision": 0.9228855721393034, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2472, "eval_samples_per_second": 687.708, "eval_steps_per_second": 12.136, "step": 9408 }, { "epoch": 99.0, "grad_norm": 0.5022507905960083, "learning_rate": 5.000000000000001e-07, "loss": 0.0089, "step": 9504 }, { "epoch": 99.0, "eval_LOCATION_f1": 0.9378531073446328, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9120879120879121, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9080779944289694, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9005524861878453, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.054100390523672104, "eval_overall_accuracy": 0.9859649122807017, "eval_overall_f1": 0.9368686868686869, "eval_overall_precision": 0.9275, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.2473, "eval_samples_per_second": 687.373, "eval_steps_per_second": 12.13, "step": 9504 }, { "epoch": 100.0, "grad_norm": 0.782418429851532, "learning_rate": 0.0, "loss": 0.0107, "step": 9600 }, { "epoch": 100.0, "eval_LOCATION_f1": 0.9325842696629213, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9021739130434783, "eval_LOCATION_recall": 0.9651162790697675, "eval_ORGANIZATION_f1": 0.9080779944289694, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9005524861878453, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05406568571925163, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.935687263556116, "eval_overall_precision": 0.9251870324189526, "eval_overall_recall": 0.9464285714285714, "eval_runtime": 0.248, "eval_samples_per_second": 685.425, "eval_steps_per_second": 12.096, "step": 9600 }, { "epoch": 100.0, "step": 9600, "total_flos": 3908603344703268.0, "train_loss": 0.0451166748503844, "train_runtime": 562.8472, "train_samples_per_second": 272.01, "train_steps_per_second": 17.056 } ], "logging_steps": 500, "max_steps": 9600, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 3908603344703268.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }