{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 500, "global_step": 9600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.5495089292526245, "learning_rate": 4.9500000000000004e-05, "loss": 0.8449, "step": 96 }, { "epoch": 1.0, "eval_LOCATION_f1": 0.0, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.0, "eval_LOCATION_recall": 0.0, "eval_ORGANIZATION_f1": 0.0, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.0, "eval_ORGANIZATION_recall": 0.0, "eval_PERSON_f1": 0.0, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.0, "eval_PERSON_recall": 0.0, "eval_loss": 0.5421205163002014, "eval_overall_accuracy": 0.8378900856117095, "eval_overall_f1": 0.0, "eval_overall_precision": 0.0, "eval_overall_recall": 0.0, "eval_runtime": 0.242, "eval_samples_per_second": 702.487, "eval_steps_per_second": 12.397, "step": 96 }, { "epoch": 2.0, "grad_norm": 0.8255626559257507, "learning_rate": 4.9e-05, "loss": 0.4618, "step": 192 }, { "epoch": 2.0, "eval_LOCATION_f1": 0.2302158273381295, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.4444444444444444, "eval_LOCATION_recall": 0.1553398058252427, "eval_ORGANIZATION_f1": 0.43544303797468353, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.38392857142857145, "eval_ORGANIZATION_recall": 0.5029239766081871, "eval_PERSON_f1": 0.4112676056338028, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.32589285714285715, "eval_PERSON_recall": 0.5572519083969466, "eval_loss": 0.3207398056983948, "eval_overall_accuracy": 0.8994752830709749, "eval_overall_f1": 0.39370078740157477, "eval_overall_precision": 0.3615702479338843, "eval_overall_recall": 0.43209876543209874, "eval_runtime": 0.2392, "eval_samples_per_second": 710.691, "eval_steps_per_second": 12.542, "step": 192 }, { "epoch": 3.0, "grad_norm": 0.9372028708457947, "learning_rate": 4.85e-05, "loss": 0.3133, "step": 288 }, { "epoch": 3.0, "eval_LOCATION_f1": 0.4210526315789474, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.45977011494252873, "eval_LOCATION_recall": 0.3883495145631068, "eval_ORGANIZATION_f1": 0.6580976863753214, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.5871559633027523, "eval_ORGANIZATION_recall": 0.7485380116959064, "eval_PERSON_f1": 0.6644518272425249, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.5882352941176471, "eval_PERSON_recall": 0.7633587786259542, "eval_loss": 0.23170873522758484, "eval_overall_accuracy": 0.9362054681027341, "eval_overall_f1": 0.609090909090909, "eval_overall_precision": 0.5642105263157895, "eval_overall_recall": 0.6617283950617284, "eval_runtime": 0.2381, "eval_samples_per_second": 714.088, "eval_steps_per_second": 12.602, "step": 288 }, { "epoch": 4.0, "grad_norm": 1.0486959218978882, "learning_rate": 4.8e-05, "loss": 0.219, "step": 384 }, { "epoch": 4.0, "eval_LOCATION_f1": 0.5971563981042655, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.5833333333333334, "eval_LOCATION_recall": 0.6116504854368932, "eval_ORGANIZATION_f1": 0.7717391304347826, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.7208121827411168, "eval_ORGANIZATION_recall": 0.8304093567251462, "eval_PERSON_f1": 0.8920863309352518, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.8435374149659864, "eval_PERSON_recall": 0.9465648854961832, "eval_loss": 0.14336846768856049, "eval_overall_accuracy": 0.9624413145539906, "eval_overall_f1": 0.7677946324387397, "eval_overall_precision": 0.7278761061946902, "eval_overall_recall": 0.8123456790123457, "eval_runtime": 0.2381, "eval_samples_per_second": 713.887, "eval_steps_per_second": 12.598, "step": 384 }, { "epoch": 5.0, "grad_norm": 0.8694953918457031, "learning_rate": 4.75e-05, "loss": 0.1518, "step": 480 }, { "epoch": 5.0, "eval_LOCATION_f1": 0.7741935483870966, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.7368421052631579, "eval_LOCATION_recall": 0.8155339805825242, "eval_ORGANIZATION_f1": 0.7953890489913543, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.7840909090909091, "eval_ORGANIZATION_recall": 0.8070175438596491, "eval_PERSON_f1": 0.9213483146067415, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9044117647058824, "eval_PERSON_recall": 0.9389312977099237, "eval_loss": 0.11247999221086502, "eval_overall_accuracy": 0.9698978182822425, "eval_overall_f1": 0.8303249097472925, "eval_overall_precision": 0.8098591549295775, "eval_overall_recall": 0.8518518518518519, "eval_runtime": 0.2391, "eval_samples_per_second": 711.13, "eval_steps_per_second": 12.549, "step": 480 }, { "epoch": 6.0, "grad_norm": 1.2186826467514038, "learning_rate": 4.7e-05, "loss": 0.1247, "step": 576 }, { "epoch": 6.0, "eval_LOCATION_f1": 0.8165137614678899, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.7739130434782608, "eval_LOCATION_recall": 0.8640776699029126, "eval_ORGANIZATION_f1": 0.8396501457725948, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8372093023255814, "eval_ORGANIZATION_recall": 0.8421052631578947, "eval_PERSON_f1": 0.9473684210526315, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9333333333333333, "eval_PERSON_recall": 0.9618320610687023, "eval_loss": 0.08627010881900787, "eval_overall_accuracy": 0.9743164871582436, "eval_overall_f1": 0.86819830713422, "eval_overall_precision": 0.8507109004739336, "eval_overall_recall": 0.8864197530864197, "eval_runtime": 0.2391, "eval_samples_per_second": 711.127, "eval_steps_per_second": 12.549, "step": 576 }, { "epoch": 7.0, "grad_norm": 1.2285685539245605, "learning_rate": 4.6500000000000005e-05, "loss": 0.1071, "step": 672 }, { "epoch": 7.0, "eval_LOCATION_f1": 0.861111111111111, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8230088495575221, "eval_LOCATION_recall": 0.9029126213592233, "eval_ORGANIZATION_f1": 0.8530259365994236, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8409090909090909, "eval_ORGANIZATION_recall": 0.8654970760233918, "eval_PERSON_f1": 0.9407407407407407, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9136690647482014, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.07877262681722641, "eval_overall_accuracy": 0.975973487986744, "eval_overall_f1": 0.8835534213685474, "eval_overall_precision": 0.8598130841121495, "eval_overall_recall": 0.908641975308642, "eval_runtime": 0.2377, "eval_samples_per_second": 715.052, "eval_steps_per_second": 12.619, "step": 672 }, { "epoch": 8.0, "grad_norm": 0.5959410071372986, "learning_rate": 4.600000000000001e-05, "loss": 0.0973, "step": 768 }, { "epoch": 8.0, "eval_LOCATION_f1": 0.8545454545454545, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8034188034188035, "eval_LOCATION_recall": 0.912621359223301, "eval_ORGANIZATION_f1": 0.8436578171091447, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8511904761904762, "eval_ORGANIZATION_recall": 0.8362573099415205, "eval_PERSON_f1": 0.9584905660377357, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9477611940298507, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.07309388369321823, "eval_overall_accuracy": 0.9781828224247445, "eval_overall_f1": 0.883495145631068, "eval_overall_precision": 0.8687350835322196, "eval_overall_recall": 0.8987654320987655, "eval_runtime": 0.2342, "eval_samples_per_second": 725.946, "eval_steps_per_second": 12.811, "step": 768 }, { "epoch": 9.0, "grad_norm": 0.7997366189956665, "learning_rate": 4.55e-05, "loss": 0.0908, "step": 864 }, { "epoch": 9.0, "eval_LOCATION_f1": 0.9014084507042254, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8727272727272727, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8843930635838151, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8742857142857143, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.951310861423221, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9338235294117647, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.06480322033166885, "eval_overall_accuracy": 0.9803921568627451, "eval_overall_f1": 0.910411622276029, "eval_overall_precision": 0.8931116389548693, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2381, "eval_samples_per_second": 714.033, "eval_steps_per_second": 12.601, "step": 864 }, { "epoch": 10.0, "grad_norm": 0.7275916337966919, "learning_rate": 4.5e-05, "loss": 0.0806, "step": 960 }, { "epoch": 10.0, "eval_LOCATION_f1": 0.9245283018867926, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8990825688073395, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8869565217391304, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8793103448275862, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.9584905660377357, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9477611940298507, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.06184536591172218, "eval_overall_accuracy": 0.9814968240817453, "eval_overall_f1": 0.9197080291970804, "eval_overall_precision": 0.9064748201438849, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2406, "eval_samples_per_second": 706.576, "eval_steps_per_second": 12.469, "step": 960 }, { "epoch": 11.0, "grad_norm": 1.1911447048187256, "learning_rate": 4.4500000000000004e-05, "loss": 0.0744, "step": 1056 }, { "epoch": 11.0, "eval_LOCATION_f1": 0.9134615384615385, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9047619047619048, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.8825214899713466, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8651685393258427, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.0592152439057827, "eval_overall_accuracy": 0.9820491576912456, "eval_overall_f1": 0.9159561510353228, "eval_overall_precision": 0.9038461538461539, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2395, "eval_samples_per_second": 709.946, "eval_steps_per_second": 12.528, "step": 1056 }, { "epoch": 12.0, "grad_norm": 0.7836827635765076, "learning_rate": 4.4000000000000006e-05, "loss": 0.0725, "step": 1152 }, { "epoch": 12.0, "eval_LOCATION_f1": 0.9333333333333335, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9158878504672897, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8914285714285715, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8715083798882681, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.05613638833165169, "eval_overall_accuracy": 0.9823253244959956, "eval_overall_f1": 0.9247572815533981, "eval_overall_precision": 0.9093078758949881, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.2387, "eval_samples_per_second": 712.043, "eval_steps_per_second": 12.565, "step": 1152 }, { "epoch": 13.0, "grad_norm": 1.5938918590545654, "learning_rate": 4.35e-05, "loss": 0.0689, "step": 1248 }, { "epoch": 13.0, "eval_LOCATION_f1": 0.9230769230769231, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9142857142857143, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8967551622418879, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9047619047619048, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9584905660377357, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9477611940298507, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.05515846982598305, "eval_overall_accuracy": 0.9828776581054958, "eval_overall_f1": 0.9236453201970444, "eval_overall_precision": 0.9213759213759214, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2391, "eval_samples_per_second": 711.092, "eval_steps_per_second": 12.549, "step": 1248 }, { "epoch": 14.0, "grad_norm": 0.3897150754928589, "learning_rate": 4.3e-05, "loss": 0.0648, "step": 1344 }, { "epoch": 14.0, "eval_LOCATION_f1": 0.9014084507042254, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8727272727272727, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8693009118541033, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9050632911392406, "eval_ORGANIZATION_recall": 0.8362573099415205, "eval_PERSON_f1": 0.9584905660377357, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9477611940298507, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.05681250989437103, "eval_overall_accuracy": 0.9812206572769953, "eval_overall_f1": 0.9070631970260223, "eval_overall_precision": 0.9104477611940298, "eval_overall_recall": 0.9037037037037037, "eval_runtime": 0.2377, "eval_samples_per_second": 715.189, "eval_steps_per_second": 12.621, "step": 1344 }, { "epoch": 15.0, "grad_norm": 0.5173856616020203, "learning_rate": 4.25e-05, "loss": 0.0606, "step": 1440 }, { "epoch": 15.0, "eval_LOCATION_f1": 0.9320388349514563, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9320388349514563, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8999999999999999, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9053254437869822, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.047737739980220795, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.928395061728395, "eval_overall_precision": 0.928395061728395, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2389, "eval_samples_per_second": 711.648, "eval_steps_per_second": 12.558, "step": 1440 }, { "epoch": 16.0, "grad_norm": 1.0459837913513184, "learning_rate": 4.2e-05, "loss": 0.0559, "step": 1536 }, { "epoch": 16.0, "eval_LOCATION_f1": 0.9194312796208531, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8981481481481481, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8908554572271385, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8988095238095238, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04616360366344452, "eval_overall_accuracy": 0.983982325324496, "eval_overall_f1": 0.9213759213759213, "eval_overall_precision": 0.9168704156479217, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2396, "eval_samples_per_second": 709.397, "eval_steps_per_second": 12.519, "step": 1536 }, { "epoch": 17.0, "grad_norm": 0.5851144194602966, "learning_rate": 4.15e-05, "loss": 0.056, "step": 1632 }, { "epoch": 17.0, "eval_LOCATION_f1": 0.9306930693069307, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9494949494949495, "eval_LOCATION_recall": 0.912621359223301, "eval_ORGANIZATION_f1": 0.9080459770114941, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8926553672316384, "eval_ORGANIZATION_recall": 0.9239766081871345, "eval_PERSON_f1": 0.9584905660377357, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9477611940298507, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04825916886329651, "eval_overall_accuracy": 0.9845346589339962, "eval_overall_f1": 0.9300613496932515, "eval_overall_precision": 0.9243902439024391, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.24, "eval_samples_per_second": 708.359, "eval_steps_per_second": 12.5, "step": 1632 }, { "epoch": 18.0, "grad_norm": 0.9778196811676025, "learning_rate": 4.1e-05, "loss": 0.0518, "step": 1728 }, { "epoch": 18.0, "eval_LOCATION_f1": 0.9194312796208531, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8981481481481481, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9058823529411764, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9112426035502958, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9657794676806084, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9621212121212122, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04554278403520584, "eval_overall_accuracy": 0.9848108257387462, "eval_overall_f1": 0.9287469287469288, "eval_overall_precision": 0.9242053789731052, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2399, "eval_samples_per_second": 708.565, "eval_steps_per_second": 12.504, "step": 1728 }, { "epoch": 19.0, "grad_norm": 0.4489806890487671, "learning_rate": 4.05e-05, "loss": 0.049, "step": 1824 }, { "epoch": 19.0, "eval_LOCATION_f1": 0.9186602870813397, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9056603773584906, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.9047619047619047, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9212121212121213, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9584905660377357, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9477611940298507, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.05024198815226555, "eval_overall_accuracy": 0.9834299917149959, "eval_overall_f1": 0.9259259259259259, "eval_overall_precision": 0.9259259259259259, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2391, "eval_samples_per_second": 711.062, "eval_steps_per_second": 12.548, "step": 1824 }, { "epoch": 20.0, "grad_norm": 0.6007758378982544, "learning_rate": 4e-05, "loss": 0.0495, "step": 1920 }, { "epoch": 20.0, "eval_LOCATION_f1": 0.9282296650717704, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9150943396226415, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8901734104046243, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.88, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.044688038527965546, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.9230769230769231, "eval_overall_precision": 0.9130434782608695, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2328, "eval_samples_per_second": 730.087, "eval_steps_per_second": 12.884, "step": 1920 }, { "epoch": 21.0, "grad_norm": 0.7353448867797852, "learning_rate": 3.9500000000000005e-05, "loss": 0.0444, "step": 2016 }, { "epoch": 21.0, "eval_LOCATION_f1": 0.9245283018867926, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8990825688073395, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8961424332344213, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9096385542168675, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04771556705236435, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.9249692496924968, "eval_overall_precision": 0.9215686274509803, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2391, "eval_samples_per_second": 710.875, "eval_steps_per_second": 12.545, "step": 2016 }, { "epoch": 22.0, "grad_norm": 1.9100626707077026, "learning_rate": 3.9000000000000006e-05, "loss": 0.0432, "step": 2112 }, { "epoch": 22.0, "eval_LOCATION_f1": 0.9282296650717704, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9150943396226415, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.893371757925072, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8806818181818182, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04187900200486183, "eval_overall_accuracy": 0.9856393261529964, "eval_overall_f1": 0.9279609279609279, "eval_overall_precision": 0.9178743961352657, "eval_overall_recall": 0.9382716049382716, "eval_runtime": 0.2383, "eval_samples_per_second": 713.505, "eval_steps_per_second": 12.591, "step": 2112 }, { "epoch": 23.0, "grad_norm": 0.9582666158676147, "learning_rate": 3.85e-05, "loss": 0.0419, "step": 2208 }, { "epoch": 23.0, "eval_LOCATION_f1": 0.9245283018867926, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8990825688073395, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8914956011730205, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8941176470588236, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04341152310371399, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.922888616891065, "eval_overall_precision": 0.9150485436893204, "eval_overall_recall": 0.9308641975308642, "eval_runtime": 0.2387, "eval_samples_per_second": 712.134, "eval_steps_per_second": 12.567, "step": 2208 }, { "epoch": 24.0, "grad_norm": 0.7949368357658386, "learning_rate": 3.8e-05, "loss": 0.0401, "step": 2304 }, { "epoch": 24.0, "eval_LOCATION_f1": 0.9116279069767441, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.875, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8830409356725146, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8830409356725146, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.046432796865701675, "eval_overall_accuracy": 0.9845346589339962, "eval_overall_f1": 0.9159561510353228, "eval_overall_precision": 0.9038461538461539, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2382, "eval_samples_per_second": 713.61, "eval_steps_per_second": 12.593, "step": 2304 }, { "epoch": 25.0, "grad_norm": 0.45651164650917053, "learning_rate": 3.7500000000000003e-05, "loss": 0.0388, "step": 2400 }, { "epoch": 25.0, "eval_LOCATION_f1": 0.9201877934272301, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8909090909090909, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8928571428571428, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9090909090909091, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04689078778028488, "eval_overall_accuracy": 0.983706158519746, "eval_overall_f1": 0.922509225092251, "eval_overall_precision": 0.9191176470588235, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.2398, "eval_samples_per_second": 708.966, "eval_steps_per_second": 12.511, "step": 2400 }, { "epoch": 26.0, "grad_norm": 0.6172522306442261, "learning_rate": 3.7e-05, "loss": 0.0364, "step": 2496 }, { "epoch": 26.0, "eval_LOCATION_f1": 0.9326923076923078, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9238095238095239, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9037900874635568, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9011627906976745, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.039931874722242355, "eval_overall_accuracy": 0.9867439933719967, "eval_overall_f1": 0.9336609336609337, "eval_overall_precision": 0.9290953545232273, "eval_overall_recall": 0.9382716049382716, "eval_runtime": 0.2385, "eval_samples_per_second": 712.864, "eval_steps_per_second": 12.58, "step": 2496 }, { "epoch": 27.0, "grad_norm": 1.0252867937088013, "learning_rate": 3.65e-05, "loss": 0.036, "step": 2592 }, { "epoch": 27.0, "eval_LOCATION_f1": 0.9377990430622011, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9245283018867925, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.9005847953216374, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9005847953216374, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.04199051484465599, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.9300613496932515, "eval_overall_precision": 0.9243902439024391, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2399, "eval_samples_per_second": 708.5, "eval_steps_per_second": 12.503, "step": 2592 }, { "epoch": 28.0, "grad_norm": 0.27090853452682495, "learning_rate": 3.6e-05, "loss": 0.0349, "step": 2688 }, { "epoch": 28.0, "eval_LOCATION_f1": 0.9282296650717704, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9150943396226415, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.904899135446686, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8920454545454546, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.03930835798382759, "eval_overall_accuracy": 0.9861916597624966, "eval_overall_f1": 0.9292682926829269, "eval_overall_precision": 0.9180722891566265, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.2392, "eval_samples_per_second": 710.608, "eval_steps_per_second": 12.54, "step": 2688 }, { "epoch": 29.0, "grad_norm": 0.9562463760375977, "learning_rate": 3.55e-05, "loss": 0.0328, "step": 2784 }, { "epoch": 29.0, "eval_LOCATION_f1": 0.9282296650717704, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9150943396226415, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9026548672566371, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9107142857142857, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.962121212121212, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9548872180451128, "eval_PERSON_recall": 0.9694656488549618, "eval_loss": 0.046227529644966125, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.9285714285714285, "eval_overall_precision": 0.9262899262899262, "eval_overall_recall": 0.9308641975308642, "eval_runtime": 0.2391, "eval_samples_per_second": 710.854, "eval_steps_per_second": 12.544, "step": 2784 }, { "epoch": 30.0, "grad_norm": 0.8298453688621521, "learning_rate": 3.5e-05, "loss": 0.0334, "step": 2880 }, { "epoch": 30.0, "eval_LOCATION_f1": 0.9326923076923078, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9238095238095239, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9137931034482759, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8983050847457628, "eval_ORGANIZATION_recall": 0.9298245614035088, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04131978750228882, "eval_overall_accuracy": 0.9861916597624966, "eval_overall_f1": 0.9377289377289377, "eval_overall_precision": 0.927536231884058, "eval_overall_recall": 0.9481481481481482, "eval_runtime": 0.233, "eval_samples_per_second": 729.537, "eval_steps_per_second": 12.874, "step": 2880 }, { "epoch": 31.0, "grad_norm": 0.49351659417152405, "learning_rate": 3.45e-05, "loss": 0.0334, "step": 2976 }, { "epoch": 31.0, "eval_LOCATION_f1": 0.909952606635071, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8888888888888888, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.9011627906976746, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8959537572254336, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04011217877268791, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.9266503667481663, "eval_overall_precision": 0.9176755447941889, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.239, "eval_samples_per_second": 711.196, "eval_steps_per_second": 12.551, "step": 2976 }, { "epoch": 32.0, "grad_norm": 1.0873384475708008, "learning_rate": 3.4000000000000007e-05, "loss": 0.0311, "step": 3072 }, { "epoch": 32.0, "eval_LOCATION_f1": 0.9252336448598131, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8918918918918919, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.9058823529411764, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9112426035502958, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.03975724056363106, "eval_overall_accuracy": 0.9861916597624966, "eval_overall_f1": 0.9326805385556917, "eval_overall_precision": 0.9247572815533981, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.2382, "eval_samples_per_second": 713.66, "eval_steps_per_second": 12.594, "step": 3072 }, { "epoch": 33.0, "grad_norm": 19.138242721557617, "learning_rate": 3.35e-05, "loss": 0.0281, "step": 3168 }, { "epoch": 33.0, "eval_LOCATION_f1": 0.9201877934272301, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8909090909090909, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.9032258064516129, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9058823529411765, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.03890666365623474, "eval_overall_accuracy": 0.9864678265672466, "eval_overall_f1": 0.9302325581395349, "eval_overall_precision": 0.9223300970873787, "eval_overall_recall": 0.9382716049382716, "eval_runtime": 0.2377, "eval_samples_per_second": 715.041, "eval_steps_per_second": 12.618, "step": 3168 }, { "epoch": 34.0, "grad_norm": 0.26283007860183716, "learning_rate": 3.3e-05, "loss": 0.0326, "step": 3264 }, { "epoch": 34.0, "eval_LOCATION_f1": 0.9238095238095239, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9065420560747663, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8985507246376812, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8908045977011494, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.040688566863536835, "eval_overall_accuracy": 0.9861916597624966, "eval_overall_f1": 0.9290953545232274, "eval_overall_precision": 0.9200968523002422, "eval_overall_recall": 0.9382716049382716, "eval_runtime": 0.2375, "eval_samples_per_second": 715.887, "eval_steps_per_second": 12.633, "step": 3264 }, { "epoch": 35.0, "grad_norm": 1.0442869663238525, "learning_rate": 3.2500000000000004e-05, "loss": 0.026, "step": 3360 }, { "epoch": 35.0, "eval_LOCATION_f1": 0.9201877934272301, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8909090909090909, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8994082840236686, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9101796407185628, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.0436968095600605, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.9287469287469288, "eval_overall_precision": 0.9242053789731052, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2372, "eval_samples_per_second": 716.703, "eval_steps_per_second": 12.648, "step": 3360 }, { "epoch": 36.0, "grad_norm": 0.8186739087104797, "learning_rate": 3.2000000000000005e-05, "loss": 0.0278, "step": 3456 }, { "epoch": 36.0, "eval_LOCATION_f1": 0.9252336448598131, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8918918918918919, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.9112426035502958, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9221556886227545, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04156600683927536, "eval_overall_accuracy": 0.9861916597624966, "eval_overall_f1": 0.9349693251533743, "eval_overall_precision": 0.9292682926829269, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.2396, "eval_samples_per_second": 709.546, "eval_steps_per_second": 12.521, "step": 3456 }, { "epoch": 37.0, "grad_norm": 1.0797454118728638, "learning_rate": 3.15e-05, "loss": 0.0263, "step": 3552 }, { "epoch": 37.0, "eval_LOCATION_f1": 0.9215686274509804, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9306930693069307, "eval_LOCATION_recall": 0.912621359223301, "eval_ORGANIZATION_f1": 0.9085714285714285, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.888268156424581, "eval_ORGANIZATION_recall": 0.9298245614035088, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04057058319449425, "eval_overall_accuracy": 0.9861916597624966, "eval_overall_f1": 0.9326805385556917, "eval_overall_precision": 0.9247572815533981, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.2388, "eval_samples_per_second": 711.808, "eval_steps_per_second": 12.561, "step": 3552 }, { "epoch": 38.0, "grad_norm": 0.24539858102798462, "learning_rate": 3.1e-05, "loss": 0.0241, "step": 3648 }, { "epoch": 38.0, "eval_LOCATION_f1": 0.9166666666666666, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8761061946902655, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.9058823529411764, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9112426035502958, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.043341729789972305, "eval_overall_accuracy": 0.9861916597624966, "eval_overall_f1": 0.9304029304029304, "eval_overall_precision": 0.9202898550724637, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.2389, "eval_samples_per_second": 711.701, "eval_steps_per_second": 12.559, "step": 3648 }, { "epoch": 39.0, "grad_norm": 0.3340110182762146, "learning_rate": 3.05e-05, "loss": 0.0231, "step": 3744 }, { "epoch": 39.0, "eval_LOCATION_f1": 0.9201877934272301, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8909090909090909, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.9117647058823529, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9171597633136095, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04085851088166237, "eval_overall_accuracy": 0.9856393261529964, "eval_overall_f1": 0.9338235294117647, "eval_overall_precision": 0.927007299270073, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.2375, "eval_samples_per_second": 715.726, "eval_steps_per_second": 12.63, "step": 3744 }, { "epoch": 40.0, "grad_norm": 1.1994820833206177, "learning_rate": 3e-05, "loss": 0.0229, "step": 3840 }, { "epoch": 40.0, "eval_LOCATION_f1": 0.9377990430622011, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9245283018867925, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.9154518950437317, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9127906976744186, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04374144971370697, "eval_overall_accuracy": 0.9861916597624966, "eval_overall_f1": 0.939877300613497, "eval_overall_precision": 0.9341463414634147, "eval_overall_recall": 0.945679012345679, "eval_runtime": 0.2387, "eval_samples_per_second": 712.188, "eval_steps_per_second": 12.568, "step": 3840 }, { "epoch": 41.0, "grad_norm": 0.25557270646095276, "learning_rate": 2.95e-05, "loss": 0.0221, "step": 3936 }, { "epoch": 41.0, "eval_LOCATION_f1": 0.9230769230769231, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9142857142857143, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.9069767441860466, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9017341040462428, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.039609022438526154, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.9325153374233128, "eval_overall_precision": 0.926829268292683, "eval_overall_recall": 0.9382716049382716, "eval_runtime": 0.2399, "eval_samples_per_second": 708.668, "eval_steps_per_second": 12.506, "step": 3936 }, { "epoch": 42.0, "grad_norm": 0.3500101864337921, "learning_rate": 2.9e-05, "loss": 0.0229, "step": 4032 }, { "epoch": 42.0, "eval_LOCATION_f1": 0.9245283018867926, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8990825688073395, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.9020771513353115, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9156626506024096, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.042799338698387146, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.9310344827586207, "eval_overall_precision": 0.9287469287469288, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2405, "eval_samples_per_second": 706.843, "eval_steps_per_second": 12.474, "step": 4032 }, { "epoch": 43.0, "grad_norm": 0.36219123005867004, "learning_rate": 2.8499999999999998e-05, "loss": 0.0225, "step": 4128 }, { "epoch": 43.0, "eval_LOCATION_f1": 0.9201877934272301, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8909090909090909, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.9117647058823529, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9171597633136095, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.0407901257276535, "eval_overall_accuracy": 0.9861916597624966, "eval_overall_f1": 0.9338235294117647, "eval_overall_precision": 0.927007299270073, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.2394, "eval_samples_per_second": 710.16, "eval_steps_per_second": 12.532, "step": 4128 }, { "epoch": 44.0, "grad_norm": 0.6720152497291565, "learning_rate": 2.8000000000000003e-05, "loss": 0.0217, "step": 4224 }, { "epoch": 44.0, "eval_LOCATION_f1": 0.9245283018867926, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8990825688073395, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.9036144578313253, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9316770186335404, "eval_ORGANIZATION_recall": 0.8771929824561403, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04908669367432594, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.9318463444857497, "eval_overall_precision": 0.9353233830845771, "eval_overall_recall": 0.928395061728395, "eval_runtime": 0.2338, "eval_samples_per_second": 727.201, "eval_steps_per_second": 12.833, "step": 4224 }, { "epoch": 45.0, "grad_norm": 0.4248528480529785, "learning_rate": 2.7500000000000004e-05, "loss": 0.0209, "step": 4320 }, { "epoch": 45.0, "eval_LOCATION_f1": 0.9166666666666666, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8761061946902655, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.9020771513353115, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9156626506024096, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.046026408672332764, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.9289215686274509, "eval_overall_precision": 0.9221411192214112, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2385, "eval_samples_per_second": 712.884, "eval_steps_per_second": 12.58, "step": 4320 }, { "epoch": 46.0, "grad_norm": 0.3786522448062897, "learning_rate": 2.7000000000000002e-05, "loss": 0.0196, "step": 4416 }, { "epoch": 46.0, "eval_LOCATION_f1": 0.9209302325581395, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8839285714285714, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.9122807017543859, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9122807017543859, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04332219809293747, "eval_overall_accuracy": 0.9864678265672466, "eval_overall_f1": 0.9341463414634146, "eval_overall_precision": 0.9228915662650602, "eval_overall_recall": 0.945679012345679, "eval_runtime": 0.2374, "eval_samples_per_second": 715.973, "eval_steps_per_second": 12.635, "step": 4416 }, { "epoch": 47.0, "grad_norm": 0.5840160250663757, "learning_rate": 2.6500000000000004e-05, "loss": 0.0201, "step": 4512 }, { "epoch": 47.0, "eval_LOCATION_f1": 0.9090909090909091, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8962264150943396, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.8959537572254336, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8857142857142857, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04047170653939247, "eval_overall_accuracy": 0.9861916597624966, "eval_overall_f1": 0.9242053789731051, "eval_overall_precision": 0.9152542372881356, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2383, "eval_samples_per_second": 713.529, "eval_steps_per_second": 12.592, "step": 4512 }, { "epoch": 48.0, "grad_norm": 0.4405346214771271, "learning_rate": 2.6000000000000002e-05, "loss": 0.0187, "step": 4608 }, { "epoch": 48.0, "eval_LOCATION_f1": 0.9252336448598131, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8918918918918919, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.9112426035502958, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9221556886227545, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04593610018491745, "eval_overall_accuracy": 0.9864678265672466, "eval_overall_f1": 0.9349693251533743, "eval_overall_precision": 0.9292682926829269, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.2386, "eval_samples_per_second": 712.491, "eval_steps_per_second": 12.573, "step": 4608 }, { "epoch": 49.0, "grad_norm": 0.03923991695046425, "learning_rate": 2.5500000000000003e-05, "loss": 0.0172, "step": 4704 }, { "epoch": 49.0, "eval_LOCATION_f1": 0.909952606635071, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8888888888888888, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.9037900874635568, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9011627906976745, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04445308819413185, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.9277845777233782, "eval_overall_precision": 0.9199029126213593, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2334, "eval_samples_per_second": 728.447, "eval_steps_per_second": 12.855, "step": 4704 }, { "epoch": 50.0, "grad_norm": 0.5399296879768372, "learning_rate": 2.5e-05, "loss": 0.0171, "step": 4800 }, { "epoch": 50.0, "eval_LOCATION_f1": 0.909952606635071, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8888888888888888, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.9064327485380118, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9064327485380117, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.046150218695402145, "eval_overall_accuracy": 0.9856393261529964, "eval_overall_f1": 0.9289215686274509, "eval_overall_precision": 0.9221411192214112, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2332, "eval_samples_per_second": 728.869, "eval_steps_per_second": 12.862, "step": 4800 }, { "epoch": 51.0, "grad_norm": 0.4086000323295593, "learning_rate": 2.45e-05, "loss": 0.018, "step": 4896 }, { "epoch": 51.0, "eval_LOCATION_f1": 0.9238095238095239, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9065420560747663, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9198813056379821, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9337349397590361, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.048966407775878906, "eval_overall_accuracy": 0.9864678265672466, "eval_overall_f1": 0.9382716049382716, "eval_overall_precision": 0.9382716049382716, "eval_overall_recall": 0.9382716049382716, "eval_runtime": 0.2394, "eval_samples_per_second": 710.201, "eval_steps_per_second": 12.533, "step": 4896 }, { "epoch": 52.0, "grad_norm": 0.5035057067871094, "learning_rate": 2.4e-05, "loss": 0.0165, "step": 4992 }, { "epoch": 52.0, "eval_LOCATION_f1": 0.909952606635071, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8888888888888888, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.9064327485380118, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9064327485380117, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04487072303891182, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.9289215686274509, "eval_overall_precision": 0.9221411192214112, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2397, "eval_samples_per_second": 709.237, "eval_steps_per_second": 12.516, "step": 4992 }, { "epoch": 53.0, "grad_norm": 0.760168194770813, "learning_rate": 2.35e-05, "loss": 0.0156, "step": 5088 }, { "epoch": 53.0, "eval_LOCATION_f1": 0.9047619047619048, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8878504672897196, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.9011627906976746, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8959537572254336, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04452786222100258, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.9253365973072215, "eval_overall_precision": 0.9174757281553398, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.238, "eval_samples_per_second": 714.388, "eval_steps_per_second": 12.607, "step": 5088 }, { "epoch": 54.0, "grad_norm": 0.6284216642379761, "learning_rate": 2.3000000000000003e-05, "loss": 0.0159, "step": 5184 }, { "epoch": 54.0, "eval_LOCATION_f1": 0.9178743961352658, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9134615384615384, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.9106628242074928, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8977272727272727, "eval_ORGANIZATION_recall": 0.9239766081871345, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.044425297528505325, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.9326805385556917, "eval_overall_precision": 0.9247572815533981, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.2384, "eval_samples_per_second": 713.232, "eval_steps_per_second": 12.586, "step": 5184 }, { "epoch": 55.0, "grad_norm": 0.9918360114097595, "learning_rate": 2.25e-05, "loss": 0.0171, "step": 5280 }, { "epoch": 55.0, "eval_LOCATION_f1": 0.9252336448598131, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8918918918918919, "eval_LOCATION_recall": 0.9611650485436893, "eval_ORGANIZATION_f1": 0.9014925373134329, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9207317073170732, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.051084306091070175, "eval_overall_accuracy": 0.9856393261529964, "eval_overall_f1": 0.9310344827586207, "eval_overall_precision": 0.9287469287469288, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2383, "eval_samples_per_second": 713.5, "eval_steps_per_second": 12.591, "step": 5280 }, { "epoch": 56.0, "grad_norm": 0.6938837170600891, "learning_rate": 2.2000000000000003e-05, "loss": 0.0174, "step": 5376 }, { "epoch": 56.0, "eval_LOCATION_f1": 0.9134615384615385, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9047619047619048, "eval_LOCATION_recall": 0.9223300970873787, "eval_ORGANIZATION_f1": 0.899135446685879, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8863636363636364, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04707374423742294, "eval_overall_accuracy": 0.9848108257387462, "eval_overall_f1": 0.9266503667481663, "eval_overall_precision": 0.9176755447941889, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2381, "eval_samples_per_second": 714.132, "eval_steps_per_second": 12.602, "step": 5376 }, { "epoch": 57.0, "grad_norm": 0.6816030740737915, "learning_rate": 2.15e-05, "loss": 0.0157, "step": 5472 }, { "epoch": 57.0, "eval_LOCATION_f1": 0.9201877934272301, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8909090909090909, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.9069767441860466, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9017341040462428, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04758083075284958, "eval_overall_accuracy": 0.9856393261529964, "eval_overall_f1": 0.9317073170731707, "eval_overall_precision": 0.9204819277108434, "eval_overall_recall": 0.9432098765432099, "eval_runtime": 0.2372, "eval_samples_per_second": 716.698, "eval_steps_per_second": 12.648, "step": 5472 }, { "epoch": 58.0, "grad_norm": 0.37026599049568176, "learning_rate": 2.1e-05, "loss": 0.0162, "step": 5568 }, { "epoch": 58.0, "eval_LOCATION_f1": 0.9150943396226414, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8899082568807339, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9064327485380118, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9064327485380117, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04582762345671654, "eval_overall_accuracy": 0.9856393261529964, "eval_overall_f1": 0.9302325581395349, "eval_overall_precision": 0.9223300970873787, "eval_overall_recall": 0.9382716049382716, "eval_runtime": 0.2385, "eval_samples_per_second": 712.675, "eval_steps_per_second": 12.577, "step": 5568 }, { "epoch": 59.0, "grad_norm": 0.4450501501560211, "learning_rate": 2.05e-05, "loss": 0.0141, "step": 5664 }, { "epoch": 59.0, "eval_LOCATION_f1": 0.9082125603864736, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9038461538461539, "eval_LOCATION_recall": 0.912621359223301, "eval_ORGANIZATION_f1": 0.9075144508670521, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8971428571428571, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04398062080144882, "eval_overall_accuracy": 0.9856393261529964, "eval_overall_f1": 0.9289215686274509, "eval_overall_precision": 0.9221411192214112, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2377, "eval_samples_per_second": 715.128, "eval_steps_per_second": 12.62, "step": 5664 }, { "epoch": 60.0, "grad_norm": 0.09225308150053024, "learning_rate": 2e-05, "loss": 0.014, "step": 5760 }, { "epoch": 60.0, "eval_LOCATION_f1": 0.9150943396226414, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8899082568807339, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9085545722713864, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9166666666666666, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04666078835725784, "eval_overall_accuracy": 0.9856393261529964, "eval_overall_f1": 0.9312039312039312, "eval_overall_precision": 0.9266503667481663, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2385, "eval_samples_per_second": 712.873, "eval_steps_per_second": 12.58, "step": 5760 }, { "epoch": 61.0, "grad_norm": 0.38753741979599, "learning_rate": 1.9500000000000003e-05, "loss": 0.014, "step": 5856 }, { "epoch": 61.0, "eval_LOCATION_f1": 0.9142857142857143, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.897196261682243, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.8921282798833821, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8895348837209303, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04967240244150162, "eval_overall_accuracy": 0.9845346589339962, "eval_overall_f1": 0.9240196078431373, "eval_overall_precision": 0.9172749391727494, "eval_overall_recall": 0.9308641975308642, "eval_runtime": 0.2324, "eval_samples_per_second": 731.555, "eval_steps_per_second": 12.91, "step": 5856 }, { "epoch": 62.0, "grad_norm": 1.20694100856781, "learning_rate": 1.9e-05, "loss": 0.0139, "step": 5952 }, { "epoch": 62.0, "eval_LOCATION_f1": 0.9194312796208531, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8981481481481481, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9037900874635568, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9011627906976745, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04574815556406975, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.9302325581395349, "eval_overall_precision": 0.9223300970873787, "eval_overall_recall": 0.9382716049382716, "eval_runtime": 0.2388, "eval_samples_per_second": 711.977, "eval_steps_per_second": 12.564, "step": 5952 }, { "epoch": 63.0, "grad_norm": 1.1634835004806519, "learning_rate": 1.85e-05, "loss": 0.0142, "step": 6048 }, { "epoch": 63.0, "eval_LOCATION_f1": 0.9107981220657277, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8818181818181818, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9112426035502958, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9221556886227545, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04668914154171944, "eval_overall_accuracy": 0.9864678265672466, "eval_overall_f1": 0.9312039312039312, "eval_overall_precision": 0.9266503667481663, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2375, "eval_samples_per_second": 715.707, "eval_steps_per_second": 12.63, "step": 6048 }, { "epoch": 64.0, "grad_norm": 0.6986099481582642, "learning_rate": 1.8e-05, "loss": 0.0137, "step": 6144 }, { "epoch": 64.0, "eval_LOCATION_f1": 0.9107981220657277, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8818181818181818, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9053254437869822, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9161676646706587, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.047451265156269073, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.9287469287469288, "eval_overall_precision": 0.9242053789731052, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2377, "eval_samples_per_second": 715.27, "eval_steps_per_second": 12.622, "step": 6144 }, { "epoch": 65.0, "grad_norm": 0.4387049674987793, "learning_rate": 1.75e-05, "loss": 0.0129, "step": 6240 }, { "epoch": 65.0, "eval_LOCATION_f1": 0.9150943396226414, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8899082568807339, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9112426035502958, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9221556886227545, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.047847893089056015, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.9323493234932348, "eval_overall_precision": 0.928921568627451, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.239, "eval_samples_per_second": 711.324, "eval_steps_per_second": 12.553, "step": 6240 }, { "epoch": 66.0, "grad_norm": 0.28086650371551514, "learning_rate": 1.7000000000000003e-05, "loss": 0.0119, "step": 6336 }, { "epoch": 66.0, "eval_LOCATION_f1": 0.9150943396226414, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8899082568807339, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9005847953216374, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9005847953216374, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.046711619943380356, "eval_overall_accuracy": 0.9856393261529964, "eval_overall_f1": 0.9277845777233782, "eval_overall_precision": 0.9199029126213593, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2337, "eval_samples_per_second": 727.428, "eval_steps_per_second": 12.837, "step": 6336 }, { "epoch": 67.0, "grad_norm": 0.06994430720806122, "learning_rate": 1.65e-05, "loss": 0.0132, "step": 6432 }, { "epoch": 67.0, "eval_LOCATION_f1": 0.9150943396226414, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8899082568807339, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8921282798833821, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8895348837209303, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04856061935424805, "eval_overall_accuracy": 0.9845346589339962, "eval_overall_f1": 0.9242053789731051, "eval_overall_precision": 0.9152542372881356, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2377, "eval_samples_per_second": 715.143, "eval_steps_per_second": 12.62, "step": 6432 }, { "epoch": 68.0, "grad_norm": 0.23654058575630188, "learning_rate": 1.6000000000000003e-05, "loss": 0.0137, "step": 6528 }, { "epoch": 68.0, "eval_LOCATION_f1": 0.9107981220657277, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8818181818181818, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9064327485380118, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9064327485380117, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04489968344569206, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.9290953545232274, "eval_overall_precision": 0.9200968523002422, "eval_overall_recall": 0.9382716049382716, "eval_runtime": 0.2338, "eval_samples_per_second": 727.098, "eval_steps_per_second": 12.831, "step": 6528 }, { "epoch": 69.0, "grad_norm": 0.5306476950645447, "learning_rate": 1.55e-05, "loss": 0.0121, "step": 6624 }, { "epoch": 69.0, "eval_LOCATION_f1": 0.9194312796208531, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8981481481481481, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8927536231884058, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8850574712643678, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04589081555604935, "eval_overall_accuracy": 0.9848108257387462, "eval_overall_f1": 0.9255189255189255, "eval_overall_precision": 0.9154589371980676, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.238, "eval_samples_per_second": 714.16, "eval_steps_per_second": 12.603, "step": 6624 }, { "epoch": 70.0, "grad_norm": 0.11097829788923264, "learning_rate": 1.5e-05, "loss": 0.012, "step": 6720 }, { "epoch": 70.0, "eval_LOCATION_f1": 0.9150943396226414, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8899082568807339, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9026548672566371, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9107142857142857, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04666482284665108, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.9287469287469288, "eval_overall_precision": 0.9242053789731052, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2327, "eval_samples_per_second": 730.502, "eval_steps_per_second": 12.891, "step": 6720 }, { "epoch": 71.0, "grad_norm": 0.7256884574890137, "learning_rate": 1.45e-05, "loss": 0.0116, "step": 6816 }, { "epoch": 71.0, "eval_LOCATION_f1": 0.9186602870813397, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.9056603773584906, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.9101449275362319, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9022988505747126, "eval_ORGANIZATION_recall": 0.9181286549707602, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04491443559527397, "eval_overall_accuracy": 0.9861916597624966, "eval_overall_f1": 0.9326805385556917, "eval_overall_precision": 0.9247572815533981, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.239, "eval_samples_per_second": 711.21, "eval_steps_per_second": 12.551, "step": 6816 }, { "epoch": 72.0, "grad_norm": 1.0254273414611816, "learning_rate": 1.4000000000000001e-05, "loss": 0.012, "step": 6912 }, { "epoch": 72.0, "eval_LOCATION_f1": 0.9201877934272301, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8909090909090909, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8988095238095237, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9151515151515152, "eval_ORGANIZATION_recall": 0.8830409356725146, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.05158749595284462, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.9285714285714285, "eval_overall_precision": 0.9262899262899262, "eval_overall_recall": 0.9308641975308642, "eval_runtime": 0.2327, "eval_samples_per_second": 730.475, "eval_steps_per_second": 12.891, "step": 6912 }, { "epoch": 73.0, "grad_norm": 0.3432978093624115, "learning_rate": 1.3500000000000001e-05, "loss": 0.0116, "step": 7008 }, { "epoch": 73.0, "eval_LOCATION_f1": 0.9150943396226414, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8899082568807339, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8979591836734693, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8953488372093024, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04868904501199722, "eval_overall_accuracy": 0.9845346589339962, "eval_overall_f1": 0.9266503667481663, "eval_overall_precision": 0.9176755447941889, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2323, "eval_samples_per_second": 731.663, "eval_steps_per_second": 12.912, "step": 7008 }, { "epoch": 74.0, "grad_norm": 0.07163161784410477, "learning_rate": 1.3000000000000001e-05, "loss": 0.0118, "step": 7104 }, { "epoch": 74.0, "eval_LOCATION_f1": 0.9194312796208531, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8981481481481481, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9037900874635568, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9011627906976745, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.048750292509794235, "eval_overall_accuracy": 0.9848108257387462, "eval_overall_f1": 0.9302325581395349, "eval_overall_precision": 0.9223300970873787, "eval_overall_recall": 0.9382716049382716, "eval_runtime": 0.2389, "eval_samples_per_second": 711.644, "eval_steps_per_second": 12.558, "step": 7104 }, { "epoch": 75.0, "grad_norm": 0.5576338768005371, "learning_rate": 1.25e-05, "loss": 0.0126, "step": 7200 }, { "epoch": 75.0, "eval_LOCATION_f1": 0.9107981220657277, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8818181818181818, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9080118694362017, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9216867469879518, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.0506761334836483, "eval_overall_accuracy": 0.9848108257387462, "eval_overall_f1": 0.929889298892989, "eval_overall_precision": 0.9264705882352942, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2324, "eval_samples_per_second": 731.414, "eval_steps_per_second": 12.907, "step": 7200 }, { "epoch": 76.0, "grad_norm": 0.28490886092185974, "learning_rate": 1.2e-05, "loss": 0.0111, "step": 7296 }, { "epoch": 76.0, "eval_LOCATION_f1": 0.9150943396226414, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8899082568807339, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9058823529411764, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9112426035502958, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.05136576294898987, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.9300613496932515, "eval_overall_precision": 0.9243902439024391, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2387, "eval_samples_per_second": 712.317, "eval_steps_per_second": 12.57, "step": 7296 }, { "epoch": 77.0, "grad_norm": 0.34466353058815, "learning_rate": 1.1500000000000002e-05, "loss": 0.0107, "step": 7392 }, { "epoch": 77.0, "eval_LOCATION_f1": 0.9245283018867926, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8990825688073395, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.8816568047337278, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8922155688622755, "eval_ORGANIZATION_recall": 0.8713450292397661, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.05405063182115555, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.922509225092251, "eval_overall_precision": 0.9191176470588235, "eval_overall_recall": 0.9259259259259259, "eval_runtime": 0.238, "eval_samples_per_second": 714.373, "eval_steps_per_second": 12.607, "step": 7392 }, { "epoch": 78.0, "grad_norm": 0.9143396019935608, "learning_rate": 1.1000000000000001e-05, "loss": 0.0109, "step": 7488 }, { "epoch": 78.0, "eval_LOCATION_f1": 0.9150943396226414, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8899082568807339, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8979591836734693, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8953488372093024, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04905899986624718, "eval_overall_accuracy": 0.9845346589339962, "eval_overall_f1": 0.9266503667481663, "eval_overall_precision": 0.9176755447941889, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2326, "eval_samples_per_second": 730.807, "eval_steps_per_second": 12.897, "step": 7488 }, { "epoch": 79.0, "grad_norm": 0.15043137967586517, "learning_rate": 1.05e-05, "loss": 0.0107, "step": 7584 }, { "epoch": 79.0, "eval_LOCATION_f1": 0.9107981220657277, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8818181818181818, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8967551622418879, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9047619047619048, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.052930157631635666, "eval_overall_accuracy": 0.9845346589339962, "eval_overall_f1": 0.9251533742331289, "eval_overall_precision": 0.9195121951219513, "eval_overall_recall": 0.9308641975308642, "eval_runtime": 0.2382, "eval_samples_per_second": 713.614, "eval_steps_per_second": 12.593, "step": 7584 }, { "epoch": 80.0, "grad_norm": 0.25833311676979065, "learning_rate": 1e-05, "loss": 0.0099, "step": 7680 }, { "epoch": 80.0, "eval_LOCATION_f1": 0.9150943396226414, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8899082568807339, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.8921282798833821, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.8895348837209303, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.050141818821430206, "eval_overall_accuracy": 0.9842584921292461, "eval_overall_f1": 0.9242053789731051, "eval_overall_precision": 0.9152542372881356, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2388, "eval_samples_per_second": 711.827, "eval_steps_per_second": 12.562, "step": 7680 }, { "epoch": 81.0, "grad_norm": 0.20771318674087524, "learning_rate": 9.5e-06, "loss": 0.0109, "step": 7776 }, { "epoch": 81.0, "eval_LOCATION_f1": 0.9150943396226414, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8899082568807339, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9005847953216374, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9005847953216374, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04843030124902725, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.9277845777233782, "eval_overall_precision": 0.9199029126213593, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2329, "eval_samples_per_second": 730.007, "eval_steps_per_second": 12.882, "step": 7776 }, { "epoch": 82.0, "grad_norm": 0.49792373180389404, "learning_rate": 9e-06, "loss": 0.011, "step": 7872 }, { "epoch": 82.0, "eval_LOCATION_f1": 0.9150943396226414, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8899082568807339, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9064327485380118, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9064327485380117, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.05032794177532196, "eval_overall_accuracy": 0.9850869925434963, "eval_overall_f1": 0.9302325581395349, "eval_overall_precision": 0.9223300970873787, "eval_overall_recall": 0.9382716049382716, "eval_runtime": 0.2326, "eval_samples_per_second": 730.991, "eval_steps_per_second": 12.9, "step": 7872 }, { "epoch": 83.0, "grad_norm": 0.5063265562057495, "learning_rate": 8.500000000000002e-06, "loss": 0.0119, "step": 7968 }, { "epoch": 83.0, "eval_LOCATION_f1": 0.9142857142857143, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.897196261682243, "eval_LOCATION_recall": 0.9320388349514563, "eval_ORGANIZATION_f1": 0.9058823529411764, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9112426035502958, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04876190796494484, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.929889298892989, "eval_overall_precision": 0.9264705882352942, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2324, "eval_samples_per_second": 731.357, "eval_steps_per_second": 12.906, "step": 7968 }, { "epoch": 84.0, "grad_norm": 0.266640841960907, "learning_rate": 8.000000000000001e-06, "loss": 0.0102, "step": 8064 }, { "epoch": 84.0, "eval_LOCATION_f1": 0.9107981220657277, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8818181818181818, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9144542772861357, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9226190476190477, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04977697879076004, "eval_overall_accuracy": 0.9864678265672466, "eval_overall_f1": 0.9325153374233128, "eval_overall_precision": 0.926829268292683, "eval_overall_recall": 0.9382716049382716, "eval_runtime": 0.2388, "eval_samples_per_second": 711.839, "eval_steps_per_second": 12.562, "step": 8064 }, { "epoch": 85.0, "grad_norm": 1.013655662536621, "learning_rate": 7.5e-06, "loss": 0.012, "step": 8160 }, { "epoch": 85.0, "eval_LOCATION_f1": 0.9245283018867926, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8990825688073395, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.9117647058823529, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9171597633136095, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.048490799963474274, "eval_overall_accuracy": 0.9864678265672466, "eval_overall_f1": 0.9349693251533743, "eval_overall_precision": 0.9292682926829269, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.2389, "eval_samples_per_second": 711.467, "eval_steps_per_second": 12.555, "step": 8160 }, { "epoch": 86.0, "grad_norm": 0.9031422138214111, "learning_rate": 7.000000000000001e-06, "loss": 0.0105, "step": 8256 }, { "epoch": 86.0, "eval_LOCATION_f1": 0.9158878504672898, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8828828828828829, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.9020771513353115, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9156626506024096, "eval_ORGANIZATION_recall": 0.8888888888888888, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04886508360505104, "eval_overall_accuracy": 0.9853631593482464, "eval_overall_f1": 0.9287469287469288, "eval_overall_precision": 0.9242053789731052, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2371, "eval_samples_per_second": 717.084, "eval_steps_per_second": 12.654, "step": 8256 }, { "epoch": 87.0, "grad_norm": 0.9565138220787048, "learning_rate": 6.5000000000000004e-06, "loss": 0.0113, "step": 8352 }, { "epoch": 87.0, "eval_LOCATION_f1": 0.9194312796208531, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8981481481481481, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9144542772861357, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9226190476190477, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.0481349378824234, "eval_overall_accuracy": 0.9864678265672466, "eval_overall_f1": 0.9348093480934809, "eval_overall_precision": 0.9313725490196079, "eval_overall_recall": 0.9382716049382716, "eval_runtime": 0.2391, "eval_samples_per_second": 711.11, "eval_steps_per_second": 12.549, "step": 8352 }, { "epoch": 88.0, "grad_norm": 0.3963314890861511, "learning_rate": 6e-06, "loss": 0.0106, "step": 8448 }, { "epoch": 88.0, "eval_LOCATION_f1": 0.9201877934272301, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8909090909090909, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.9117647058823529, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9171597633136095, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.047724656760692596, "eval_overall_accuracy": 0.9864678265672466, "eval_overall_f1": 0.9338235294117647, "eval_overall_precision": 0.927007299270073, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.2394, "eval_samples_per_second": 710.103, "eval_steps_per_second": 12.531, "step": 8448 }, { "epoch": 89.0, "grad_norm": 0.3354659080505371, "learning_rate": 5.500000000000001e-06, "loss": 0.0108, "step": 8544 }, { "epoch": 89.0, "eval_LOCATION_f1": 0.9201877934272301, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8909090909090909, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.9090909090909091, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9117647058823529, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04729631170630455, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.9326805385556917, "eval_overall_precision": 0.9247572815533981, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.2382, "eval_samples_per_second": 713.664, "eval_steps_per_second": 12.594, "step": 8544 }, { "epoch": 90.0, "grad_norm": 0.5694552063941956, "learning_rate": 5e-06, "loss": 0.0107, "step": 8640 }, { "epoch": 90.0, "eval_LOCATION_f1": 0.9194312796208531, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8981481481481481, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9090909090909091, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9117647058823529, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04821091145277023, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.9325153374233128, "eval_overall_precision": 0.926829268292683, "eval_overall_recall": 0.9382716049382716, "eval_runtime": 0.2396, "eval_samples_per_second": 709.403, "eval_steps_per_second": 12.519, "step": 8640 }, { "epoch": 91.0, "grad_norm": 0.4703254699707031, "learning_rate": 4.5e-06, "loss": 0.0097, "step": 8736 }, { "epoch": 91.0, "eval_LOCATION_f1": 0.9245283018867926, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8990825688073395, "eval_LOCATION_recall": 0.9514563106796117, "eval_ORGANIZATION_f1": 0.9005847953216374, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9005847953216374, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.047950390726327896, "eval_overall_accuracy": 0.9864678265672466, "eval_overall_f1": 0.9302325581395349, "eval_overall_precision": 0.9223300970873787, "eval_overall_recall": 0.9382716049382716, "eval_runtime": 0.2391, "eval_samples_per_second": 710.967, "eval_steps_per_second": 12.546, "step": 8736 }, { "epoch": 92.0, "grad_norm": 0.08553267270326614, "learning_rate": 4.000000000000001e-06, "loss": 0.0104, "step": 8832 }, { "epoch": 92.0, "eval_LOCATION_f1": 0.9150943396226414, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8899082568807339, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9149560117302051, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9176470588235294, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04772806167602539, "eval_overall_accuracy": 0.9870201601767468, "eval_overall_f1": 0.9338235294117647, "eval_overall_precision": 0.927007299270073, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.2385, "eval_samples_per_second": 712.779, "eval_steps_per_second": 12.578, "step": 8832 }, { "epoch": 93.0, "grad_norm": 0.15816359221935272, "learning_rate": 3.5000000000000004e-06, "loss": 0.0101, "step": 8928 }, { "epoch": 93.0, "eval_LOCATION_f1": 0.9150943396226414, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8899082568807339, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9149560117302051, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9176470588235294, "eval_ORGANIZATION_recall": 0.9122807017543859, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.047919295728206635, "eval_overall_accuracy": 0.9867439933719967, "eval_overall_f1": 0.9338235294117647, "eval_overall_precision": 0.927007299270073, "eval_overall_recall": 0.9407407407407408, "eval_runtime": 0.2391, "eval_samples_per_second": 710.922, "eval_steps_per_second": 12.546, "step": 8928 }, { "epoch": 94.0, "grad_norm": 0.020390264689922333, "learning_rate": 3e-06, "loss": 0.0099, "step": 9024 }, { "epoch": 94.0, "eval_LOCATION_f1": 0.9107981220657277, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8818181818181818, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9026548672566371, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9107142857142857, "eval_ORGANIZATION_recall": 0.8947368421052632, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04879921302199364, "eval_overall_accuracy": 0.9856393261529964, "eval_overall_f1": 0.9276073619631902, "eval_overall_precision": 0.9219512195121952, "eval_overall_recall": 0.9333333333333333, "eval_runtime": 0.2391, "eval_samples_per_second": 711.072, "eval_steps_per_second": 12.548, "step": 9024 }, { "epoch": 95.0, "grad_norm": 0.3915695548057556, "learning_rate": 2.5e-06, "loss": 0.0098, "step": 9120 }, { "epoch": 95.0, "eval_LOCATION_f1": 0.9150943396226414, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8899082568807339, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9090909090909091, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9117647058823529, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.047256771475076675, "eval_overall_accuracy": 0.9867439933719967, "eval_overall_f1": 0.9313725490196078, "eval_overall_precision": 0.9245742092457421, "eval_overall_recall": 0.9382716049382716, "eval_runtime": 0.2392, "eval_samples_per_second": 710.718, "eval_steps_per_second": 12.542, "step": 9120 }, { "epoch": 96.0, "grad_norm": 0.07055274397134781, "learning_rate": 2.0000000000000003e-06, "loss": 0.0085, "step": 9216 }, { "epoch": 96.0, "eval_LOCATION_f1": 0.9150943396226414, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8899082568807339, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9090909090909091, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9117647058823529, "eval_ORGANIZATION_recall": 0.9064327485380117, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04745207354426384, "eval_overall_accuracy": 0.9867439933719967, "eval_overall_f1": 0.9313725490196078, "eval_overall_precision": 0.9245742092457421, "eval_overall_recall": 0.9382716049382716, "eval_runtime": 0.2388, "eval_samples_per_second": 711.929, "eval_steps_per_second": 12.563, "step": 9216 }, { "epoch": 97.0, "grad_norm": 0.09564412385225296, "learning_rate": 1.5e-06, "loss": 0.0096, "step": 9312 }, { "epoch": 97.0, "eval_LOCATION_f1": 0.9150943396226414, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8899082568807339, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9005847953216374, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9005847953216374, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.047618940472602844, "eval_overall_accuracy": 0.9861916597624966, "eval_overall_f1": 0.9277845777233782, "eval_overall_precision": 0.9199029126213593, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2387, "eval_samples_per_second": 712.044, "eval_steps_per_second": 12.565, "step": 9312 }, { "epoch": 98.0, "grad_norm": 1.170229196548462, "learning_rate": 1.0000000000000002e-06, "loss": 0.0097, "step": 9408 }, { "epoch": 98.0, "eval_LOCATION_f1": 0.9150943396226414, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8899082568807339, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9005847953216374, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9005847953216374, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04772994667291641, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.9277845777233782, "eval_overall_precision": 0.9199029126213593, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2382, "eval_samples_per_second": 713.632, "eval_steps_per_second": 12.594, "step": 9408 }, { "epoch": 99.0, "grad_norm": 0.40140441060066223, "learning_rate": 5.000000000000001e-07, "loss": 0.0087, "step": 9504 }, { "epoch": 99.0, "eval_LOCATION_f1": 0.9150943396226414, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8899082568807339, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9005847953216374, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9005847953216374, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04780791699886322, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.9277845777233782, "eval_overall_precision": 0.9199029126213593, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2387, "eval_samples_per_second": 712.337, "eval_steps_per_second": 12.571, "step": 9504 }, { "epoch": 100.0, "grad_norm": 0.05598459392786026, "learning_rate": 0.0, "loss": 0.0085, "step": 9600 }, { "epoch": 100.0, "eval_LOCATION_f1": 0.9150943396226414, "eval_LOCATION_number": 103, "eval_LOCATION_precision": 0.8899082568807339, "eval_LOCATION_recall": 0.941747572815534, "eval_ORGANIZATION_f1": 0.9005847953216374, "eval_ORGANIZATION_number": 171, "eval_ORGANIZATION_precision": 0.9005847953216374, "eval_ORGANIZATION_recall": 0.9005847953216374, "eval_PERSON_f1": 0.9733840304182511, "eval_PERSON_number": 131, "eval_PERSON_precision": 0.9696969696969697, "eval_PERSON_recall": 0.9770992366412213, "eval_loss": 0.04764658957719803, "eval_overall_accuracy": 0.9859154929577465, "eval_overall_f1": 0.9277845777233782, "eval_overall_precision": 0.9199029126213593, "eval_overall_recall": 0.9358024691358025, "eval_runtime": 0.2405, "eval_samples_per_second": 706.733, "eval_steps_per_second": 12.472, "step": 9600 }, { "epoch": 100.0, "step": 9600, "total_flos": 3917323399962396.0, "train_loss": 0.04564088595410188, "train_runtime": 560.1092, "train_samples_per_second": 273.34, "train_steps_per_second": 17.14 } ], "logging_steps": 500, "max_steps": 9600, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 3917323399962396.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }