nerui-base-1 / trainer_state.json
apwic's picture
End of training
85f8380 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 100.0,
"eval_steps": 500,
"global_step": 9600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 3.049309492111206,
"learning_rate": 4.9500000000000004e-05,
"loss": 0.2668,
"step": 96
},
{
"epoch": 1.0,
"eval_LOCATION_f1": 0.9184549356223175,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9145299145299145,
"eval_LOCATION_recall": 0.9224137931034483,
"eval_ORGANIZATION_f1": 0.9283489096573209,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9141104294478528,
"eval_ORGANIZATION_recall": 0.9430379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.03940283879637718,
"eval_overall_accuracy": 0.9879220422728521,
"eval_overall_f1": 0.9439601494396015,
"eval_overall_precision": 0.9358024691358025,
"eval_overall_recall": 0.9522613065326633,
"eval_runtime": 0.2756,
"eval_samples_per_second": 616.868,
"eval_steps_per_second": 10.886,
"step": 96
},
{
"epoch": 2.0,
"grad_norm": 1.6137810945510864,
"learning_rate": 4.9e-05,
"loss": 0.0634,
"step": 192
},
{
"epoch": 2.0,
"eval_LOCATION_f1": 0.9316239316239315,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.923728813559322,
"eval_LOCATION_recall": 0.9396551724137931,
"eval_ORGANIZATION_f1": 0.932475884244373,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9477124183006536,
"eval_ORGANIZATION_recall": 0.9177215189873418,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04595618322491646,
"eval_overall_accuracy": 0.9881965413121054,
"eval_overall_f1": 0.9496221662468514,
"eval_overall_precision": 0.952020202020202,
"eval_overall_recall": 0.9472361809045227,
"eval_runtime": 0.2866,
"eval_samples_per_second": 593.078,
"eval_steps_per_second": 10.466,
"step": 192
},
{
"epoch": 3.0,
"grad_norm": 1.8293578624725342,
"learning_rate": 4.85e-05,
"loss": 0.032,
"step": 288
},
{
"epoch": 3.0,
"eval_LOCATION_f1": 0.9391304347826087,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9473684210526315,
"eval_LOCATION_recall": 0.9310344827586207,
"eval_ORGANIZATION_f1": 0.9396825396825397,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9426751592356688,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.04407869651913643,
"eval_overall_accuracy": 0.9890200384298655,
"eval_overall_f1": 0.9534591194968555,
"eval_overall_precision": 0.9546599496221663,
"eval_overall_recall": 0.9522613065326633,
"eval_runtime": 0.2736,
"eval_samples_per_second": 621.343,
"eval_steps_per_second": 10.965,
"step": 288
},
{
"epoch": 4.0,
"grad_norm": 1.4507431983947754,
"learning_rate": 4.8e-05,
"loss": 0.022,
"step": 384
},
{
"epoch": 4.0,
"eval_LOCATION_f1": 0.9561403508771931,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9732142857142857,
"eval_LOCATION_recall": 0.9396551724137931,
"eval_ORGANIZATION_f1": 0.9341692789968652,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9254658385093167,
"eval_ORGANIZATION_recall": 0.9430379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.044241927564144135,
"eval_overall_accuracy": 0.9909415317046391,
"eval_overall_f1": 0.957286432160804,
"eval_overall_precision": 0.957286432160804,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.2828,
"eval_samples_per_second": 601.144,
"eval_steps_per_second": 10.608,
"step": 384
},
{
"epoch": 5.0,
"grad_norm": 2.4056336879730225,
"learning_rate": 4.75e-05,
"loss": 0.0143,
"step": 480
},
{
"epoch": 5.0,
"eval_LOCATION_f1": 0.9535864978902953,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9338842975206612,
"eval_LOCATION_recall": 0.9741379310344828,
"eval_ORGANIZATION_f1": 0.9483870967741935,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9671052631578947,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.04740007221698761,
"eval_overall_accuracy": 0.9898435355476256,
"eval_overall_f1": 0.9597989949748744,
"eval_overall_precision": 0.9597989949748744,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2733,
"eval_samples_per_second": 621.921,
"eval_steps_per_second": 10.975,
"step": 480
},
{
"epoch": 6.0,
"grad_norm": 0.2397124469280243,
"learning_rate": 4.7e-05,
"loss": 0.0122,
"step": 576
},
{
"epoch": 6.0,
"eval_LOCATION_f1": 0.9446808510638298,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9327731092436975,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.934640522875817,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9662162162162162,
"eval_ORGANIZATION_recall": 0.9050632911392406,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.058103881776332855,
"eval_overall_accuracy": 0.9884710403513588,
"eval_overall_f1": 0.9518987341772152,
"eval_overall_precision": 0.9591836734693877,
"eval_overall_recall": 0.9447236180904522,
"eval_runtime": 0.2739,
"eval_samples_per_second": 620.573,
"eval_steps_per_second": 10.951,
"step": 576
},
{
"epoch": 7.0,
"grad_norm": 0.054269399493932724,
"learning_rate": 4.6500000000000005e-05,
"loss": 0.0062,
"step": 672
},
{
"epoch": 7.0,
"eval_LOCATION_f1": 0.9568965517241379,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9568965517241379,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9456869009584665,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9548387096774194,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.05784749239683151,
"eval_overall_accuracy": 0.9909415317046391,
"eval_overall_f1": 0.9622166246851386,
"eval_overall_precision": 0.9646464646464646,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2745,
"eval_samples_per_second": 619.39,
"eval_steps_per_second": 10.93,
"step": 672
},
{
"epoch": 8.0,
"grad_norm": 0.038999129086732864,
"learning_rate": 4.600000000000001e-05,
"loss": 0.007,
"step": 768
},
{
"epoch": 8.0,
"eval_LOCATION_f1": 0.9655172413793104,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9655172413793104,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9490445859872612,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9551282051282052,
"eval_ORGANIZATION_recall": 0.9430379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.06084027141332626,
"eval_overall_accuracy": 0.990118034586879,
"eval_overall_f1": 0.9660377358490567,
"eval_overall_precision": 0.9672544080604534,
"eval_overall_recall": 0.964824120603015,
"eval_runtime": 0.2827,
"eval_samples_per_second": 601.28,
"eval_steps_per_second": 10.611,
"step": 768
},
{
"epoch": 9.0,
"grad_norm": 0.102794349193573,
"learning_rate": 4.55e-05,
"loss": 0.0049,
"step": 864
},
{
"epoch": 9.0,
"eval_LOCATION_f1": 0.9446808510638298,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9327731092436975,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9250814332247557,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9530201342281879,
"eval_ORGANIZATION_recall": 0.8987341772151899,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.06557527184486389,
"eval_overall_accuracy": 0.9873730441943454,
"eval_overall_f1": 0.9506953223767383,
"eval_overall_precision": 0.9567430025445293,
"eval_overall_recall": 0.9447236180904522,
"eval_runtime": 0.2897,
"eval_samples_per_second": 586.732,
"eval_steps_per_second": 10.354,
"step": 864
},
{
"epoch": 10.0,
"grad_norm": 0.3394320011138916,
"learning_rate": 4.5e-05,
"loss": 0.0056,
"step": 960
},
{
"epoch": 10.0,
"eval_LOCATION_f1": 0.9568965517241379,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9568965517241379,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9363057324840764,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9423076923076923,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.05663624778389931,
"eval_overall_accuracy": 0.9895690365083722,
"eval_overall_f1": 0.957286432160804,
"eval_overall_precision": 0.957286432160804,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.2755,
"eval_samples_per_second": 617.0,
"eval_steps_per_second": 10.888,
"step": 960
},
{
"epoch": 11.0,
"grad_norm": 0.001915093045681715,
"learning_rate": 4.4500000000000004e-05,
"loss": 0.0046,
"step": 1056
},
{
"epoch": 11.0,
"eval_LOCATION_f1": 0.9572649572649573,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9491525423728814,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9196141479099678,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.934640522875817,
"eval_ORGANIZATION_recall": 0.9050632911392406,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07088989019393921,
"eval_overall_accuracy": 0.9879220422728521,
"eval_overall_f1": 0.9521410579345088,
"eval_overall_precision": 0.9545454545454546,
"eval_overall_recall": 0.949748743718593,
"eval_runtime": 0.2767,
"eval_samples_per_second": 614.422,
"eval_steps_per_second": 10.843,
"step": 1056
},
{
"epoch": 12.0,
"grad_norm": 0.015670381486415863,
"learning_rate": 4.4000000000000006e-05,
"loss": 0.0022,
"step": 1152
},
{
"epoch": 12.0,
"eval_LOCATION_f1": 0.9531914893617022,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9411764705882353,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9456869009584665,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9548387096774194,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07214923948049545,
"eval_overall_accuracy": 0.990118034586879,
"eval_overall_f1": 0.9611041405269761,
"eval_overall_precision": 0.9598997493734336,
"eval_overall_recall": 0.9623115577889447,
"eval_runtime": 0.2868,
"eval_samples_per_second": 592.77,
"eval_steps_per_second": 10.461,
"step": 1152
},
{
"epoch": 13.0,
"grad_norm": 0.002585033653303981,
"learning_rate": 4.35e-05,
"loss": 0.0048,
"step": 1248
},
{
"epoch": 13.0,
"eval_LOCATION_f1": 0.9527896995708154,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9487179487179487,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.946031746031746,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9490445859872612,
"eval_ORGANIZATION_recall": 0.9430379746835443,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.05439727008342743,
"eval_overall_accuracy": 0.9920395278616525,
"eval_overall_f1": 0.9598997493734336,
"eval_overall_precision": 0.9575,
"eval_overall_recall": 0.9623115577889447,
"eval_runtime": 0.2779,
"eval_samples_per_second": 611.673,
"eval_steps_per_second": 10.794,
"step": 1248
},
{
"epoch": 14.0,
"grad_norm": 0.06117913872003555,
"learning_rate": 4.3e-05,
"loss": 0.0029,
"step": 1344
},
{
"epoch": 14.0,
"eval_LOCATION_f1": 0.9565217391304347,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9649122807017544,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9463722397476341,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9433962264150944,
"eval_ORGANIZATION_recall": 0.9493670886075949,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.06017274409532547,
"eval_overall_accuracy": 0.9917650288223991,
"eval_overall_f1": 0.9623115577889447,
"eval_overall_precision": 0.9623115577889447,
"eval_overall_recall": 0.9623115577889447,
"eval_runtime": 0.2774,
"eval_samples_per_second": 612.896,
"eval_steps_per_second": 10.816,
"step": 1344
},
{
"epoch": 15.0,
"grad_norm": 4.67892599105835,
"learning_rate": 4.25e-05,
"loss": 0.0031,
"step": 1440
},
{
"epoch": 15.0,
"eval_LOCATION_f1": 0.9437229437229439,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9478260869565217,
"eval_LOCATION_recall": 0.9396551724137931,
"eval_ORGANIZATION_f1": 0.939297124600639,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9483870967741935,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.06778218597173691,
"eval_overall_accuracy": 0.9903925336261323,
"eval_overall_f1": 0.9546599496221662,
"eval_overall_precision": 0.9570707070707071,
"eval_overall_recall": 0.9522613065326633,
"eval_runtime": 0.2794,
"eval_samples_per_second": 608.342,
"eval_steps_per_second": 10.735,
"step": 1440
},
{
"epoch": 16.0,
"grad_norm": 0.5296499729156494,
"learning_rate": 4.2e-05,
"loss": 0.0039,
"step": 1536
},
{
"epoch": 16.0,
"eval_LOCATION_f1": 0.9361702127659575,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9243697478991597,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.935064935064935,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.96,
"eval_ORGANIZATION_recall": 0.9113924050632911,
"eval_PERSON_f1": 0.9838709677419355,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9838709677419355,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.08199746906757355,
"eval_overall_accuracy": 0.987098545155092,
"eval_overall_f1": 0.9506953223767383,
"eval_overall_precision": 0.9567430025445293,
"eval_overall_recall": 0.9447236180904522,
"eval_runtime": 0.2762,
"eval_samples_per_second": 615.429,
"eval_steps_per_second": 10.861,
"step": 1536
},
{
"epoch": 17.0,
"grad_norm": 0.0023074958007782698,
"learning_rate": 4.15e-05,
"loss": 0.0021,
"step": 1632
},
{
"epoch": 17.0,
"eval_LOCATION_f1": 0.9620253164556961,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9421487603305785,
"eval_LOCATION_recall": 0.9827586206896551,
"eval_ORGANIZATION_f1": 0.9514563106796117,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9735099337748344,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07931017875671387,
"eval_overall_accuracy": 0.9898435355476256,
"eval_overall_f1": 0.964824120603015,
"eval_overall_precision": 0.964824120603015,
"eval_overall_recall": 0.964824120603015,
"eval_runtime": 0.2743,
"eval_samples_per_second": 619.839,
"eval_steps_per_second": 10.938,
"step": 1632
},
{
"epoch": 18.0,
"grad_norm": 1.223575472831726,
"learning_rate": 4.1e-05,
"loss": 0.0035,
"step": 1728
},
{
"epoch": 18.0,
"eval_LOCATION_f1": 0.9310344827586207,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9310344827586207,
"eval_LOCATION_recall": 0.9310344827586207,
"eval_ORGANIZATION_f1": 0.9423076923076923,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9545454545454546,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9800796812749005,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.968503937007874,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08444110304117203,
"eval_overall_accuracy": 0.9879220422728521,
"eval_overall_f1": 0.950943396226415,
"eval_overall_precision": 0.9521410579345088,
"eval_overall_recall": 0.949748743718593,
"eval_runtime": 0.2749,
"eval_samples_per_second": 618.352,
"eval_steps_per_second": 10.912,
"step": 1728
},
{
"epoch": 19.0,
"grad_norm": 0.030457496643066406,
"learning_rate": 4.05e-05,
"loss": 0.0039,
"step": 1824
},
{
"epoch": 19.0,
"eval_LOCATION_f1": 0.9372384937238494,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9105691056910569,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9342105263157895,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9726027397260274,
"eval_ORGANIZATION_recall": 0.8987341772151899,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.090733103454113,
"eval_overall_accuracy": 0.9868240461158386,
"eval_overall_f1": 0.9508196721311476,
"eval_overall_precision": 0.9544303797468354,
"eval_overall_recall": 0.9472361809045227,
"eval_runtime": 0.2734,
"eval_samples_per_second": 621.759,
"eval_steps_per_second": 10.972,
"step": 1824
},
{
"epoch": 20.0,
"grad_norm": 0.002068708650767803,
"learning_rate": 4e-05,
"loss": 0.0014,
"step": 1920
},
{
"epoch": 20.0,
"eval_LOCATION_f1": 0.9531914893617022,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9411764705882353,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9453376205787781,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9607843137254902,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.06293763220310211,
"eval_overall_accuracy": 0.9912160307438924,
"eval_overall_f1": 0.9610062893081761,
"eval_overall_precision": 0.9622166246851386,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2744,
"eval_samples_per_second": 619.493,
"eval_steps_per_second": 10.932,
"step": 1920
},
{
"epoch": 21.0,
"grad_norm": 0.05384279042482376,
"learning_rate": 3.9500000000000005e-05,
"loss": 0.0019,
"step": 2016
},
{
"epoch": 21.0,
"eval_LOCATION_f1": 0.9531914893617022,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9411764705882353,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9548387096774194,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9736842105263158,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.06547907739877701,
"eval_overall_accuracy": 0.9909415317046391,
"eval_overall_f1": 0.964735516372796,
"eval_overall_precision": 0.9671717171717171,
"eval_overall_recall": 0.9623115577889447,
"eval_runtime": 0.2814,
"eval_samples_per_second": 604.105,
"eval_steps_per_second": 10.661,
"step": 2016
},
{
"epoch": 22.0,
"grad_norm": 0.06973671913146973,
"learning_rate": 3.9000000000000006e-05,
"loss": 0.0021,
"step": 2112
},
{
"epoch": 22.0,
"eval_LOCATION_f1": 0.9527896995708154,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9487179487179487,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9400630914826499,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9371069182389937,
"eval_ORGANIZATION_recall": 0.9430379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.059321921318769455,
"eval_overall_accuracy": 0.9914905297831458,
"eval_overall_f1": 0.9586983729662076,
"eval_overall_precision": 0.9551122194513716,
"eval_overall_recall": 0.9623115577889447,
"eval_runtime": 0.2794,
"eval_samples_per_second": 608.393,
"eval_steps_per_second": 10.736,
"step": 2112
},
{
"epoch": 23.0,
"grad_norm": 0.0021264716051518917,
"learning_rate": 3.85e-05,
"loss": 0.0038,
"step": 2208
},
{
"epoch": 23.0,
"eval_LOCATION_f1": 0.94017094017094,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9322033898305084,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9453376205787781,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9607843137254902,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.069790780544281,
"eval_overall_accuracy": 0.9890200384298655,
"eval_overall_f1": 0.9559748427672956,
"eval_overall_precision": 0.9571788413098237,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.2838,
"eval_samples_per_second": 598.989,
"eval_steps_per_second": 10.57,
"step": 2208
},
{
"epoch": 24.0,
"grad_norm": 0.0016756883123889565,
"learning_rate": 3.8e-05,
"loss": 0.0024,
"step": 2304
},
{
"epoch": 24.0,
"eval_LOCATION_f1": 0.9572649572649573,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9491525423728814,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9449838187702266,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9668874172185431,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.06858450919389725,
"eval_overall_accuracy": 0.990118034586879,
"eval_overall_f1": 0.962121212121212,
"eval_overall_precision": 0.9670050761421319,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.2771,
"eval_samples_per_second": 613.591,
"eval_steps_per_second": 10.828,
"step": 2304
},
{
"epoch": 25.0,
"grad_norm": 2.1548383235931396,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.0032,
"step": 2400
},
{
"epoch": 25.0,
"eval_LOCATION_f1": 0.9367088607594937,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9173553719008265,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9260450160771704,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9411764705882353,
"eval_ORGANIZATION_recall": 0.9113924050632911,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.0781761035323143,
"eval_overall_accuracy": 0.9873730441943454,
"eval_overall_f1": 0.9485570890840652,
"eval_overall_precision": 0.9473684210526315,
"eval_overall_recall": 0.949748743718593,
"eval_runtime": 0.2779,
"eval_samples_per_second": 611.738,
"eval_steps_per_second": 10.795,
"step": 2400
},
{
"epoch": 26.0,
"grad_norm": 0.028054367750883102,
"learning_rate": 3.7e-05,
"loss": 0.0028,
"step": 2496
},
{
"epoch": 26.0,
"eval_LOCATION_f1": 0.9322033898305084,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9166666666666666,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.954248366013072,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9864864864864865,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08411029726266861,
"eval_overall_accuracy": 0.9892945374691189,
"eval_overall_f1": 0.9582806573957017,
"eval_overall_precision": 0.9643765903307888,
"eval_overall_recall": 0.9522613065326633,
"eval_runtime": 0.2758,
"eval_samples_per_second": 616.493,
"eval_steps_per_second": 10.879,
"step": 2496
},
{
"epoch": 27.0,
"grad_norm": 0.059956323355436325,
"learning_rate": 3.65e-05,
"loss": 0.0024,
"step": 2592
},
{
"epoch": 27.0,
"eval_LOCATION_f1": 0.9367088607594937,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9173553719008265,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9523809523809524,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9554140127388535,
"eval_ORGANIZATION_recall": 0.9493670886075949,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07615868002176285,
"eval_overall_accuracy": 0.9892945374691189,
"eval_overall_f1": 0.958801498127341,
"eval_overall_precision": 0.9528535980148883,
"eval_overall_recall": 0.964824120603015,
"eval_runtime": 0.2778,
"eval_samples_per_second": 611.955,
"eval_steps_per_second": 10.799,
"step": 2592
},
{
"epoch": 28.0,
"grad_norm": 0.02544810064136982,
"learning_rate": 3.6e-05,
"loss": 0.0065,
"step": 2688
},
{
"epoch": 28.0,
"eval_LOCATION_f1": 0.9482758620689655,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9482758620689655,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.934640522875817,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9662162162162162,
"eval_ORGANIZATION_recall": 0.9050632911392406,
"eval_PERSON_f1": 0.9682539682539683,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.953125,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.09427817910909653,
"eval_overall_accuracy": 0.9887455393906122,
"eval_overall_f1": 0.949367088607595,
"eval_overall_precision": 0.9566326530612245,
"eval_overall_recall": 0.9422110552763819,
"eval_runtime": 0.2757,
"eval_samples_per_second": 616.558,
"eval_steps_per_second": 10.88,
"step": 2688
},
{
"epoch": 29.0,
"grad_norm": 0.006958332844078541,
"learning_rate": 3.55e-05,
"loss": 0.0026,
"step": 2784
},
{
"epoch": 29.0,
"eval_LOCATION_f1": 0.9527896995708154,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9487179487179487,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9381107491856678,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9664429530201343,
"eval_ORGANIZATION_recall": 0.9113924050632911,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.09594732522964478,
"eval_overall_accuracy": 0.9873730441943454,
"eval_overall_f1": 0.9581749049429658,
"eval_overall_precision": 0.9667519181585678,
"eval_overall_recall": 0.949748743718593,
"eval_runtime": 0.2894,
"eval_samples_per_second": 587.475,
"eval_steps_per_second": 10.367,
"step": 2784
},
{
"epoch": 30.0,
"grad_norm": 0.0011697375448420644,
"learning_rate": 3.5e-05,
"loss": 0.002,
"step": 2880
},
{
"epoch": 30.0,
"eval_LOCATION_f1": 0.944206008583691,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9401709401709402,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9456869009584665,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9548387096774194,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07324973493814468,
"eval_overall_accuracy": 0.9912160307438924,
"eval_overall_f1": 0.9584905660377357,
"eval_overall_precision": 0.9596977329974811,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.2758,
"eval_samples_per_second": 616.404,
"eval_steps_per_second": 10.878,
"step": 2880
},
{
"epoch": 31.0,
"grad_norm": 0.0018024586606770754,
"learning_rate": 3.45e-05,
"loss": 0.0012,
"step": 2976
},
{
"epoch": 31.0,
"eval_LOCATION_f1": 0.9527896995708154,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9487179487179487,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9514563106796117,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9735099337748344,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08078110963106155,
"eval_overall_accuracy": 0.990118034586879,
"eval_overall_f1": 0.9633375474083439,
"eval_overall_precision": 0.9694656488549618,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.2895,
"eval_samples_per_second": 587.277,
"eval_steps_per_second": 10.364,
"step": 2976
},
{
"epoch": 32.0,
"grad_norm": 0.001483693951740861,
"learning_rate": 3.4000000000000007e-05,
"loss": 0.001,
"step": 3072
},
{
"epoch": 32.0,
"eval_LOCATION_f1": 0.9572649572649573,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9491525423728814,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9545454545454545,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.98,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08459720760583878,
"eval_overall_accuracy": 0.9898435355476256,
"eval_overall_f1": 0.965865992414665,
"eval_overall_precision": 0.9720101781170484,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2764,
"eval_samples_per_second": 614.944,
"eval_steps_per_second": 10.852,
"step": 3072
},
{
"epoch": 33.0,
"grad_norm": 4.3353071212768555,
"learning_rate": 3.35e-05,
"loss": 0.0018,
"step": 3168
},
{
"epoch": 33.0,
"eval_LOCATION_f1": 0.9527896995708154,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9487179487179487,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9514563106796117,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9735099337748344,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.09489604830741882,
"eval_overall_accuracy": 0.9892945374691189,
"eval_overall_f1": 0.9633375474083439,
"eval_overall_precision": 0.9694656488549618,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.2981,
"eval_samples_per_second": 570.187,
"eval_steps_per_second": 10.062,
"step": 3168
},
{
"epoch": 34.0,
"grad_norm": 0.0007446123054251075,
"learning_rate": 3.3e-05,
"loss": 0.0012,
"step": 3264
},
{
"epoch": 34.0,
"eval_LOCATION_f1": 0.94017094017094,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9322033898305084,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9449838187702266,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9668874172185431,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9800796812749005,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.968503937007874,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.09648539870977402,
"eval_overall_accuracy": 0.9879220422728521,
"eval_overall_f1": 0.9546599496221662,
"eval_overall_precision": 0.9570707070707071,
"eval_overall_recall": 0.9522613065326633,
"eval_runtime": 0.2797,
"eval_samples_per_second": 607.836,
"eval_steps_per_second": 10.727,
"step": 3264
},
{
"epoch": 35.0,
"grad_norm": 0.04847164824604988,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.0025,
"step": 3360
},
{
"epoch": 35.0,
"eval_LOCATION_f1": 0.9385964912280702,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9553571428571429,
"eval_LOCATION_recall": 0.9224137931034483,
"eval_ORGANIZATION_f1": 0.9367088607594937,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9367088607594937,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.10111605376005173,
"eval_overall_accuracy": 0.9879220422728521,
"eval_overall_f1": 0.9521410579345088,
"eval_overall_precision": 0.9545454545454546,
"eval_overall_recall": 0.949748743718593,
"eval_runtime": 0.2814,
"eval_samples_per_second": 604.206,
"eval_steps_per_second": 10.662,
"step": 3360
},
{
"epoch": 36.0,
"grad_norm": 0.006004320923238993,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.0029,
"step": 3456
},
{
"epoch": 36.0,
"eval_LOCATION_f1": 0.9527896995708154,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9487179487179487,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9423076923076923,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9545454545454546,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.09128763526678085,
"eval_overall_accuracy": 0.9881965413121054,
"eval_overall_f1": 0.9584905660377357,
"eval_overall_precision": 0.9596977329974811,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.2762,
"eval_samples_per_second": 615.538,
"eval_steps_per_second": 10.862,
"step": 3456
},
{
"epoch": 37.0,
"grad_norm": 0.5727871060371399,
"learning_rate": 3.15e-05,
"loss": 0.0037,
"step": 3552
},
{
"epoch": 37.0,
"eval_LOCATION_f1": 0.9572649572649573,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9491525423728814,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9430379746835443,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9430379746835443,
"eval_ORGANIZATION_recall": 0.9430379746835443,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.05428982526063919,
"eval_overall_accuracy": 0.9923140269009059,
"eval_overall_f1": 0.96,
"eval_overall_precision": 0.9552238805970149,
"eval_overall_recall": 0.964824120603015,
"eval_runtime": 0.2785,
"eval_samples_per_second": 610.43,
"eval_steps_per_second": 10.772,
"step": 3552
},
{
"epoch": 38.0,
"grad_norm": 0.0067481170408427715,
"learning_rate": 3.1e-05,
"loss": 0.002,
"step": 3648
},
{
"epoch": 38.0,
"eval_LOCATION_f1": 0.9527896995708154,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9487179487179487,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9430379746835443,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9430379746835443,
"eval_ORGANIZATION_recall": 0.9430379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.06546945124864578,
"eval_overall_accuracy": 0.9909415317046391,
"eval_overall_f1": 0.9598997493734336,
"eval_overall_precision": 0.9575,
"eval_overall_recall": 0.9623115577889447,
"eval_runtime": 0.2747,
"eval_samples_per_second": 618.844,
"eval_steps_per_second": 10.921,
"step": 3648
},
{
"epoch": 39.0,
"grad_norm": 0.0010467551182955503,
"learning_rate": 3.05e-05,
"loss": 0.0015,
"step": 3744
},
{
"epoch": 39.0,
"eval_LOCATION_f1": 0.9523809523809523,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9565217391304348,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9483870967741935,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9671052631578947,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.0786169022321701,
"eval_overall_accuracy": 0.9892945374691189,
"eval_overall_f1": 0.9620253164556962,
"eval_overall_precision": 0.9693877551020408,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.2771,
"eval_samples_per_second": 613.511,
"eval_steps_per_second": 10.827,
"step": 3744
},
{
"epoch": 40.0,
"grad_norm": 0.0016867171507328749,
"learning_rate": 3e-05,
"loss": 0.001,
"step": 3840
},
{
"epoch": 40.0,
"eval_LOCATION_f1": 0.9446808510638298,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9327731092436975,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9453376205787781,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9607843137254902,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07218839973211288,
"eval_overall_accuracy": 0.9903925336261323,
"eval_overall_f1": 0.9584905660377357,
"eval_overall_precision": 0.9596977329974811,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.2825,
"eval_samples_per_second": 601.73,
"eval_steps_per_second": 10.619,
"step": 3840
},
{
"epoch": 41.0,
"grad_norm": 0.02849876880645752,
"learning_rate": 2.95e-05,
"loss": 0.0021,
"step": 3936
},
{
"epoch": 41.0,
"eval_LOCATION_f1": 0.9623430962343097,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9349593495934959,
"eval_LOCATION_recall": 0.9913793103448276,
"eval_ORGANIZATION_f1": 0.9548387096774194,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9736842105263158,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07220359891653061,
"eval_overall_accuracy": 0.9903925336261323,
"eval_overall_f1": 0.9674185463659147,
"eval_overall_precision": 0.965,
"eval_overall_recall": 0.9698492462311558,
"eval_runtime": 0.2929,
"eval_samples_per_second": 580.454,
"eval_steps_per_second": 10.243,
"step": 3936
},
{
"epoch": 42.0,
"grad_norm": 0.010168996639549732,
"learning_rate": 2.9e-05,
"loss": 0.0018,
"step": 4032
},
{
"epoch": 42.0,
"eval_LOCATION_f1": 0.9482758620689655,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9482758620689655,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9453376205787781,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9607843137254902,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07643670588731766,
"eval_overall_accuracy": 0.9892945374691189,
"eval_overall_f1": 0.9583858764186632,
"eval_overall_precision": 0.9620253164556962,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.2741,
"eval_samples_per_second": 620.184,
"eval_steps_per_second": 10.944,
"step": 4032
},
{
"epoch": 43.0,
"grad_norm": 0.0018144345376640558,
"learning_rate": 2.8499999999999998e-05,
"loss": 0.0009,
"step": 4128
},
{
"epoch": 43.0,
"eval_LOCATION_f1": 0.9572649572649573,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9491525423728814,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9514563106796117,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9735099337748344,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9838709677419355,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9838709677419355,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.08542946726083755,
"eval_overall_accuracy": 0.9898435355476256,
"eval_overall_f1": 0.9633375474083439,
"eval_overall_precision": 0.9694656488549618,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.278,
"eval_samples_per_second": 611.507,
"eval_steps_per_second": 10.791,
"step": 4128
},
{
"epoch": 44.0,
"grad_norm": 0.00032525003189221025,
"learning_rate": 2.8000000000000003e-05,
"loss": 0.0007,
"step": 4224
},
{
"epoch": 44.0,
"eval_LOCATION_f1": 0.9531914893617022,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9411764705882353,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9514563106796117,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9735099337748344,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9838709677419355,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9838709677419355,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.07778704911470413,
"eval_overall_accuracy": 0.9903925336261323,
"eval_overall_f1": 0.962121212121212,
"eval_overall_precision": 0.9670050761421319,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.2764,
"eval_samples_per_second": 614.952,
"eval_steps_per_second": 10.852,
"step": 4224
},
{
"epoch": 45.0,
"grad_norm": 8.090188026428223,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.0018,
"step": 4320
},
{
"epoch": 45.0,
"eval_LOCATION_f1": 0.943231441048035,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9557522123893806,
"eval_LOCATION_recall": 0.9310344827586207,
"eval_ORGANIZATION_f1": 0.9358974358974359,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.948051948051948,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.08798850327730179,
"eval_overall_accuracy": 0.9887455393906122,
"eval_overall_f1": 0.9518987341772152,
"eval_overall_precision": 0.9591836734693877,
"eval_overall_recall": 0.9447236180904522,
"eval_runtime": 0.2772,
"eval_samples_per_second": 613.239,
"eval_steps_per_second": 10.822,
"step": 4320
},
{
"epoch": 46.0,
"grad_norm": 0.06047314405441284,
"learning_rate": 2.7000000000000002e-05,
"loss": 0.0022,
"step": 4416
},
{
"epoch": 46.0,
"eval_LOCATION_f1": 0.9531914893617022,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9411764705882353,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.961038961038961,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9866666666666667,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9799196787148594,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.976,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.08229512721300125,
"eval_overall_accuracy": 0.990118034586879,
"eval_overall_f1": 0.9646464646464646,
"eval_overall_precision": 0.9695431472081218,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2747,
"eval_samples_per_second": 618.874,
"eval_steps_per_second": 10.921,
"step": 4416
},
{
"epoch": 47.0,
"grad_norm": 11.46757698059082,
"learning_rate": 2.6500000000000004e-05,
"loss": 0.0013,
"step": 4512
},
{
"epoch": 47.0,
"eval_LOCATION_f1": 0.9482758620689655,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9482758620689655,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9545454545454545,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.98,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9840000000000001,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9761904761904762,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.09129085391759872,
"eval_overall_accuracy": 0.9895690365083722,
"eval_overall_f1": 0.9620253164556962,
"eval_overall_precision": 0.9693877551020408,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.275,
"eval_samples_per_second": 618.168,
"eval_steps_per_second": 10.909,
"step": 4512
},
{
"epoch": 48.0,
"grad_norm": 0.003439373802393675,
"learning_rate": 2.6000000000000002e-05,
"loss": 0.0013,
"step": 4608
},
{
"epoch": 48.0,
"eval_LOCATION_f1": 0.9576271186440678,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9416666666666667,
"eval_LOCATION_recall": 0.9741379310344828,
"eval_ORGANIZATION_f1": 0.9579288025889968,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9801324503311258,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08189716935157776,
"eval_overall_accuracy": 0.990118034586879,
"eval_overall_f1": 0.9672544080604534,
"eval_overall_precision": 0.9696969696969697,
"eval_overall_recall": 0.964824120603015,
"eval_runtime": 0.2766,
"eval_samples_per_second": 614.694,
"eval_steps_per_second": 10.848,
"step": 4608
},
{
"epoch": 49.0,
"grad_norm": 0.0003379171248525381,
"learning_rate": 2.5500000000000003e-05,
"loss": 0.0005,
"step": 4704
},
{
"epoch": 49.0,
"eval_LOCATION_f1": 0.9531914893617022,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9411764705882353,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9548387096774194,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9736842105263158,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.0734885111451149,
"eval_overall_accuracy": 0.9909415317046391,
"eval_overall_f1": 0.964735516372796,
"eval_overall_precision": 0.9671717171717171,
"eval_overall_recall": 0.9623115577889447,
"eval_runtime": 0.2755,
"eval_samples_per_second": 617.144,
"eval_steps_per_second": 10.891,
"step": 4704
},
{
"epoch": 50.0,
"grad_norm": 0.0015403638826683164,
"learning_rate": 2.5e-05,
"loss": 0.0011,
"step": 4800
},
{
"epoch": 50.0,
"eval_LOCATION_f1": 0.9482758620689655,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9482758620689655,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.939297124600639,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9483870967741935,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07724250108003616,
"eval_overall_accuracy": 0.9906670326653857,
"eval_overall_f1": 0.9571788413098236,
"eval_overall_precision": 0.9595959595959596,
"eval_overall_recall": 0.9547738693467337,
"eval_runtime": 0.2751,
"eval_samples_per_second": 618.052,
"eval_steps_per_second": 10.907,
"step": 4800
},
{
"epoch": 51.0,
"grad_norm": 0.005287709180265665,
"learning_rate": 2.45e-05,
"loss": 0.0021,
"step": 4896
},
{
"epoch": 51.0,
"eval_LOCATION_f1": 0.9572649572649573,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9491525423728814,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9514563106796117,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9735099337748344,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08125069737434387,
"eval_overall_accuracy": 0.9903925336261323,
"eval_overall_f1": 0.9646464646464646,
"eval_overall_precision": 0.9695431472081218,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2767,
"eval_samples_per_second": 614.409,
"eval_steps_per_second": 10.843,
"step": 4896
},
{
"epoch": 52.0,
"grad_norm": 0.055303167551755905,
"learning_rate": 2.4e-05,
"loss": 0.0006,
"step": 4992
},
{
"epoch": 52.0,
"eval_LOCATION_f1": 0.9658119658119658,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9576271186440678,
"eval_LOCATION_recall": 0.9741379310344828,
"eval_ORGANIZATION_f1": 0.9545454545454545,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.98,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.09267558157444,
"eval_overall_accuracy": 0.9903925336261323,
"eval_overall_f1": 0.9683944374209861,
"eval_overall_precision": 0.9745547073791349,
"eval_overall_recall": 0.9623115577889447,
"eval_runtime": 0.273,
"eval_samples_per_second": 622.619,
"eval_steps_per_second": 10.987,
"step": 4992
},
{
"epoch": 53.0,
"grad_norm": 0.0012606492964550853,
"learning_rate": 2.35e-05,
"loss": 0.0007,
"step": 5088
},
{
"epoch": 53.0,
"eval_LOCATION_f1": 0.9617021276595743,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9495798319327731,
"eval_LOCATION_recall": 0.9741379310344828,
"eval_ORGANIZATION_f1": 0.9615384615384615,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.974025974025974,
"eval_ORGANIZATION_recall": 0.9493670886075949,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07907352596521378,
"eval_overall_accuracy": 0.9912160307438924,
"eval_overall_f1": 0.9698492462311558,
"eval_overall_precision": 0.9698492462311558,
"eval_overall_recall": 0.9698492462311558,
"eval_runtime": 0.2759,
"eval_samples_per_second": 616.184,
"eval_steps_per_second": 10.874,
"step": 5088
},
{
"epoch": 54.0,
"grad_norm": 0.0005766572430729866,
"learning_rate": 2.3000000000000003e-05,
"loss": 0.0011,
"step": 5184
},
{
"epoch": 54.0,
"eval_LOCATION_f1": 0.9617021276595743,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9495798319327731,
"eval_LOCATION_recall": 0.9741379310344828,
"eval_ORGANIZATION_f1": 0.9617834394904459,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.967948717948718,
"eval_ORGANIZATION_recall": 0.9556962025316456,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07217950373888016,
"eval_overall_accuracy": 0.9928630249794126,
"eval_overall_f1": 0.9699248120300752,
"eval_overall_precision": 0.9675,
"eval_overall_recall": 0.9723618090452262,
"eval_runtime": 0.2765,
"eval_samples_per_second": 614.717,
"eval_steps_per_second": 10.848,
"step": 5184
},
{
"epoch": 55.0,
"grad_norm": 0.000538014282938093,
"learning_rate": 2.25e-05,
"loss": 0.0005,
"step": 5280
},
{
"epoch": 55.0,
"eval_LOCATION_f1": 0.9446808510638298,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9327731092436975,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9556962025316456,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9556962025316456,
"eval_ORGANIZATION_recall": 0.9556962025316456,
"eval_PERSON_f1": 0.9838709677419355,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9838709677419355,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.07206634432077408,
"eval_overall_accuracy": 0.9920395278616525,
"eval_overall_f1": 0.9612015018773465,
"eval_overall_precision": 0.9576059850374065,
"eval_overall_recall": 0.964824120603015,
"eval_runtime": 0.2784,
"eval_samples_per_second": 610.702,
"eval_steps_per_second": 10.777,
"step": 5280
},
{
"epoch": 56.0,
"grad_norm": 0.00032307393848896027,
"learning_rate": 2.2000000000000003e-05,
"loss": 0.0005,
"step": 5376
},
{
"epoch": 56.0,
"eval_LOCATION_f1": 0.9617021276595743,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9495798319327731,
"eval_LOCATION_recall": 0.9741379310344828,
"eval_ORGANIZATION_f1": 0.9712460063897763,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9806451612903225,
"eval_ORGANIZATION_recall": 0.9620253164556962,
"eval_PERSON_f1": 0.9838709677419355,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9838709677419355,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.07047928124666214,
"eval_overall_accuracy": 0.993137524018666,
"eval_overall_f1": 0.9723618090452262,
"eval_overall_precision": 0.9723618090452262,
"eval_overall_recall": 0.9723618090452262,
"eval_runtime": 0.2765,
"eval_samples_per_second": 614.867,
"eval_steps_per_second": 10.851,
"step": 5376
},
{
"epoch": 57.0,
"grad_norm": 0.43909594416618347,
"learning_rate": 2.15e-05,
"loss": 0.0003,
"step": 5472
},
{
"epoch": 57.0,
"eval_LOCATION_f1": 0.9527896995708154,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9487179487179487,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9584664536741213,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.967741935483871,
"eval_ORGANIZATION_recall": 0.9493670886075949,
"eval_PERSON_f1": 0.9838709677419355,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9838709677419355,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.06509443372488022,
"eval_overall_accuracy": 0.9923140269009059,
"eval_overall_f1": 0.964735516372796,
"eval_overall_precision": 0.9671717171717171,
"eval_overall_recall": 0.9623115577889447,
"eval_runtime": 0.2841,
"eval_samples_per_second": 598.449,
"eval_steps_per_second": 10.561,
"step": 5472
},
{
"epoch": 58.0,
"grad_norm": 0.020889485254883766,
"learning_rate": 2.1e-05,
"loss": 0.0011,
"step": 5568
},
{
"epoch": 58.0,
"eval_LOCATION_f1": 0.9568965517241379,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9568965517241379,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9617834394904459,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.967948717948718,
"eval_ORGANIZATION_recall": 0.9556962025316456,
"eval_PERSON_f1": 0.9838709677419355,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9838709677419355,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.07538726180791855,
"eval_overall_accuracy": 0.9928630249794126,
"eval_overall_f1": 0.9672544080604534,
"eval_overall_precision": 0.9696969696969697,
"eval_overall_recall": 0.964824120603015,
"eval_runtime": 0.2767,
"eval_samples_per_second": 614.294,
"eval_steps_per_second": 10.84,
"step": 5568
},
{
"epoch": 59.0,
"grad_norm": 0.00022353450185619295,
"learning_rate": 2.05e-05,
"loss": 0.0006,
"step": 5664
},
{
"epoch": 59.0,
"eval_LOCATION_f1": 0.9396551724137931,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9396551724137931,
"eval_LOCATION_recall": 0.9396551724137931,
"eval_ORGANIZATION_f1": 0.9587301587301587,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9617834394904459,
"eval_ORGANIZATION_recall": 0.9556962025316456,
"eval_PERSON_f1": 0.9838709677419355,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9838709677419355,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.07177454978227615,
"eval_overall_accuracy": 0.9923140269009059,
"eval_overall_f1": 0.9610062893081761,
"eval_overall_precision": 0.9622166246851386,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2865,
"eval_samples_per_second": 593.345,
"eval_steps_per_second": 10.471,
"step": 5664
},
{
"epoch": 60.0,
"grad_norm": 0.00033137862919829786,
"learning_rate": 2e-05,
"loss": 0.0005,
"step": 5760
},
{
"epoch": 60.0,
"eval_LOCATION_f1": 0.9617021276595743,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9495798319327731,
"eval_LOCATION_recall": 0.9741379310344828,
"eval_ORGANIZATION_f1": 0.9545454545454545,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.98,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08699022233486176,
"eval_overall_accuracy": 0.9898435355476256,
"eval_overall_f1": 0.9671717171717171,
"eval_overall_precision": 0.9720812182741116,
"eval_overall_recall": 0.9623115577889447,
"eval_runtime": 0.2736,
"eval_samples_per_second": 621.399,
"eval_steps_per_second": 10.966,
"step": 5760
},
{
"epoch": 61.0,
"grad_norm": 0.0024979726877063513,
"learning_rate": 1.9500000000000003e-05,
"loss": 0.0004,
"step": 5856
},
{
"epoch": 61.0,
"eval_LOCATION_f1": 0.9391304347826087,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9473684210526315,
"eval_LOCATION_recall": 0.9310344827586207,
"eval_ORGANIZATION_f1": 0.9496855345911951,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.94375,
"eval_ORGANIZATION_recall": 0.9556962025316456,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.0686868205666542,
"eval_overall_accuracy": 0.9909415317046391,
"eval_overall_f1": 0.958594730238394,
"eval_overall_precision": 0.9573934837092731,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.28,
"eval_samples_per_second": 607.233,
"eval_steps_per_second": 10.716,
"step": 5856
},
{
"epoch": 62.0,
"grad_norm": 0.0027951186057180166,
"learning_rate": 1.9e-05,
"loss": 0.0002,
"step": 5952
},
{
"epoch": 62.0,
"eval_LOCATION_f1": 0.944206008583691,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9401709401709402,
"eval_LOCATION_recall": 0.9482758620689655,
"eval_ORGANIZATION_f1": 0.9511400651465798,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9798657718120806,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9838709677419355,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9838709677419355,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.0982985645532608,
"eval_overall_accuracy": 0.9892945374691189,
"eval_overall_f1": 0.9593908629441623,
"eval_overall_precision": 0.9692307692307692,
"eval_overall_recall": 0.949748743718593,
"eval_runtime": 0.2763,
"eval_samples_per_second": 615.344,
"eval_steps_per_second": 10.859,
"step": 5952
},
{
"epoch": 63.0,
"grad_norm": 0.0001827309897635132,
"learning_rate": 1.85e-05,
"loss": 0.0006,
"step": 6048
},
{
"epoch": 63.0,
"eval_LOCATION_f1": 0.9613733905579399,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9572649572649573,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9483870967741935,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9671052631578947,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08179256319999695,
"eval_overall_accuracy": 0.9912160307438924,
"eval_overall_f1": 0.9646464646464646,
"eval_overall_precision": 0.9695431472081218,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2898,
"eval_samples_per_second": 586.599,
"eval_steps_per_second": 10.352,
"step": 6048
},
{
"epoch": 64.0,
"grad_norm": 0.0003153543220832944,
"learning_rate": 1.8e-05,
"loss": 0.0002,
"step": 6144
},
{
"epoch": 64.0,
"eval_LOCATION_f1": 0.9613733905579399,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9572649572649573,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9453376205787781,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9607843137254902,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08577439934015274,
"eval_overall_accuracy": 0.9914905297831458,
"eval_overall_f1": 0.9634300126103404,
"eval_overall_precision": 0.9670886075949368,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2729,
"eval_samples_per_second": 622.862,
"eval_steps_per_second": 10.992,
"step": 6144
},
{
"epoch": 65.0,
"grad_norm": 0.000563719542697072,
"learning_rate": 1.75e-05,
"loss": 0.0005,
"step": 6240
},
{
"epoch": 65.0,
"eval_LOCATION_f1": 0.9568965517241379,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9568965517241379,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9517684887459807,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9673202614379085,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.976,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9682539682539683,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.08837004005908966,
"eval_overall_accuracy": 0.9914905297831458,
"eval_overall_f1": 0.9609079445145018,
"eval_overall_precision": 0.9645569620253165,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.275,
"eval_samples_per_second": 618.13,
"eval_steps_per_second": 10.908,
"step": 6240
},
{
"epoch": 66.0,
"grad_norm": 0.001452375203371048,
"learning_rate": 1.7000000000000003e-05,
"loss": 0.001,
"step": 6336
},
{
"epoch": 66.0,
"eval_LOCATION_f1": 0.9527896995708154,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9487179487179487,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9389067524115756,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.954248366013072,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.976,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.9682539682539683,
"eval_PERSON_recall": 0.9838709677419355,
"eval_loss": 0.0770520567893982,
"eval_overall_accuracy": 0.9912160307438924,
"eval_overall_f1": 0.9546599496221662,
"eval_overall_precision": 0.9570707070707071,
"eval_overall_recall": 0.9522613065326633,
"eval_runtime": 0.2871,
"eval_samples_per_second": 592.039,
"eval_steps_per_second": 10.448,
"step": 6336
},
{
"epoch": 67.0,
"grad_norm": 0.0012776756193488836,
"learning_rate": 1.65e-05,
"loss": 0.0006,
"step": 6432
},
{
"epoch": 67.0,
"eval_LOCATION_f1": 0.9527896995708154,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9487179487179487,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9514563106796117,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9735099337748344,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08081617951393127,
"eval_overall_accuracy": 0.9909415317046391,
"eval_overall_f1": 0.9633375474083439,
"eval_overall_precision": 0.9694656488549618,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.2769,
"eval_samples_per_second": 614.016,
"eval_steps_per_second": 10.836,
"step": 6432
},
{
"epoch": 68.0,
"grad_norm": 0.1566210240125656,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.0002,
"step": 6528
},
{
"epoch": 68.0,
"eval_LOCATION_f1": 0.9613733905579399,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9572649572649573,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9487179487179488,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.961038961038961,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.074882373213768,
"eval_overall_accuracy": 0.9920395278616525,
"eval_overall_f1": 0.964735516372796,
"eval_overall_precision": 0.9671717171717171,
"eval_overall_recall": 0.9623115577889447,
"eval_runtime": 0.2768,
"eval_samples_per_second": 614.249,
"eval_steps_per_second": 10.84,
"step": 6528
},
{
"epoch": 69.0,
"grad_norm": 0.000284359062789008,
"learning_rate": 1.55e-05,
"loss": 0.0011,
"step": 6624
},
{
"epoch": 69.0,
"eval_LOCATION_f1": 0.9613733905579399,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9572649572649573,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9453376205787781,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9607843137254902,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07836616784334183,
"eval_overall_accuracy": 0.9917650288223991,
"eval_overall_f1": 0.9634300126103404,
"eval_overall_precision": 0.9670886075949368,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2749,
"eval_samples_per_second": 618.479,
"eval_steps_per_second": 10.914,
"step": 6624
},
{
"epoch": 70.0,
"grad_norm": 0.0007131195743568242,
"learning_rate": 1.5e-05,
"loss": 0.0005,
"step": 6720
},
{
"epoch": 70.0,
"eval_LOCATION_f1": 0.9613733905579399,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9572649572649573,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9483870967741935,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9671052631578947,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07496281713247299,
"eval_overall_accuracy": 0.9920395278616525,
"eval_overall_f1": 0.9646464646464646,
"eval_overall_precision": 0.9695431472081218,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2777,
"eval_samples_per_second": 612.098,
"eval_steps_per_second": 10.802,
"step": 6720
},
{
"epoch": 71.0,
"grad_norm": 0.0003653345920611173,
"learning_rate": 1.45e-05,
"loss": 0.0001,
"step": 6816
},
{
"epoch": 71.0,
"eval_LOCATION_f1": 0.9613733905579399,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9572649572649573,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9483870967741935,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9671052631578947,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07584992796182632,
"eval_overall_accuracy": 0.9920395278616525,
"eval_overall_f1": 0.9646464646464646,
"eval_overall_precision": 0.9695431472081218,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2767,
"eval_samples_per_second": 614.357,
"eval_steps_per_second": 10.842,
"step": 6816
},
{
"epoch": 72.0,
"grad_norm": 0.00019452819833531976,
"learning_rate": 1.4000000000000001e-05,
"loss": 0.0005,
"step": 6912
},
{
"epoch": 72.0,
"eval_LOCATION_f1": 0.9613733905579399,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9572649572649573,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9483870967741935,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9671052631578947,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07706008106470108,
"eval_overall_accuracy": 0.9920395278616525,
"eval_overall_f1": 0.9646464646464646,
"eval_overall_precision": 0.9695431472081218,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2789,
"eval_samples_per_second": 609.626,
"eval_steps_per_second": 10.758,
"step": 6912
},
{
"epoch": 73.0,
"grad_norm": 0.00044385006185621023,
"learning_rate": 1.3500000000000001e-05,
"loss": 0.0004,
"step": 7008
},
{
"epoch": 73.0,
"eval_LOCATION_f1": 0.9531914893617022,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9411764705882353,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9389067524115756,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.954248366013072,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07329727709293365,
"eval_overall_accuracy": 0.9914905297831458,
"eval_overall_f1": 0.9584905660377357,
"eval_overall_precision": 0.9596977329974811,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.2761,
"eval_samples_per_second": 615.718,
"eval_steps_per_second": 10.866,
"step": 7008
},
{
"epoch": 74.0,
"grad_norm": 0.0001623950811335817,
"learning_rate": 1.3000000000000001e-05,
"loss": 0.0001,
"step": 7104
},
{
"epoch": 74.0,
"eval_LOCATION_f1": 0.9572649572649573,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9491525423728814,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9389067524115756,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.954248366013072,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07398699223995209,
"eval_overall_accuracy": 0.9917650288223991,
"eval_overall_f1": 0.9596977329974811,
"eval_overall_precision": 0.9621212121212122,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.2763,
"eval_samples_per_second": 615.183,
"eval_steps_per_second": 10.856,
"step": 7104
},
{
"epoch": 75.0,
"grad_norm": 0.00019397769938223064,
"learning_rate": 1.25e-05,
"loss": 0.0001,
"step": 7200
},
{
"epoch": 75.0,
"eval_LOCATION_f1": 0.9572649572649573,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9491525423728814,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9449838187702266,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9668874172185431,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07948914915323257,
"eval_overall_accuracy": 0.9914905297831458,
"eval_overall_f1": 0.962121212121212,
"eval_overall_precision": 0.9670050761421319,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.288,
"eval_samples_per_second": 590.181,
"eval_steps_per_second": 10.415,
"step": 7200
},
{
"epoch": 76.0,
"grad_norm": 0.00020719191525131464,
"learning_rate": 1.2e-05,
"loss": 0.0002,
"step": 7296
},
{
"epoch": 76.0,
"eval_LOCATION_f1": 0.9572649572649573,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9491525423728814,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9449838187702266,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9668874172185431,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08004683256149292,
"eval_overall_accuracy": 0.9914905297831458,
"eval_overall_f1": 0.962121212121212,
"eval_overall_precision": 0.9670050761421319,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.2748,
"eval_samples_per_second": 618.639,
"eval_steps_per_second": 10.917,
"step": 7296
},
{
"epoch": 77.0,
"grad_norm": 0.0001302505552303046,
"learning_rate": 1.1500000000000002e-05,
"loss": 0.0002,
"step": 7392
},
{
"epoch": 77.0,
"eval_LOCATION_f1": 0.9568965517241379,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9568965517241379,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9453376205787781,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9607843137254902,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07812328636646271,
"eval_overall_accuracy": 0.9920395278616525,
"eval_overall_f1": 0.962121212121212,
"eval_overall_precision": 0.9670050761421319,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.2724,
"eval_samples_per_second": 624.165,
"eval_steps_per_second": 11.015,
"step": 7392
},
{
"epoch": 78.0,
"grad_norm": 0.00012877046538051218,
"learning_rate": 1.1000000000000001e-05,
"loss": 0.0002,
"step": 7488
},
{
"epoch": 78.0,
"eval_LOCATION_f1": 0.9572649572649573,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9491525423728814,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9514563106796117,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9735099337748344,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07981810718774796,
"eval_overall_accuracy": 0.9917650288223991,
"eval_overall_f1": 0.9646464646464646,
"eval_overall_precision": 0.9695431472081218,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2748,
"eval_samples_per_second": 618.678,
"eval_steps_per_second": 10.918,
"step": 7488
},
{
"epoch": 79.0,
"grad_norm": 0.00036683276994153857,
"learning_rate": 1.05e-05,
"loss": 0.0002,
"step": 7584
},
{
"epoch": 79.0,
"eval_LOCATION_f1": 0.9613733905579399,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9572649572649573,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9548387096774194,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9736842105263158,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07850603759288788,
"eval_overall_accuracy": 0.9925885259401592,
"eval_overall_f1": 0.9671717171717171,
"eval_overall_precision": 0.9720812182741116,
"eval_overall_recall": 0.9623115577889447,
"eval_runtime": 0.2871,
"eval_samples_per_second": 592.19,
"eval_steps_per_second": 10.45,
"step": 7584
},
{
"epoch": 80.0,
"grad_norm": 0.00012351616169326007,
"learning_rate": 1e-05,
"loss": 0.0001,
"step": 7680
},
{
"epoch": 80.0,
"eval_LOCATION_f1": 0.9613733905579399,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9572649572649573,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9548387096774194,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9736842105263158,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07939465343952179,
"eval_overall_accuracy": 0.9925885259401592,
"eval_overall_f1": 0.9671717171717171,
"eval_overall_precision": 0.9720812182741116,
"eval_overall_recall": 0.9623115577889447,
"eval_runtime": 0.2831,
"eval_samples_per_second": 600.495,
"eval_steps_per_second": 10.597,
"step": 7680
},
{
"epoch": 81.0,
"grad_norm": 0.00012122365296818316,
"learning_rate": 9.5e-06,
"loss": 0.0004,
"step": 7776
},
{
"epoch": 81.0,
"eval_LOCATION_f1": 0.9613733905579399,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9572649572649573,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9548387096774194,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9736842105263158,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08115226775407791,
"eval_overall_accuracy": 0.9925885259401592,
"eval_overall_f1": 0.9671717171717171,
"eval_overall_precision": 0.9720812182741116,
"eval_overall_recall": 0.9623115577889447,
"eval_runtime": 0.2786,
"eval_samples_per_second": 610.221,
"eval_steps_per_second": 10.769,
"step": 7776
},
{
"epoch": 82.0,
"grad_norm": 0.00014256218855734915,
"learning_rate": 9e-06,
"loss": 0.0001,
"step": 7872
},
{
"epoch": 82.0,
"eval_LOCATION_f1": 0.9572649572649573,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9491525423728814,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9449838187702266,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9668874172185431,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08801557123661041,
"eval_overall_accuracy": 0.9914905297831458,
"eval_overall_f1": 0.962121212121212,
"eval_overall_precision": 0.9670050761421319,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.2757,
"eval_samples_per_second": 616.648,
"eval_steps_per_second": 10.882,
"step": 7872
},
{
"epoch": 83.0,
"grad_norm": 0.00044690616778098047,
"learning_rate": 8.500000000000002e-06,
"loss": 0.0001,
"step": 7968
},
{
"epoch": 83.0,
"eval_LOCATION_f1": 0.9658119658119658,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9576271186440678,
"eval_LOCATION_recall": 0.9741379310344828,
"eval_ORGANIZATION_f1": 0.9483870967741935,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9671052631578947,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08324732631444931,
"eval_overall_accuracy": 0.9920395278616525,
"eval_overall_f1": 0.965952080706179,
"eval_overall_precision": 0.9696202531645569,
"eval_overall_recall": 0.9623115577889447,
"eval_runtime": 0.278,
"eval_samples_per_second": 611.537,
"eval_steps_per_second": 10.792,
"step": 7968
},
{
"epoch": 84.0,
"grad_norm": 0.00016260806296486408,
"learning_rate": 8.000000000000001e-06,
"loss": 0.0007,
"step": 8064
},
{
"epoch": 84.0,
"eval_LOCATION_f1": 0.9572649572649573,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9491525423728814,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9449838187702266,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9668874172185431,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08543501794338226,
"eval_overall_accuracy": 0.9914905297831458,
"eval_overall_f1": 0.962121212121212,
"eval_overall_precision": 0.9670050761421319,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.2754,
"eval_samples_per_second": 617.266,
"eval_steps_per_second": 10.893,
"step": 8064
},
{
"epoch": 85.0,
"grad_norm": 0.00022077209723647684,
"learning_rate": 7.5e-06,
"loss": 0.0001,
"step": 8160
},
{
"epoch": 85.0,
"eval_LOCATION_f1": 0.9572649572649573,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9491525423728814,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9449838187702266,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9668874172185431,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.086331307888031,
"eval_overall_accuracy": 0.9914905297831458,
"eval_overall_f1": 0.962121212121212,
"eval_overall_precision": 0.9670050761421319,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.2842,
"eval_samples_per_second": 598.232,
"eval_steps_per_second": 10.557,
"step": 8160
},
{
"epoch": 86.0,
"grad_norm": 0.00011886875290656462,
"learning_rate": 7.000000000000001e-06,
"loss": 0.0001,
"step": 8256
},
{
"epoch": 86.0,
"eval_LOCATION_f1": 0.9572649572649573,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9491525423728814,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9449838187702266,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9668874172185431,
"eval_ORGANIZATION_recall": 0.9240506329113924,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.0853876993060112,
"eval_overall_accuracy": 0.9909415317046391,
"eval_overall_f1": 0.962121212121212,
"eval_overall_precision": 0.9670050761421319,
"eval_overall_recall": 0.957286432160804,
"eval_runtime": 0.2739,
"eval_samples_per_second": 620.608,
"eval_steps_per_second": 10.952,
"step": 8256
},
{
"epoch": 87.0,
"grad_norm": 0.00010190217290073633,
"learning_rate": 6.5000000000000004e-06,
"loss": 0.0001,
"step": 8352
},
{
"epoch": 87.0,
"eval_LOCATION_f1": 0.9613733905579399,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9572649572649573,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9517684887459807,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9673202614379085,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07888607680797577,
"eval_overall_accuracy": 0.9923140269009059,
"eval_overall_f1": 0.965952080706179,
"eval_overall_precision": 0.9696202531645569,
"eval_overall_recall": 0.9623115577889447,
"eval_runtime": 0.276,
"eval_samples_per_second": 616.031,
"eval_steps_per_second": 10.871,
"step": 8352
},
{
"epoch": 88.0,
"grad_norm": 8.675308345118538e-05,
"learning_rate": 6e-06,
"loss": 0.0001,
"step": 8448
},
{
"epoch": 88.0,
"eval_LOCATION_f1": 0.9699570815450644,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9658119658119658,
"eval_LOCATION_recall": 0.9741379310344828,
"eval_ORGANIZATION_f1": 0.9453376205787781,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9607843137254902,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07756155729293823,
"eval_overall_accuracy": 0.9923140269009059,
"eval_overall_f1": 0.965952080706179,
"eval_overall_precision": 0.9696202531645569,
"eval_overall_recall": 0.9623115577889447,
"eval_runtime": 0.2738,
"eval_samples_per_second": 620.93,
"eval_steps_per_second": 10.958,
"step": 8448
},
{
"epoch": 89.0,
"grad_norm": 0.16685304045677185,
"learning_rate": 5.500000000000001e-06,
"loss": 0.0002,
"step": 8544
},
{
"epoch": 89.0,
"eval_LOCATION_f1": 0.9568965517241379,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9568965517241379,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9487179487179488,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.961038961038961,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07858122885227203,
"eval_overall_accuracy": 0.9920395278616525,
"eval_overall_f1": 0.9634300126103404,
"eval_overall_precision": 0.9670886075949368,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2787,
"eval_samples_per_second": 609.957,
"eval_steps_per_second": 10.764,
"step": 8544
},
{
"epoch": 90.0,
"grad_norm": 9.697148198029026e-05,
"learning_rate": 5e-06,
"loss": 0.0001,
"step": 8640
},
{
"epoch": 90.0,
"eval_LOCATION_f1": 0.9568965517241379,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9568965517241379,
"eval_LOCATION_recall": 0.9568965517241379,
"eval_ORGANIZATION_f1": 0.9487179487179488,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.961038961038961,
"eval_ORGANIZATION_recall": 0.9367088607594937,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.07982868701219559,
"eval_overall_accuracy": 0.9920395278616525,
"eval_overall_f1": 0.9634300126103404,
"eval_overall_precision": 0.9670886075949368,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2747,
"eval_samples_per_second": 618.797,
"eval_steps_per_second": 10.92,
"step": 8640
},
{
"epoch": 91.0,
"grad_norm": 0.00033092033118009567,
"learning_rate": 4.5e-06,
"loss": 0.0001,
"step": 8736
},
{
"epoch": 91.0,
"eval_LOCATION_f1": 0.9613733905579399,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9572649572649573,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9453376205787781,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9607843137254902,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08155547082424164,
"eval_overall_accuracy": 0.9920395278616525,
"eval_overall_f1": 0.9634300126103404,
"eval_overall_precision": 0.9670886075949368,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2787,
"eval_samples_per_second": 609.866,
"eval_steps_per_second": 10.762,
"step": 8736
},
{
"epoch": 92.0,
"grad_norm": 0.00010464302613399923,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0005,
"step": 8832
},
{
"epoch": 92.0,
"eval_LOCATION_f1": 0.9613733905579399,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9572649572649573,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9453376205787781,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9607843137254902,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08192423731088638,
"eval_overall_accuracy": 0.9920395278616525,
"eval_overall_f1": 0.9634300126103404,
"eval_overall_precision": 0.9670886075949368,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2824,
"eval_samples_per_second": 602.029,
"eval_steps_per_second": 10.624,
"step": 8832
},
{
"epoch": 93.0,
"grad_norm": 0.002409059088677168,
"learning_rate": 3.5000000000000004e-06,
"loss": 0.0003,
"step": 8928
},
{
"epoch": 93.0,
"eval_LOCATION_f1": 0.9613733905579399,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9572649572649573,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9453376205787781,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9607843137254902,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08189983665943146,
"eval_overall_accuracy": 0.9920395278616525,
"eval_overall_f1": 0.9634300126103404,
"eval_overall_precision": 0.9670886075949368,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2756,
"eval_samples_per_second": 616.941,
"eval_steps_per_second": 10.887,
"step": 8928
},
{
"epoch": 94.0,
"grad_norm": 0.16550596058368683,
"learning_rate": 3e-06,
"loss": 0.0003,
"step": 9024
},
{
"epoch": 94.0,
"eval_LOCATION_f1": 0.9613733905579399,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9572649572649573,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9453376205787781,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9607843137254902,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08141722530126572,
"eval_overall_accuracy": 0.9920395278616525,
"eval_overall_f1": 0.9634300126103404,
"eval_overall_precision": 0.9670886075949368,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2729,
"eval_samples_per_second": 622.955,
"eval_steps_per_second": 10.993,
"step": 9024
},
{
"epoch": 95.0,
"grad_norm": 0.0001592171611264348,
"learning_rate": 2.5e-06,
"loss": 0.0001,
"step": 9120
},
{
"epoch": 95.0,
"eval_LOCATION_f1": 0.9613733905579399,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9572649572649573,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9453376205787781,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9607843137254902,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08141204714775085,
"eval_overall_accuracy": 0.9920395278616525,
"eval_overall_f1": 0.9634300126103404,
"eval_overall_precision": 0.9670886075949368,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2846,
"eval_samples_per_second": 597.272,
"eval_steps_per_second": 10.54,
"step": 9120
},
{
"epoch": 96.0,
"grad_norm": 0.00010054935410153121,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0001,
"step": 9216
},
{
"epoch": 96.0,
"eval_LOCATION_f1": 0.9613733905579399,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9572649572649573,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9453376205787781,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9607843137254902,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08156644552946091,
"eval_overall_accuracy": 0.9920395278616525,
"eval_overall_f1": 0.9634300126103404,
"eval_overall_precision": 0.9670886075949368,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2743,
"eval_samples_per_second": 619.773,
"eval_steps_per_second": 10.937,
"step": 9216
},
{
"epoch": 97.0,
"grad_norm": 7.648551400052384e-05,
"learning_rate": 1.5e-06,
"loss": 0.0001,
"step": 9312
},
{
"epoch": 97.0,
"eval_LOCATION_f1": 0.9613733905579399,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9572649572649573,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9453376205787781,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9607843137254902,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08174903690814972,
"eval_overall_accuracy": 0.9920395278616525,
"eval_overall_f1": 0.9634300126103404,
"eval_overall_precision": 0.9670886075949368,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2863,
"eval_samples_per_second": 593.712,
"eval_steps_per_second": 10.477,
"step": 9312
},
{
"epoch": 98.0,
"grad_norm": 0.0001711288350634277,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0001,
"step": 9408
},
{
"epoch": 98.0,
"eval_LOCATION_f1": 0.9613733905579399,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9572649572649573,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9453376205787781,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9607843137254902,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08208905160427094,
"eval_overall_accuracy": 0.9920395278616525,
"eval_overall_f1": 0.9634300126103404,
"eval_overall_precision": 0.9670886075949368,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2744,
"eval_samples_per_second": 619.524,
"eval_steps_per_second": 10.933,
"step": 9408
},
{
"epoch": 99.0,
"grad_norm": 0.00011955283116549253,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0001,
"step": 9504
},
{
"epoch": 99.0,
"eval_LOCATION_f1": 0.9613733905579399,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9572649572649573,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9453376205787781,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9607843137254902,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08216139674186707,
"eval_overall_accuracy": 0.9920395278616525,
"eval_overall_f1": 0.9634300126103404,
"eval_overall_precision": 0.9670886075949368,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2911,
"eval_samples_per_second": 584.081,
"eval_steps_per_second": 10.307,
"step": 9504
},
{
"epoch": 100.0,
"grad_norm": 7.644743891432881e-05,
"learning_rate": 0.0,
"loss": 0.0001,
"step": 9600
},
{
"epoch": 100.0,
"eval_LOCATION_f1": 0.9613733905579399,
"eval_LOCATION_number": 116,
"eval_LOCATION_precision": 0.9572649572649573,
"eval_LOCATION_recall": 0.9655172413793104,
"eval_ORGANIZATION_f1": 0.9453376205787781,
"eval_ORGANIZATION_number": 158,
"eval_ORGANIZATION_precision": 0.9607843137254902,
"eval_ORGANIZATION_recall": 0.930379746835443,
"eval_PERSON_f1": 0.9879518072289156,
"eval_PERSON_number": 124,
"eval_PERSON_precision": 0.984,
"eval_PERSON_recall": 0.9919354838709677,
"eval_loss": 0.08217138797044754,
"eval_overall_accuracy": 0.9920395278616525,
"eval_overall_f1": 0.9634300126103404,
"eval_overall_precision": 0.9670886075949368,
"eval_overall_recall": 0.9597989949748744,
"eval_runtime": 0.2751,
"eval_samples_per_second": 618.039,
"eval_steps_per_second": 10.907,
"step": 9600
},
{
"epoch": 100.0,
"step": 9600,
"total_flos": 3848029980850176.0,
"train_loss": 0.005479360639195269,
"train_runtime": 897.0572,
"train_samples_per_second": 170.669,
"train_steps_per_second": 10.702
}
],
"logging_steps": 500,
"max_steps": 9600,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"total_flos": 3848029980850176.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}