arabert_fulldata_checkpoint / trainer_state.json
batoulnn's picture
Upload folder using huggingface_hub
ad61f6e verified
{
"best_metric": 0.8982632597992836,
"best_model_checkpoint": "./arabert_author_model/checkpoint-34500",
"epoch": 3.9863325740318905,
"eval_steps": 500,
"global_step": 35000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05694760820045558,
"grad_norm": 15.19107437133789,
"learning_rate": 1.1343963553530753e-05,
"loss": 2.7205,
"step": 500
},
{
"epoch": 0.05694760820045558,
"eval_accuracy": 0.3750300697618475,
"eval_f1_macro": 0.28596660166175775,
"eval_f1_micro": 0.3750300697618475,
"eval_loss": 2.32218337059021,
"eval_precision_macro": 0.3865217733215992,
"eval_precision_micro": 0.3750300697618475,
"eval_recall_macro": 0.3672924033460141,
"eval_recall_micro": 0.3750300697618475,
"eval_runtime": 9.1613,
"eval_samples_per_second": 453.755,
"eval_steps_per_second": 56.76,
"step": 500
},
{
"epoch": 0.11389521640091116,
"grad_norm": 21.725738525390625,
"learning_rate": 2.2710706150341686e-05,
"loss": 1.6025,
"step": 1000
},
{
"epoch": 0.11389521640091116,
"eval_accuracy": 0.4892951647822949,
"eval_f1_macro": 0.4406771878616986,
"eval_f1_micro": 0.4892951647822949,
"eval_loss": 1.6631975173950195,
"eval_precision_macro": 0.56733686366756,
"eval_precision_micro": 0.4892951647822949,
"eval_recall_macro": 0.48704653807602066,
"eval_recall_micro": 0.4892951647822949,
"eval_runtime": 8.477,
"eval_samples_per_second": 490.385,
"eval_steps_per_second": 61.342,
"step": 1000
},
{
"epoch": 0.17084282460136674,
"grad_norm": 13.716214179992676,
"learning_rate": 3.4100227790432806e-05,
"loss": 1.0328,
"step": 1500
},
{
"epoch": 0.17084282460136674,
"eval_accuracy": 0.6615347606446957,
"eval_f1_macro": 0.5830637518928246,
"eval_f1_micro": 0.6615347606446957,
"eval_loss": 1.1024270057678223,
"eval_precision_macro": 0.697472543022928,
"eval_precision_micro": 0.6615347606446957,
"eval_recall_macro": 0.6033302465966366,
"eval_recall_micro": 0.6615347606446957,
"eval_runtime": 8.4612,
"eval_samples_per_second": 491.299,
"eval_steps_per_second": 61.457,
"step": 1500
},
{
"epoch": 0.22779043280182232,
"grad_norm": 17.078737258911133,
"learning_rate": 4.548974943052392e-05,
"loss": 0.8504,
"step": 2000
},
{
"epoch": 0.22779043280182232,
"eval_accuracy": 0.639162857830166,
"eval_f1_macro": 0.5476482518700577,
"eval_f1_micro": 0.639162857830166,
"eval_loss": 1.2488256692886353,
"eval_precision_macro": 0.668531963924838,
"eval_precision_micro": 0.639162857830166,
"eval_recall_macro": 0.5729925283587635,
"eval_recall_micro": 0.639162857830166,
"eval_runtime": 8.6485,
"eval_samples_per_second": 480.664,
"eval_steps_per_second": 60.126,
"step": 2000
},
{
"epoch": 0.2847380410022779,
"grad_norm": 45.41240692138672,
"learning_rate": 5.6856492027334856e-05,
"loss": 0.7882,
"step": 2500
},
{
"epoch": 0.2847380410022779,
"eval_accuracy": 0.6523935530430599,
"eval_f1_macro": 0.5994264876841328,
"eval_f1_micro": 0.6523935530430599,
"eval_loss": 1.2463157176971436,
"eval_precision_macro": 0.6529647074997418,
"eval_precision_micro": 0.6523935530430599,
"eval_recall_macro": 0.6436371671271747,
"eval_recall_micro": 0.6523935530430599,
"eval_runtime": 8.4613,
"eval_samples_per_second": 491.293,
"eval_steps_per_second": 61.456,
"step": 2500
},
{
"epoch": 0.3416856492027335,
"grad_norm": 17.272947311401367,
"learning_rate": 6.824601366742597e-05,
"loss": 0.8087,
"step": 3000
},
{
"epoch": 0.3416856492027335,
"eval_accuracy": 0.7055568919894154,
"eval_f1_macro": 0.6265017927625092,
"eval_f1_micro": 0.7055568919894154,
"eval_loss": 1.2766073942184448,
"eval_precision_macro": 0.6878658131219406,
"eval_precision_micro": 0.7055568919894154,
"eval_recall_macro": 0.6658140911489481,
"eval_recall_micro": 0.7055568919894154,
"eval_runtime": 8.9536,
"eval_samples_per_second": 464.281,
"eval_steps_per_second": 58.077,
"step": 3000
},
{
"epoch": 0.39863325740318906,
"grad_norm": 36.27668380737305,
"learning_rate": 7.96355353075171e-05,
"loss": 0.7887,
"step": 3500
},
{
"epoch": 0.39863325740318906,
"eval_accuracy": 0.7149386576858311,
"eval_f1_macro": 0.6616726950770662,
"eval_f1_micro": 0.7149386576858311,
"eval_loss": 1.1627144813537598,
"eval_precision_macro": 0.7514554135519156,
"eval_precision_micro": 0.7149386576858311,
"eval_recall_macro": 0.6834968200252539,
"eval_recall_micro": 0.7149386576858311,
"eval_runtime": 8.4816,
"eval_samples_per_second": 490.118,
"eval_steps_per_second": 61.309,
"step": 3500
},
{
"epoch": 0.45558086560364464,
"grad_norm": 3.0386502742767334,
"learning_rate": 7.995372539966228e-05,
"loss": 0.7357,
"step": 4000
},
{
"epoch": 0.45558086560364464,
"eval_accuracy": 0.7611258118835699,
"eval_f1_macro": 0.6745353046335729,
"eval_f1_micro": 0.7611258118835699,
"eval_loss": 0.9807717800140381,
"eval_precision_macro": 0.7007756446431578,
"eval_precision_micro": 0.7611258118835699,
"eval_recall_macro": 0.7425048520300992,
"eval_recall_micro": 0.7611258118835699,
"eval_runtime": 8.4828,
"eval_samples_per_second": 490.05,
"eval_steps_per_second": 61.3,
"step": 4000
},
{
"epoch": 0.5125284738041003,
"grad_norm": 2.7381744384765625,
"learning_rate": 7.980884762455173e-05,
"loss": 0.7022,
"step": 4500
},
{
"epoch": 0.5125284738041003,
"eval_accuracy": 0.76088525378879,
"eval_f1_macro": 0.6871238458955197,
"eval_f1_micro": 0.76088525378879,
"eval_loss": 1.1158560514450073,
"eval_precision_macro": 0.7591168248124284,
"eval_precision_micro": 0.76088525378879,
"eval_recall_macro": 0.7099914791461782,
"eval_recall_micro": 0.76088525378879,
"eval_runtime": 9.0538,
"eval_samples_per_second": 459.145,
"eval_steps_per_second": 57.435,
"step": 4500
},
{
"epoch": 0.5694760820045558,
"grad_norm": 32.207054138183594,
"learning_rate": 7.956625791551662e-05,
"loss": 0.6587,
"step": 5000
},
{
"epoch": 0.5694760820045558,
"eval_accuracy": 0.8008178975222516,
"eval_f1_macro": 0.76243086871798,
"eval_f1_micro": 0.8008178975222516,
"eval_loss": 0.8932181000709534,
"eval_precision_macro": 0.7876270109291943,
"eval_precision_micro": 0.8008178975222516,
"eval_recall_macro": 0.7876260076814952,
"eval_recall_micro": 0.8008178975222516,
"eval_runtime": 8.5148,
"eval_samples_per_second": 488.207,
"eval_steps_per_second": 61.07,
"step": 5000
},
{
"epoch": 0.6264236902050114,
"grad_norm": 0.29054978489875793,
"learning_rate": 7.922558317223566e-05,
"loss": 0.5719,
"step": 5500
},
{
"epoch": 0.6264236902050114,
"eval_accuracy": 0.773875390906904,
"eval_f1_macro": 0.7249768597960354,
"eval_f1_micro": 0.773875390906904,
"eval_loss": 0.9966481328010559,
"eval_precision_macro": 0.7358501532705917,
"eval_precision_micro": 0.773875390906904,
"eval_recall_macro": 0.7614420126439011,
"eval_recall_micro": 0.773875390906904,
"eval_runtime": 8.4793,
"eval_samples_per_second": 490.251,
"eval_steps_per_second": 61.326,
"step": 5500
},
{
"epoch": 0.683371298405467,
"grad_norm": 1.0872896909713745,
"learning_rate": 7.878805260363261e-05,
"loss": 0.6425,
"step": 6000
},
{
"epoch": 0.683371298405467,
"eval_accuracy": 0.771710368053885,
"eval_f1_macro": 0.7254677224325083,
"eval_f1_micro": 0.771710368053885,
"eval_loss": 0.9605371952056885,
"eval_precision_macro": 0.8005336996598393,
"eval_precision_micro": 0.771710368053885,
"eval_recall_macro": 0.7547512504988463,
"eval_recall_micro": 0.771710368053885,
"eval_runtime": 8.4743,
"eval_samples_per_second": 490.54,
"eval_steps_per_second": 61.362,
"step": 6000
},
{
"epoch": 0.7403189066059226,
"grad_norm": 189.1195068359375,
"learning_rate": 7.82547465603587e-05,
"loss": 0.5433,
"step": 6500
},
{
"epoch": 0.7403189066059226,
"eval_accuracy": 0.7700264613904257,
"eval_f1_macro": 0.740451647791416,
"eval_f1_micro": 0.7700264613904257,
"eval_loss": 1.0783036947250366,
"eval_precision_macro": 0.769666104173451,
"eval_precision_micro": 0.7700264613904257,
"eval_recall_macro": 0.7659213860833466,
"eval_recall_micro": 0.7700264613904257,
"eval_runtime": 8.4608,
"eval_samples_per_second": 491.328,
"eval_steps_per_second": 61.46,
"step": 6500
},
{
"epoch": 0.7972665148063781,
"grad_norm": 0.5625237822532654,
"learning_rate": 7.762833068916386e-05,
"loss": 0.5736,
"step": 7000
},
{
"epoch": 0.7972665148063781,
"eval_accuracy": 0.7782054366129421,
"eval_f1_macro": 0.6779030343365783,
"eval_f1_micro": 0.7782054366129421,
"eval_loss": 1.0509027242660522,
"eval_precision_macro": 0.7339555194909612,
"eval_precision_micro": 0.7782054366129421,
"eval_recall_macro": 0.6828886626680506,
"eval_recall_micro": 0.7782054366129421,
"eval_runtime": 8.481,
"eval_samples_per_second": 490.156,
"eval_steps_per_second": 61.314,
"step": 7000
},
{
"epoch": 0.8542141230068337,
"grad_norm": 64.8875503540039,
"learning_rate": 7.690784156928418e-05,
"loss": 0.5273,
"step": 7500
},
{
"epoch": 0.8542141230068337,
"eval_accuracy": 0.7774837623286024,
"eval_f1_macro": 0.7006759117702724,
"eval_f1_micro": 0.7774837623286024,
"eval_loss": 1.083090901374817,
"eval_precision_macro": 0.7615638623524419,
"eval_precision_micro": 0.7774837623286024,
"eval_recall_macro": 0.7385809772355115,
"eval_recall_micro": 0.7774837623286024,
"eval_runtime": 8.4704,
"eval_samples_per_second": 490.769,
"eval_steps_per_second": 61.39,
"step": 7500
},
{
"epoch": 0.9111617312072893,
"grad_norm": 0.9078112840652466,
"learning_rate": 7.609621959255558e-05,
"loss": 0.5268,
"step": 8000
},
{
"epoch": 0.9111617312072893,
"eval_accuracy": 0.7445273033437575,
"eval_f1_macro": 0.699580887402278,
"eval_f1_micro": 0.7445273033437575,
"eval_loss": 1.3979923725128174,
"eval_precision_macro": 0.7001048011829232,
"eval_precision_micro": 0.7445273033437575,
"eval_recall_macro": 0.7726266308290096,
"eval_recall_micro": 0.7445273033437575,
"eval_runtime": 8.4384,
"eval_samples_per_second": 492.631,
"eval_steps_per_second": 61.623,
"step": 8000
},
{
"epoch": 0.9681093394077449,
"grad_norm": 42.65549850463867,
"learning_rate": 7.519735782617663e-05,
"loss": 0.5462,
"step": 8500
},
{
"epoch": 0.9681093394077449,
"eval_accuracy": 0.8287226365167187,
"eval_f1_macro": 0.7785313462283256,
"eval_f1_micro": 0.8287226365167187,
"eval_loss": 0.9026873111724854,
"eval_precision_macro": 0.7868255645583773,
"eval_precision_micro": 0.8287226365167187,
"eval_recall_macro": 0.8024053906273642,
"eval_recall_micro": 0.8287226365167187,
"eval_runtime": 8.4335,
"eval_samples_per_second": 492.916,
"eval_steps_per_second": 61.659,
"step": 8500
},
{
"epoch": 1.0250569476082005,
"grad_norm": 0.9462873935699463,
"learning_rate": 7.420987383057407e-05,
"loss": 0.486,
"step": 9000
},
{
"epoch": 1.0250569476082005,
"eval_accuracy": 0.7211931681501083,
"eval_f1_macro": 0.6932948143152837,
"eval_f1_micro": 0.7211931681501083,
"eval_loss": 1.7263842821121216,
"eval_precision_macro": 0.7360552900892146,
"eval_precision_micro": 0.7211931681501083,
"eval_recall_macro": 0.7409941847523269,
"eval_recall_micro": 0.7211931681501083,
"eval_runtime": 8.473,
"eval_samples_per_second": 490.615,
"eval_steps_per_second": 61.371,
"step": 9000
},
{
"epoch": 1.082004555808656,
"grad_norm": 0.8366897106170654,
"learning_rate": 7.314014528807089e-05,
"loss": 0.4458,
"step": 9500
},
{
"epoch": 1.082004555808656,
"eval_accuracy": 0.8186191965359635,
"eval_f1_macro": 0.7675006381723768,
"eval_f1_micro": 0.8186191965359635,
"eval_loss": 0.9773014783859253,
"eval_precision_macro": 0.7844621076644617,
"eval_precision_micro": 0.8186191965359635,
"eval_recall_macro": 0.8041851318048103,
"eval_recall_micro": 0.8186191965359635,
"eval_runtime": 8.4706,
"eval_samples_per_second": 490.754,
"eval_steps_per_second": 61.389,
"step": 9500
},
{
"epoch": 1.1389521640091116,
"grad_norm": 2.115098237991333,
"learning_rate": 7.198652696785955e-05,
"loss": 0.4102,
"step": 10000
},
{
"epoch": 1.1389521640091116,
"eval_accuracy": 0.8316093336540774,
"eval_f1_macro": 0.7933624073358755,
"eval_f1_micro": 0.8316093336540774,
"eval_loss": 0.967036247253418,
"eval_precision_macro": 0.7949251475033909,
"eval_precision_micro": 0.8316093336540774,
"eval_recall_macro": 0.8274655786829063,
"eval_recall_micro": 0.8316093336540774,
"eval_runtime": 8.4509,
"eval_samples_per_second": 491.9,
"eval_steps_per_second": 61.532,
"step": 10000
},
{
"epoch": 1.1958997722095672,
"grad_norm": 1.0413740873336792,
"learning_rate": 7.075392750273938e-05,
"loss": 0.3773,
"step": 10500
},
{
"epoch": 1.1958997722095672,
"eval_accuracy": 0.8171758479672842,
"eval_f1_macro": 0.7981622962245145,
"eval_f1_micro": 0.8171758479672842,
"eval_loss": 0.9198176860809326,
"eval_precision_macro": 0.819291401144176,
"eval_precision_micro": 0.8171758479672842,
"eval_recall_macro": 0.8245482183363974,
"eval_recall_micro": 0.8171758479672842,
"eval_runtime": 8.4577,
"eval_samples_per_second": 491.505,
"eval_steps_per_second": 61.482,
"step": 10500
},
{
"epoch": 1.2528473804100229,
"grad_norm": 7.1126203536987305,
"learning_rate": 6.94453904277921e-05,
"loss": 0.3796,
"step": 11000
},
{
"epoch": 1.2528473804100229,
"eval_accuracy": 0.7741159490016839,
"eval_f1_macro": 0.7334267874337288,
"eval_f1_micro": 0.7741159490016839,
"eval_loss": 1.324471116065979,
"eval_precision_macro": 0.7554977548108798,
"eval_precision_micro": 0.7741159490016839,
"eval_recall_macro": 0.7865189643712818,
"eval_recall_micro": 0.7741159490016839,
"eval_runtime": 8.4546,
"eval_samples_per_second": 491.688,
"eval_steps_per_second": 61.505,
"step": 11000
},
{
"epoch": 1.3097949886104785,
"grad_norm": 85.53689575195312,
"learning_rate": 6.806414678327537e-05,
"loss": 0.4432,
"step": 11500
},
{
"epoch": 1.3097949886104785,
"eval_accuracy": 0.8092374308395478,
"eval_f1_macro": 0.7438442519057924,
"eval_f1_micro": 0.8092374308395478,
"eval_loss": 1.1105079650878906,
"eval_precision_macro": 0.7748012629794462,
"eval_precision_micro": 0.8092374308395478,
"eval_recall_macro": 0.7783932685192183,
"eval_recall_micro": 0.8092374308395478,
"eval_runtime": 8.4968,
"eval_samples_per_second": 489.244,
"eval_steps_per_second": 61.2,
"step": 11500
},
{
"epoch": 1.366742596810934,
"grad_norm": 1.0569897890090942,
"learning_rate": 6.661360713653681e-05,
"loss": 0.389,
"step": 12000
},
{
"epoch": 1.366742596810934,
"eval_accuracy": 0.8128458022612461,
"eval_f1_macro": 0.7595338289100665,
"eval_f1_micro": 0.8128458022612461,
"eval_loss": 1.0267034769058228,
"eval_precision_macro": 0.7590761169934961,
"eval_precision_micro": 0.8128458022612461,
"eval_recall_macro": 0.8036606747506161,
"eval_recall_micro": 0.8128458022612461,
"eval_runtime": 8.4648,
"eval_samples_per_second": 491.09,
"eval_steps_per_second": 61.431,
"step": 12000
},
{
"epoch": 1.4236902050113895,
"grad_norm": 14.686357498168945,
"learning_rate": 6.509735316063996e-05,
"loss": 0.6087,
"step": 12500
},
{
"epoch": 1.4236902050113895,
"eval_accuracy": 0.8443589126774116,
"eval_f1_macro": 0.7997974993396904,
"eval_f1_micro": 0.8443589126774116,
"eval_loss": 0.9193519353866577,
"eval_precision_macro": 0.7894219117254144,
"eval_precision_micro": 0.8443589126774116,
"eval_recall_macro": 0.833108777347993,
"eval_recall_micro": 0.8443589126774116,
"eval_runtime": 8.4379,
"eval_samples_per_second": 492.657,
"eval_steps_per_second": 61.627,
"step": 12500
},
{
"epoch": 1.4806378132118452,
"grad_norm": 0.05014890432357788,
"learning_rate": 6.352234452003862e-05,
"loss": 0.4803,
"step": 13000
},
{
"epoch": 1.4806378132118452,
"eval_accuracy": 0.8535001202790474,
"eval_f1_macro": 0.8265458036830905,
"eval_f1_micro": 0.8535001202790474,
"eval_loss": 0.879317581653595,
"eval_precision_macro": 0.8329036145166532,
"eval_precision_micro": 0.8535001202790474,
"eval_recall_macro": 0.8340628674797995,
"eval_recall_micro": 0.8535001202790474,
"eval_runtime": 8.4157,
"eval_samples_per_second": 493.96,
"eval_steps_per_second": 61.79,
"step": 13000
},
{
"epoch": 1.5375854214123006,
"grad_norm": 40.28041076660156,
"learning_rate": 6.188948654276723e-05,
"loss": 0.5513,
"step": 13500
},
{
"epoch": 1.5375854214123006,
"eval_accuracy": 0.7782054366129421,
"eval_f1_macro": 0.7066835334217871,
"eval_f1_micro": 0.7782054366129421,
"eval_loss": 1.1909141540527344,
"eval_precision_macro": 0.7255202837659691,
"eval_precision_micro": 0.7782054366129421,
"eval_recall_macro": 0.7402758652568006,
"eval_recall_micro": 0.7782054366129421,
"eval_runtime": 8.4835,
"eval_samples_per_second": 490.012,
"eval_steps_per_second": 61.296,
"step": 13500
},
{
"epoch": 1.5945330296127562,
"grad_norm": 8.929847717285156,
"learning_rate": 6.019936353958699e-05,
"loss": 0.693,
"step": 14000
},
{
"epoch": 1.5945330296127562,
"eval_accuracy": 0.2460909309598268,
"eval_f1_macro": 0.20336556842910194,
"eval_f1_micro": 0.2460909309598268,
"eval_loss": 2.8277578353881836,
"eval_precision_macro": 0.5226671539146224,
"eval_precision_micro": 0.2460909309598268,
"eval_recall_macro": 0.1936798727163823,
"eval_recall_micro": 0.2460909309598268,
"eval_runtime": 8.4782,
"eval_samples_per_second": 490.315,
"eval_steps_per_second": 61.334,
"step": 14000
},
{
"epoch": 1.6514806378132119,
"grad_norm": 28.1862735748291,
"learning_rate": 5.8459364260048594e-05,
"loss": 0.9646,
"step": 14500
},
{
"epoch": 1.6514806378132119,
"eval_accuracy": 0.8246331489054607,
"eval_f1_macro": 0.7913907456133474,
"eval_f1_micro": 0.8246331489054607,
"eval_loss": 0.948131799697876,
"eval_precision_macro": 0.799701208597097,
"eval_precision_micro": 0.8246331489054607,
"eval_recall_macro": 0.8123299807423895,
"eval_recall_micro": 0.8246331489054607,
"eval_runtime": 8.4643,
"eval_samples_per_second": 491.124,
"eval_steps_per_second": 61.435,
"step": 14500
},
{
"epoch": 1.7084282460136673,
"grad_norm": 7.083284854888916,
"learning_rate": 5.6673785111054136e-05,
"loss": 0.462,
"step": 15000
},
{
"epoch": 1.7084282460136673,
"eval_accuracy": 0.8472456098147703,
"eval_f1_macro": 0.8091993297668394,
"eval_f1_micro": 0.8472456098147703,
"eval_loss": 0.8667464256286621,
"eval_precision_macro": 0.8198322314440802,
"eval_precision_micro": 0.8472456098147703,
"eval_recall_macro": 0.8266325847969769,
"eval_recall_micro": 0.8472456098147703,
"eval_runtime": 8.4589,
"eval_samples_per_second": 491.435,
"eval_steps_per_second": 61.474,
"step": 15000
},
{
"epoch": 1.7653758542141231,
"grad_norm": 65.16443634033203,
"learning_rate": 5.484703504533721e-05,
"loss": 0.4093,
"step": 15500
},
{
"epoch": 1.7653758542141231,
"eval_accuracy": 0.8116430117873467,
"eval_f1_macro": 0.7700885656504642,
"eval_f1_micro": 0.8116430117873467,
"eval_loss": 1.1932649612426758,
"eval_precision_macro": 0.8232095109547221,
"eval_precision_micro": 0.8116430117873467,
"eval_recall_macro": 0.775800293436143,
"eval_recall_micro": 0.8116430117873467,
"eval_runtime": 8.4735,
"eval_samples_per_second": 490.589,
"eval_steps_per_second": 61.368,
"step": 15500
},
{
"epoch": 1.8223234624145785,
"grad_norm": 15.238636016845703,
"learning_rate": 5.2983624674875084e-05,
"loss": 0.349,
"step": 16000
},
{
"epoch": 1.8223234624145785,
"eval_accuracy": 0.8799615107048352,
"eval_f1_macro": 0.8377281287378512,
"eval_f1_micro": 0.8799615107048352,
"eval_loss": 0.7749123573303223,
"eval_precision_macro": 0.8358272736538074,
"eval_precision_micro": 0.8799615107048352,
"eval_recall_macro": 0.8723899950094214,
"eval_recall_micro": 0.8799615107048352,
"eval_runtime": 8.4617,
"eval_samples_per_second": 491.272,
"eval_steps_per_second": 61.453,
"step": 16000
},
{
"epoch": 1.8792710706150342,
"grad_norm": 1.931815505027771,
"learning_rate": 5.108815513328386e-05,
"loss": 0.3333,
"step": 16500
},
{
"epoch": 1.8792710706150342,
"eval_accuracy": 0.8799615107048352,
"eval_f1_macro": 0.851689807067872,
"eval_f1_micro": 0.8799615107048352,
"eval_loss": 0.6482954025268555,
"eval_precision_macro": 0.8439980638748368,
"eval_precision_micro": 0.8799615107048352,
"eval_recall_macro": 0.8709009740944947,
"eval_recall_micro": 0.8799615107048352,
"eval_runtime": 8.4707,
"eval_samples_per_second": 490.751,
"eval_steps_per_second": 61.388,
"step": 16500
},
{
"epoch": 1.9362186788154898,
"grad_norm": 14.734030723571777,
"learning_rate": 4.916530671469754e-05,
"loss": 0.3449,
"step": 17000
},
{
"epoch": 1.9362186788154898,
"eval_accuracy": 0.7890305508780371,
"eval_f1_macro": 0.7375164837600725,
"eval_f1_micro": 0.7890305508780371,
"eval_loss": 1.2130000591278076,
"eval_precision_macro": 0.7696829158091395,
"eval_precision_micro": 0.7890305508780371,
"eval_recall_macro": 0.7545432686041814,
"eval_recall_micro": 0.7890305508780371,
"eval_runtime": 8.448,
"eval_samples_per_second": 492.07,
"eval_steps_per_second": 61.553,
"step": 17000
},
{
"epoch": 1.9931662870159452,
"grad_norm": 4.0362091064453125,
"learning_rate": 4.7219827317183907e-05,
"loss": 0.2982,
"step": 17500
},
{
"epoch": 1.9931662870159452,
"eval_accuracy": 0.8599951888381044,
"eval_f1_macro": 0.8354955249556116,
"eval_f1_micro": 0.8599951888381044,
"eval_loss": 0.8803524374961853,
"eval_precision_macro": 0.8342493543222153,
"eval_precision_micro": 0.8599951888381044,
"eval_recall_macro": 0.8556466165427711,
"eval_recall_micro": 0.8599951888381044,
"eval_runtime": 8.4723,
"eval_samples_per_second": 490.66,
"eval_steps_per_second": 61.377,
"step": 17500
},
{
"epoch": 2.050113895216401,
"grad_norm": 0.7534123659133911,
"learning_rate": 4.525652071923279e-05,
"loss": 0.2348,
"step": 18000
},
{
"epoch": 2.050113895216401,
"eval_accuracy": 0.8448400288669714,
"eval_f1_macro": 0.8190720986060728,
"eval_f1_micro": 0.8448400288669714,
"eval_loss": 1.1245763301849365,
"eval_precision_macro": 0.8337767355676724,
"eval_precision_micro": 0.8448400288669714,
"eval_recall_macro": 0.8557743090961959,
"eval_recall_micro": 0.8448400288669714,
"eval_runtime": 8.4721,
"eval_samples_per_second": 490.667,
"eval_steps_per_second": 61.378,
"step": 18000
},
{
"epoch": 2.1070615034168565,
"grad_norm": 1.4927374124526978,
"learning_rate": 4.328023471826429e-05,
"loss": 0.2299,
"step": 18500
},
{
"epoch": 2.1070615034168565,
"eval_accuracy": 0.8708203031031995,
"eval_f1_macro": 0.8303241771039817,
"eval_f1_micro": 0.8708203031031995,
"eval_loss": 0.8329204320907593,
"eval_precision_macro": 0.83267401976066,
"eval_precision_micro": 0.8708203031031995,
"eval_recall_macro": 0.8610797711013763,
"eval_recall_micro": 0.8708203031031995,
"eval_runtime": 8.4578,
"eval_samples_per_second": 491.498,
"eval_steps_per_second": 61.482,
"step": 18500
},
{
"epoch": 2.164009111617312,
"grad_norm": 1.7383619546890259,
"learning_rate": 4.129584916044555e-05,
"loss": 0.2468,
"step": 19000
},
{
"epoch": 2.164009111617312,
"eval_accuracy": 0.8773153716622565,
"eval_f1_macro": 0.846177762458016,
"eval_f1_micro": 0.8773153716622565,
"eval_loss": 0.7664415836334229,
"eval_precision_macro": 0.8368864405451267,
"eval_precision_micro": 0.8773153716622565,
"eval_recall_macro": 0.8780489137762446,
"eval_recall_micro": 0.8773153716622565,
"eval_runtime": 8.4381,
"eval_samples_per_second": 492.645,
"eval_steps_per_second": 61.625,
"step": 19000
},
{
"epoch": 2.2209567198177678,
"grad_norm": 400.013916015625,
"learning_rate": 3.930826389137262e-05,
"loss": 0.1899,
"step": 19500
},
{
"epoch": 2.2209567198177678,
"eval_accuracy": 0.8862160211691124,
"eval_f1_macro": 0.8558563382552302,
"eval_f1_micro": 0.8862160211691124,
"eval_loss": 0.7495226263999939,
"eval_precision_macro": 0.8555231307289006,
"eval_precision_micro": 0.8862160211691124,
"eval_recall_macro": 0.8758801511510593,
"eval_recall_micro": 0.8862160211691124,
"eval_runtime": 8.4445,
"eval_samples_per_second": 492.274,
"eval_steps_per_second": 61.579,
"step": 19500
},
{
"epoch": 2.277904328018223,
"grad_norm": 2.0449647903442383,
"learning_rate": 3.732635344608829e-05,
"loss": 0.1977,
"step": 20000
},
{
"epoch": 2.277904328018223,
"eval_accuracy": 0.8551840269425066,
"eval_f1_macro": 0.8198359165864053,
"eval_f1_micro": 0.8551840269425066,
"eval_loss": 1.0149922370910645,
"eval_precision_macro": 0.8328051392132652,
"eval_precision_micro": 0.8551840269425066,
"eval_recall_macro": 0.853902169918103,
"eval_recall_micro": 0.8551840269425066,
"eval_runtime": 8.4355,
"eval_samples_per_second": 492.798,
"eval_steps_per_second": 61.644,
"step": 20000
},
{
"epoch": 2.334851936218679,
"grad_norm": 0.7749654650688171,
"learning_rate": 3.534706966798757e-05,
"loss": 0.2314,
"step": 20500
},
{
"epoch": 2.334851936218679,
"eval_accuracy": 0.8948761125811884,
"eval_f1_macro": 0.8659539030629905,
"eval_f1_micro": 0.8948761125811884,
"eval_loss": 0.6898870468139648,
"eval_precision_macro": 0.8564830162508421,
"eval_precision_micro": 0.8948761125811884,
"eval_recall_macro": 0.8978688795664334,
"eval_recall_micro": 0.8948761125811884,
"eval_runtime": 8.4412,
"eval_samples_per_second": 492.466,
"eval_steps_per_second": 61.603,
"step": 20500
},
{
"epoch": 2.3917995444191344,
"grad_norm": 0.007438243832439184,
"learning_rate": 3.337927490728384e-05,
"loss": 0.203,
"step": 21000
},
{
"epoch": 2.3917995444191344,
"eval_accuracy": 0.8984844840028867,
"eval_f1_macro": 0.8654367599963082,
"eval_f1_micro": 0.8984844840028867,
"eval_loss": 0.6225568652153015,
"eval_precision_macro": 0.8710584795534867,
"eval_precision_micro": 0.8984844840028867,
"eval_recall_macro": 0.8907316535034207,
"eval_recall_micro": 0.8984844840028867,
"eval_runtime": 8.4582,
"eval_samples_per_second": 491.477,
"eval_steps_per_second": 61.479,
"step": 21000
},
{
"epoch": 2.44874715261959,
"grad_norm": 1.0429240465164185,
"learning_rate": 3.142782804357047e-05,
"loss": 0.1784,
"step": 21500
},
{
"epoch": 2.44874715261959,
"eval_accuracy": 0.8806831849891749,
"eval_f1_macro": 0.8516903935082014,
"eval_f1_micro": 0.8806831849891749,
"eval_loss": 0.8446455001831055,
"eval_precision_macro": 0.86002376045135,
"eval_precision_micro": 0.8806831849891749,
"eval_recall_macro": 0.8768325982997476,
"eval_recall_micro": 0.8806831849891749,
"eval_runtime": 8.4659,
"eval_samples_per_second": 491.026,
"eval_steps_per_second": 61.423,
"step": 21500
},
{
"epoch": 2.5056947608200457,
"grad_norm": 89.87359619140625,
"learning_rate": 2.9497547590207118e-05,
"loss": 0.2044,
"step": 22000
},
{
"epoch": 2.5056947608200457,
"eval_accuracy": 0.8556651431320664,
"eval_f1_macro": 0.8466413367690651,
"eval_f1_micro": 0.8556651431320664,
"eval_loss": 0.9901952743530273,
"eval_precision_macro": 0.8512325160929325,
"eval_precision_micro": 0.8556651431320664,
"eval_recall_macro": 0.875509109034969,
"eval_recall_micro": 0.8556651431320664,
"eval_runtime": 8.5077,
"eval_samples_per_second": 488.617,
"eval_steps_per_second": 61.121,
"step": 22000
},
{
"epoch": 2.562642369020501,
"grad_norm": 0.013815644197165966,
"learning_rate": 2.759319979644478e-05,
"loss": 0.1914,
"step": 22500
},
{
"epoch": 2.562642369020501,
"eval_accuracy": 0.8927110897281694,
"eval_f1_macro": 0.8644517343056812,
"eval_f1_micro": 0.8927110897281694,
"eval_loss": 0.7158553004264832,
"eval_precision_macro": 0.8493787111870335,
"eval_precision_micro": 0.8927110897281694,
"eval_recall_macro": 0.898066885887026,
"eval_recall_micro": 0.8927110897281694,
"eval_runtime": 8.4749,
"eval_samples_per_second": 490.509,
"eval_steps_per_second": 61.358,
"step": 22500
},
{
"epoch": 2.619589977220957,
"grad_norm": 4.245390892028809,
"learning_rate": 2.5719486878601176e-05,
"loss": 0.1639,
"step": 23000
},
{
"epoch": 2.619589977220957,
"eval_accuracy": 0.8864565792638922,
"eval_f1_macro": 0.8458899447458572,
"eval_f1_micro": 0.8864565792638922,
"eval_loss": 0.729199230670929,
"eval_precision_macro": 0.8401968974647541,
"eval_precision_micro": 0.8864565792638922,
"eval_recall_macro": 0.8805776786360812,
"eval_recall_micro": 0.8864565792638922,
"eval_runtime": 8.4586,
"eval_samples_per_second": 491.451,
"eval_steps_per_second": 61.476,
"step": 23000
},
{
"epoch": 2.6765375854214124,
"grad_norm": 1.427230715751648,
"learning_rate": 2.3881035409346452e-05,
"loss": 0.218,
"step": 23500
},
{
"epoch": 2.6765375854214124,
"eval_accuracy": 0.8965600192446476,
"eval_f1_macro": 0.8710100150868682,
"eval_f1_micro": 0.8965600192446476,
"eval_loss": 0.6507639288902283,
"eval_precision_macro": 0.8627987859589317,
"eval_precision_micro": 0.8965600192446476,
"eval_recall_macro": 0.8951286903819264,
"eval_recall_micro": 0.8965600192446476,
"eval_runtime": 8.4672,
"eval_samples_per_second": 490.955,
"eval_steps_per_second": 61.414,
"step": 23500
},
{
"epoch": 2.733485193621868,
"grad_norm": 0.07296980172395706,
"learning_rate": 2.208238489376805e-05,
"loss": 0.1723,
"step": 24000
},
{
"epoch": 2.733485193621868,
"eval_accuracy": 0.9023334135193649,
"eval_f1_macro": 0.8816239296246752,
"eval_f1_micro": 0.9023334135193649,
"eval_loss": 0.6424487233161926,
"eval_precision_macro": 0.8835852425011356,
"eval_precision_micro": 0.9023334135193649,
"eval_recall_macro": 0.8920920740693067,
"eval_recall_micro": 0.9023334135193649,
"eval_runtime": 8.4613,
"eval_samples_per_second": 491.296,
"eval_steps_per_second": 61.456,
"step": 24000
},
{
"epoch": 2.7904328018223232,
"grad_norm": 125.23475646972656,
"learning_rate": 2.0331438324793375e-05,
"loss": 0.1682,
"step": 24500
},
{
"epoch": 2.7904328018223232,
"eval_accuracy": 0.8972816935289872,
"eval_f1_macro": 0.8822737330613647,
"eval_f1_micro": 0.8972816935289872,
"eval_loss": 0.6842340230941772,
"eval_precision_macro": 0.8827282408108892,
"eval_precision_micro": 0.8972816935289872,
"eval_recall_macro": 0.8944889961638287,
"eval_recall_micro": 0.8972816935289872,
"eval_runtime": 8.4764,
"eval_samples_per_second": 490.419,
"eval_steps_per_second": 61.347,
"step": 24500
},
{
"epoch": 2.847380410022779,
"grad_norm": 0.5098503828048706,
"learning_rate": 1.8628863347570347e-05,
"loss": 0.1441,
"step": 25000
},
{
"epoch": 2.847380410022779,
"eval_accuracy": 0.8948761125811884,
"eval_f1_macro": 0.8736085811230334,
"eval_f1_micro": 0.8948761125811884,
"eval_loss": 0.7110973596572876,
"eval_precision_macro": 0.8674915707955687,
"eval_precision_micro": 0.8948761125811884,
"eval_recall_macro": 0.901699859992757,
"eval_recall_micro": 0.8948761125811884,
"eval_runtime": 8.4537,
"eval_samples_per_second": 491.739,
"eval_steps_per_second": 61.512,
"step": 25000
},
{
"epoch": 2.9043280182232345,
"grad_norm": 0.8350435495376587,
"learning_rate": 1.6975596030661532e-05,
"loss": 0.1625,
"step": 25500
},
{
"epoch": 2.9043280182232345,
"eval_accuracy": 0.9030550878037046,
"eval_f1_macro": 0.8779275841497265,
"eval_f1_micro": 0.9030550878037046,
"eval_loss": 0.6505866050720215,
"eval_precision_macro": 0.8690908487688577,
"eval_precision_micro": 0.9030550878037046,
"eval_recall_macro": 0.9045801541095179,
"eval_recall_micro": 0.9030550878037046,
"eval_runtime": 8.4694,
"eval_samples_per_second": 490.823,
"eval_steps_per_second": 61.397,
"step": 25500
},
{
"epoch": 2.9612756264236904,
"grad_norm": 0.08747211843729019,
"learning_rate": 1.537918058104578e-05,
"loss": 0.1494,
"step": 26000
},
{
"epoch": 2.9612756264236904,
"eval_accuracy": 0.9090690401732018,
"eval_f1_macro": 0.8830141025055214,
"eval_f1_micro": 0.9090690401732018,
"eval_loss": 0.5693129301071167,
"eval_precision_macro": 0.8816974949312945,
"eval_precision_micro": 0.9090690401732018,
"eval_recall_macro": 0.9015040014659215,
"eval_recall_micro": 0.9090690401732018,
"eval_runtime": 8.5221,
"eval_samples_per_second": 487.793,
"eval_steps_per_second": 61.018,
"step": 26000
},
{
"epoch": 3.0182232346241458,
"grad_norm": 2.089958906173706,
"learning_rate": 1.3843558868376073e-05,
"loss": 0.1306,
"step": 26500
},
{
"epoch": 3.0182232346241458,
"eval_accuracy": 0.9179696896800578,
"eval_f1_macro": 0.8947539710239703,
"eval_f1_micro": 0.9179696896800578,
"eval_loss": 0.534771203994751,
"eval_precision_macro": 0.8923066730381018,
"eval_precision_micro": 0.9179696896800578,
"eval_recall_macro": 0.9121332598176911,
"eval_recall_micro": 0.9179696896800578,
"eval_runtime": 8.5905,
"eval_samples_per_second": 483.909,
"eval_steps_per_second": 60.532,
"step": 26500
},
{
"epoch": 3.075170842824601,
"grad_norm": 0.0017847216222435236,
"learning_rate": 1.2372522650386443e-05,
"loss": 0.0929,
"step": 27000
},
{
"epoch": 3.075170842824601,
"eval_accuracy": 0.9162857830165985,
"eval_f1_macro": 0.8899287169854497,
"eval_f1_micro": 0.9162857830165985,
"eval_loss": 0.5843378305435181,
"eval_precision_macro": 0.8796301981250076,
"eval_precision_micro": 0.9162857830165985,
"eval_recall_macro": 0.9152639619070458,
"eval_recall_micro": 0.9162857830165985,
"eval_runtime": 8.4184,
"eval_samples_per_second": 493.798,
"eval_steps_per_second": 61.769,
"step": 27000
},
{
"epoch": 3.132118451025057,
"grad_norm": 0.7512030601501465,
"learning_rate": 1.096970421028209e-05,
"loss": 0.1351,
"step": 27500
},
{
"epoch": 3.132118451025057,
"eval_accuracy": 0.9023334135193649,
"eval_f1_macro": 0.8733006387364801,
"eval_f1_micro": 0.9023334135193649,
"eval_loss": 0.6913191080093384,
"eval_precision_macro": 0.8611014409270759,
"eval_precision_micro": 0.9023334135193649,
"eval_recall_macro": 0.9038150903313851,
"eval_recall_micro": 0.9023334135193649,
"eval_runtime": 8.4379,
"eval_samples_per_second": 492.656,
"eval_steps_per_second": 61.626,
"step": 27500
},
{
"epoch": 3.1890660592255125,
"grad_norm": 0.005041371565312147,
"learning_rate": 9.638567387904402e-06,
"loss": 0.0907,
"step": 28000
},
{
"epoch": 3.1890660592255125,
"eval_accuracy": 0.9206158287226365,
"eval_f1_macro": 0.8974407788451421,
"eval_f1_micro": 0.9206158287226365,
"eval_loss": 0.5801523327827454,
"eval_precision_macro": 0.8882950671220116,
"eval_precision_micro": 0.9206158287226365,
"eval_recall_macro": 0.9136820970599899,
"eval_recall_micro": 0.9206158287226365,
"eval_runtime": 8.4725,
"eval_samples_per_second": 490.649,
"eval_steps_per_second": 61.375,
"step": 28000
},
{
"epoch": 3.2460136674259683,
"grad_norm": 0.6128404140472412,
"learning_rate": 8.382399026816216e-06,
"loss": 0.117,
"step": 28500
},
{
"epoch": 3.2460136674259683,
"eval_accuracy": 0.9105123887418811,
"eval_f1_macro": 0.8877368293575257,
"eval_f1_micro": 0.9105123887418811,
"eval_loss": 0.6602935791015625,
"eval_precision_macro": 0.8839485939859522,
"eval_precision_micro": 0.9105123887418811,
"eval_recall_macro": 0.905732720534913,
"eval_recall_micro": 0.9105123887418811,
"eval_runtime": 8.482,
"eval_samples_per_second": 490.097,
"eval_steps_per_second": 61.306,
"step": 28500
},
{
"epoch": 3.3029612756264237,
"grad_norm": 0.0018906695768237114,
"learning_rate": 7.2065771743884275e-06,
"loss": 0.0986,
"step": 29000
},
{
"epoch": 3.3029612756264237,
"eval_accuracy": 0.9105123887418811,
"eval_f1_macro": 0.887646044144043,
"eval_f1_micro": 0.9105123887418811,
"eval_loss": 0.6138319969177246,
"eval_precision_macro": 0.887907584612917,
"eval_precision_micro": 0.9105123887418811,
"eval_recall_macro": 0.9029879731406512,
"eval_recall_micro": 0.9105123887418811,
"eval_runtime": 8.4603,
"eval_samples_per_second": 491.355,
"eval_steps_per_second": 61.464,
"step": 29000
},
{
"epoch": 3.359908883826879,
"grad_norm": 1.5009195804595947,
"learning_rate": 6.109293429462298e-06,
"loss": 0.1196,
"step": 29500
},
{
"epoch": 3.359908883826879,
"eval_accuracy": 0.9131585277844599,
"eval_f1_macro": 0.8922838073892356,
"eval_f1_micro": 0.9131585277844599,
"eval_loss": 0.6528560519218445,
"eval_precision_macro": 0.8910787251464412,
"eval_precision_micro": 0.9131585277844599,
"eval_recall_macro": 0.9099849910959319,
"eval_recall_micro": 0.9131585277844599,
"eval_runtime": 8.4579,
"eval_samples_per_second": 491.492,
"eval_steps_per_second": 61.481,
"step": 29500
},
{
"epoch": 3.416856492027335,
"grad_norm": 2.331648349761963,
"learning_rate": 5.0956926304652455e-06,
"loss": 0.1056,
"step": 30000
},
{
"epoch": 3.416856492027335,
"eval_accuracy": 0.9131585277844599,
"eval_f1_macro": 0.8919235957427174,
"eval_f1_micro": 0.9131585277844599,
"eval_loss": 0.6215759515762329,
"eval_precision_macro": 0.884609220404625,
"eval_precision_micro": 0.9131585277844599,
"eval_recall_macro": 0.9135941017288176,
"eval_recall_micro": 0.9131585277844599,
"eval_runtime": 8.4442,
"eval_samples_per_second": 492.29,
"eval_steps_per_second": 61.581,
"step": 30000
},
{
"epoch": 3.4738041002277904,
"grad_norm": 148.2794189453125,
"learning_rate": 4.168277560886878e-06,
"loss": 0.1106,
"step": 30500
},
{
"epoch": 3.4738041002277904,
"eval_accuracy": 0.9201347125330768,
"eval_f1_macro": 0.891345440347701,
"eval_f1_micro": 0.9201347125330768,
"eval_loss": 0.5669803023338318,
"eval_precision_macro": 0.8817511779041877,
"eval_precision_micro": 0.9201347125330768,
"eval_recall_macro": 0.9129521474780394,
"eval_recall_micro": 0.9201347125330768,
"eval_runtime": 8.4316,
"eval_samples_per_second": 493.024,
"eval_steps_per_second": 61.672,
"step": 30500
},
{
"epoch": 3.5307517084282463,
"grad_norm": 4.748849868774414,
"learning_rate": 3.3293381943799983e-06,
"loss": 0.0953,
"step": 31000
},
{
"epoch": 3.5307517084282463,
"eval_accuracy": 0.9189319220591773,
"eval_f1_macro": 0.8934567988898604,
"eval_f1_micro": 0.9189319220591773,
"eval_loss": 0.569622278213501,
"eval_precision_macro": 0.8825638399763271,
"eval_precision_micro": 0.9189319220591773,
"eval_recall_macro": 0.9187150647269768,
"eval_recall_micro": 0.9189319220591773,
"eval_runtime": 8.4421,
"eval_samples_per_second": 492.411,
"eval_steps_per_second": 61.596,
"step": 31000
},
{
"epoch": 3.5876993166287017,
"grad_norm": 1.284857153892517,
"learning_rate": 2.580946040356764e-06,
"loss": 0.0989,
"step": 31500
},
{
"epoch": 3.5876993166287017,
"eval_accuracy": 0.9194130382487371,
"eval_f1_macro": 0.8981523298720768,
"eval_f1_micro": 0.9194130382487371,
"eval_loss": 0.5652771592140198,
"eval_precision_macro": 0.8854580096444836,
"eval_precision_micro": 0.9194130382487371,
"eval_recall_macro": 0.9219999897271275,
"eval_recall_micro": 0.9194130382487371,
"eval_runtime": 8.43,
"eval_samples_per_second": 493.122,
"eval_steps_per_second": 61.685,
"step": 31500
},
{
"epoch": 3.644646924829157,
"grad_norm": 1.4339042901992798,
"learning_rate": 1.9249490290167914e-06,
"loss": 0.0989,
"step": 32000
},
{
"epoch": 3.644646924829157,
"eval_accuracy": 0.9138802020687996,
"eval_f1_macro": 0.89412218234686,
"eval_f1_micro": 0.9138802020687996,
"eval_loss": 0.6019502282142639,
"eval_precision_macro": 0.882568682117879,
"eval_precision_micro": 0.9138802020687996,
"eval_recall_macro": 0.9175339639156622,
"eval_recall_micro": 0.9138802020687996,
"eval_runtime": 8.4297,
"eval_samples_per_second": 493.138,
"eval_steps_per_second": 61.687,
"step": 32000
},
{
"epoch": 3.7015945330296125,
"grad_norm": 0.7662363648414612,
"learning_rate": 1.3629669484372722e-06,
"loss": 0.0876,
"step": 32500
},
{
"epoch": 3.7015945330296125,
"eval_accuracy": 0.9167668992061583,
"eval_f1_macro": 0.8960433945398601,
"eval_f1_micro": 0.9167668992061583,
"eval_loss": 0.578229546546936,
"eval_precision_macro": 0.8823539415499122,
"eval_precision_micro": 0.9167668992061583,
"eval_recall_macro": 0.9211442367709737,
"eval_recall_micro": 0.9167668992061583,
"eval_runtime": 8.4419,
"eval_samples_per_second": 492.426,
"eval_steps_per_second": 61.598,
"step": 32500
},
{
"epoch": 3.7585421412300684,
"grad_norm": 0.8838233947753906,
"learning_rate": 8.963874449915156e-07,
"loss": 0.1004,
"step": 33000
},
{
"epoch": 3.7585421412300684,
"eval_accuracy": 0.9186913639643974,
"eval_f1_macro": 0.8974164477796313,
"eval_f1_micro": 0.9186913639643974,
"eval_loss": 0.5638399720191956,
"eval_precision_macro": 0.8850042534249818,
"eval_precision_micro": 0.9186913639643974,
"eval_recall_macro": 0.9204573653096998,
"eval_recall_micro": 0.9186913639643974,
"eval_runtime": 8.4287,
"eval_samples_per_second": 493.198,
"eval_steps_per_second": 61.694,
"step": 33000
},
{
"epoch": 3.8154897494305238,
"grad_norm": 0.8862270712852478,
"learning_rate": 5.263625969720654e-07,
"loss": 0.1163,
"step": 33500
},
{
"epoch": 3.8154897494305238,
"eval_accuracy": 0.9189319220591773,
"eval_f1_macro": 0.8978554181419179,
"eval_f1_micro": 0.9189319220591773,
"eval_loss": 0.5575982332229614,
"eval_precision_macro": 0.8842809438001717,
"eval_precision_micro": 0.9189319220591773,
"eval_recall_macro": 0.9207880969765441,
"eval_recall_micro": 0.9189319220591773,
"eval_runtime": 8.4411,
"eval_samples_per_second": 492.47,
"eval_steps_per_second": 61.603,
"step": 33500
},
{
"epoch": 3.8724373576309796,
"grad_norm": 0.0018380646361038089,
"learning_rate": 2.5380606987847725e-07,
"loss": 0.1014,
"step": 34000
},
{
"epoch": 3.8724373576309796,
"eval_accuracy": 0.9184508058696175,
"eval_f1_macro": 0.8970972466165426,
"eval_f1_micro": 0.9184508058696175,
"eval_loss": 0.5561444759368896,
"eval_precision_macro": 0.8836350789167062,
"eval_precision_micro": 0.9184508058696175,
"eval_recall_macro": 0.9206973516595941,
"eval_recall_micro": 0.9184508058696175,
"eval_runtime": 8.432,
"eval_samples_per_second": 493.002,
"eval_steps_per_second": 61.67,
"step": 34000
},
{
"epoch": 3.929384965831435,
"grad_norm": 1.3534623384475708,
"learning_rate": 7.939086039413291e-08,
"loss": 0.0948,
"step": 34500
},
{
"epoch": 3.929384965831435,
"eval_accuracy": 0.9191724801539571,
"eval_f1_macro": 0.8982632597992836,
"eval_f1_micro": 0.9191724801539571,
"eval_loss": 0.5567488074302673,
"eval_precision_macro": 0.8854065999210586,
"eval_precision_micro": 0.9191724801539571,
"eval_recall_macro": 0.9212323971384597,
"eval_recall_micro": 0.9191724801539571,
"eval_runtime": 8.4196,
"eval_samples_per_second": 493.73,
"eval_steps_per_second": 61.761,
"step": 34500
},
{
"epoch": 3.9863325740318905,
"grad_norm": 0.00499533349648118,
"learning_rate": 3.6007811694149795e-09,
"loss": 0.0915,
"step": 35000
},
{
"epoch": 3.9863325740318905,
"eval_accuracy": 0.9189319220591773,
"eval_f1_macro": 0.8979067522497397,
"eval_f1_micro": 0.9189319220591773,
"eval_loss": 0.5568965673446655,
"eval_precision_macro": 0.8849842519873105,
"eval_precision_micro": 0.9189319220591773,
"eval_recall_macro": 0.9210492469553095,
"eval_recall_micro": 0.9189319220591773,
"eval_runtime": 8.4437,
"eval_samples_per_second": 492.321,
"eval_steps_per_second": 61.585,
"step": 35000
}
],
"logging_steps": 500,
"max_steps": 35120,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 7.368366329856e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}