square_run_age_gender / trainer_state.json
corranm's picture
End of training
a605c7a verified
{
"best_metric": 1.1161140203475952,
"best_model_checkpoint": "square_run_age_gender/checkpoint-261",
"epoch": 35.0,
"eval_steps": 500,
"global_step": 1015,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06896551724137931,
"grad_norm": 13.471597671508789,
"learning_rate": 1.96078431372549e-06,
"loss": 1.979,
"step": 2
},
{
"epoch": 0.13793103448275862,
"grad_norm": 6.632089138031006,
"learning_rate": 3.92156862745098e-06,
"loss": 1.8207,
"step": 4
},
{
"epoch": 0.20689655172413793,
"grad_norm": 10.463000297546387,
"learning_rate": 5.882352941176471e-06,
"loss": 1.9296,
"step": 6
},
{
"epoch": 0.27586206896551724,
"grad_norm": 7.69188117980957,
"learning_rate": 7.84313725490196e-06,
"loss": 1.9443,
"step": 8
},
{
"epoch": 0.3448275862068966,
"grad_norm": 10.529653549194336,
"learning_rate": 9.803921568627451e-06,
"loss": 1.9129,
"step": 10
},
{
"epoch": 0.41379310344827586,
"grad_norm": 9.45837116241455,
"learning_rate": 1.1764705882352942e-05,
"loss": 2.0835,
"step": 12
},
{
"epoch": 0.4827586206896552,
"grad_norm": 6.899144649505615,
"learning_rate": 1.3725490196078432e-05,
"loss": 1.905,
"step": 14
},
{
"epoch": 0.5517241379310345,
"grad_norm": 6.275972843170166,
"learning_rate": 1.568627450980392e-05,
"loss": 2.0193,
"step": 16
},
{
"epoch": 0.6206896551724138,
"grad_norm": 7.337122440338135,
"learning_rate": 1.7647058823529414e-05,
"loss": 1.7395,
"step": 18
},
{
"epoch": 0.6896551724137931,
"grad_norm": 8.741500854492188,
"learning_rate": 1.9607843137254903e-05,
"loss": 1.8184,
"step": 20
},
{
"epoch": 0.7586206896551724,
"grad_norm": 5.90638542175293,
"learning_rate": 2.1568627450980395e-05,
"loss": 1.8613,
"step": 22
},
{
"epoch": 0.8275862068965517,
"grad_norm": 5.549325942993164,
"learning_rate": 2.3529411764705884e-05,
"loss": 1.8278,
"step": 24
},
{
"epoch": 0.896551724137931,
"grad_norm": 9.570854187011719,
"learning_rate": 2.5490196078431373e-05,
"loss": 1.8713,
"step": 26
},
{
"epoch": 0.9655172413793104,
"grad_norm": 5.850081920623779,
"learning_rate": 2.7450980392156865e-05,
"loss": 1.8891,
"step": 28
},
{
"epoch": 1.0,
"eval_accuracy": 0.25757575757575757,
"eval_f1_macro": 0.17420690764344018,
"eval_f1_micro": 0.25757575757575757,
"eval_f1_weighted": 0.2100702351405982,
"eval_loss": 1.867130160331726,
"eval_precision_macro": 0.1681240063593005,
"eval_precision_micro": 0.25757575757575757,
"eval_precision_weighted": 0.20448824492942141,
"eval_recall_macro": 0.21421012849584278,
"eval_recall_micro": 0.25757575757575757,
"eval_recall_weighted": 0.25757575757575757,
"eval_runtime": 2.1698,
"eval_samples_per_second": 60.835,
"eval_steps_per_second": 7.835,
"step": 29
},
{
"epoch": 1.0344827586206897,
"grad_norm": 8.4002103805542,
"learning_rate": 2.9411764705882354e-05,
"loss": 1.8553,
"step": 30
},
{
"epoch": 1.103448275862069,
"grad_norm": 6.214775085449219,
"learning_rate": 3.137254901960784e-05,
"loss": 1.8232,
"step": 32
},
{
"epoch": 1.1724137931034484,
"grad_norm": 5.48581600189209,
"learning_rate": 3.3333333333333335e-05,
"loss": 1.9136,
"step": 34
},
{
"epoch": 1.2413793103448276,
"grad_norm": 3.806295871734619,
"learning_rate": 3.529411764705883e-05,
"loss": 1.9254,
"step": 36
},
{
"epoch": 1.3103448275862069,
"grad_norm": 5.554256439208984,
"learning_rate": 3.725490196078432e-05,
"loss": 1.8243,
"step": 38
},
{
"epoch": 1.3793103448275863,
"grad_norm": 7.334174156188965,
"learning_rate": 3.9215686274509805e-05,
"loss": 1.9214,
"step": 40
},
{
"epoch": 1.4482758620689655,
"grad_norm": 4.776826858520508,
"learning_rate": 4.11764705882353e-05,
"loss": 1.8654,
"step": 42
},
{
"epoch": 1.5172413793103448,
"grad_norm": 10.249964714050293,
"learning_rate": 4.313725490196079e-05,
"loss": 1.9152,
"step": 44
},
{
"epoch": 1.5862068965517242,
"grad_norm": 5.083812713623047,
"learning_rate": 4.5098039215686275e-05,
"loss": 1.9904,
"step": 46
},
{
"epoch": 1.6551724137931034,
"grad_norm": 4.7975029945373535,
"learning_rate": 4.705882352941177e-05,
"loss": 1.839,
"step": 48
},
{
"epoch": 1.7241379310344827,
"grad_norm": 4.822813987731934,
"learning_rate": 4.901960784313725e-05,
"loss": 1.9303,
"step": 50
},
{
"epoch": 1.793103448275862,
"grad_norm": 6.845904350280762,
"learning_rate": 5.0980392156862745e-05,
"loss": 1.8255,
"step": 52
},
{
"epoch": 1.8620689655172413,
"grad_norm": 7.084125518798828,
"learning_rate": 5.294117647058824e-05,
"loss": 1.871,
"step": 54
},
{
"epoch": 1.9310344827586206,
"grad_norm": 8.454116821289062,
"learning_rate": 5.490196078431373e-05,
"loss": 1.817,
"step": 56
},
{
"epoch": 2.0,
"grad_norm": 3.273526430130005,
"learning_rate": 5.6862745098039215e-05,
"loss": 1.8327,
"step": 58
},
{
"epoch": 2.0,
"eval_accuracy": 0.3181818181818182,
"eval_f1_macro": 0.15698350488823806,
"eval_f1_micro": 0.3181818181818182,
"eval_f1_weighted": 0.19373566841158035,
"eval_loss": 1.8123832941055298,
"eval_precision_macro": 0.13350340136054423,
"eval_precision_micro": 0.3181818181818182,
"eval_precision_weighted": 0.16110209235209236,
"eval_recall_macro": 0.25083144368858656,
"eval_recall_micro": 0.3181818181818182,
"eval_recall_weighted": 0.3181818181818182,
"eval_runtime": 2.1978,
"eval_samples_per_second": 60.059,
"eval_steps_per_second": 7.735,
"step": 58
},
{
"epoch": 2.0689655172413794,
"grad_norm": 7.48805570602417,
"learning_rate": 5.882352941176471e-05,
"loss": 1.875,
"step": 60
},
{
"epoch": 2.1379310344827585,
"grad_norm": 3.892385721206665,
"learning_rate": 6.078431372549019e-05,
"loss": 1.7235,
"step": 62
},
{
"epoch": 2.206896551724138,
"grad_norm": 8.482718467712402,
"learning_rate": 6.274509803921569e-05,
"loss": 1.8003,
"step": 64
},
{
"epoch": 2.2758620689655173,
"grad_norm": 13.920659065246582,
"learning_rate": 6.470588235294118e-05,
"loss": 1.984,
"step": 66
},
{
"epoch": 2.344827586206897,
"grad_norm": 7.427146911621094,
"learning_rate": 6.666666666666667e-05,
"loss": 1.7298,
"step": 68
},
{
"epoch": 2.413793103448276,
"grad_norm": 9.012772560119629,
"learning_rate": 6.862745098039216e-05,
"loss": 1.8282,
"step": 70
},
{
"epoch": 2.4827586206896552,
"grad_norm": 8.024530410766602,
"learning_rate": 7.058823529411765e-05,
"loss": 1.8988,
"step": 72
},
{
"epoch": 2.5517241379310347,
"grad_norm": 6.818090438842773,
"learning_rate": 7.254901960784314e-05,
"loss": 1.6532,
"step": 74
},
{
"epoch": 2.6206896551724137,
"grad_norm": 10.93878173828125,
"learning_rate": 7.450980392156864e-05,
"loss": 2.0298,
"step": 76
},
{
"epoch": 2.689655172413793,
"grad_norm": 9.51241397857666,
"learning_rate": 7.647058823529411e-05,
"loss": 2.0224,
"step": 78
},
{
"epoch": 2.7586206896551726,
"grad_norm": 9.287165641784668,
"learning_rate": 7.843137254901961e-05,
"loss": 1.8984,
"step": 80
},
{
"epoch": 2.8275862068965516,
"grad_norm": 8.898433685302734,
"learning_rate": 8.039215686274511e-05,
"loss": 1.746,
"step": 82
},
{
"epoch": 2.896551724137931,
"grad_norm": 6.119718074798584,
"learning_rate": 8.23529411764706e-05,
"loss": 1.9194,
"step": 84
},
{
"epoch": 2.9655172413793105,
"grad_norm": 6.862753391265869,
"learning_rate": 8.431372549019608e-05,
"loss": 1.9127,
"step": 86
},
{
"epoch": 3.0,
"eval_accuracy": 0.3181818181818182,
"eval_f1_macro": 0.20846518382633952,
"eval_f1_micro": 0.3181818181818182,
"eval_f1_weighted": 0.25755123465797625,
"eval_loss": 1.7830312252044678,
"eval_precision_macro": 0.21283015309910777,
"eval_precision_micro": 0.3181818181818182,
"eval_precision_weighted": 0.2617880187903288,
"eval_recall_macro": 0.26250188964474674,
"eval_recall_micro": 0.3181818181818182,
"eval_recall_weighted": 0.3181818181818182,
"eval_runtime": 2.2371,
"eval_samples_per_second": 59.004,
"eval_steps_per_second": 7.599,
"step": 87
},
{
"epoch": 3.0344827586206895,
"grad_norm": 10.097410202026367,
"learning_rate": 8.627450980392158e-05,
"loss": 1.896,
"step": 88
},
{
"epoch": 3.103448275862069,
"grad_norm": 6.835166931152344,
"learning_rate": 8.823529411764706e-05,
"loss": 1.5849,
"step": 90
},
{
"epoch": 3.1724137931034484,
"grad_norm": 7.843909740447998,
"learning_rate": 9.019607843137255e-05,
"loss": 1.7373,
"step": 92
},
{
"epoch": 3.2413793103448274,
"grad_norm": 7.719568252563477,
"learning_rate": 9.215686274509804e-05,
"loss": 1.5069,
"step": 94
},
{
"epoch": 3.310344827586207,
"grad_norm": 8.885810852050781,
"learning_rate": 9.411764705882353e-05,
"loss": 1.523,
"step": 96
},
{
"epoch": 3.3793103448275863,
"grad_norm": 6.505782127380371,
"learning_rate": 9.607843137254903e-05,
"loss": 1.5585,
"step": 98
},
{
"epoch": 3.4482758620689653,
"grad_norm": 7.679609298706055,
"learning_rate": 9.80392156862745e-05,
"loss": 1.5167,
"step": 100
},
{
"epoch": 3.5172413793103448,
"grad_norm": 12.342342376708984,
"learning_rate": 0.0001,
"loss": 1.9155,
"step": 102
},
{
"epoch": 3.586206896551724,
"grad_norm": 5.502551078796387,
"learning_rate": 9.978094194961665e-05,
"loss": 1.3553,
"step": 104
},
{
"epoch": 3.655172413793103,
"grad_norm": 8.53254222869873,
"learning_rate": 9.95618838992333e-05,
"loss": 1.6346,
"step": 106
},
{
"epoch": 3.7241379310344827,
"grad_norm": 11.95875072479248,
"learning_rate": 9.934282584884996e-05,
"loss": 1.5988,
"step": 108
},
{
"epoch": 3.793103448275862,
"grad_norm": 7.445318222045898,
"learning_rate": 9.912376779846659e-05,
"loss": 1.613,
"step": 110
},
{
"epoch": 3.862068965517241,
"grad_norm": 12.528891563415527,
"learning_rate": 9.890470974808325e-05,
"loss": 1.7571,
"step": 112
},
{
"epoch": 3.9310344827586206,
"grad_norm": 9.875731468200684,
"learning_rate": 9.86856516976999e-05,
"loss": 1.53,
"step": 114
},
{
"epoch": 4.0,
"grad_norm": 6.541798114776611,
"learning_rate": 9.846659364731654e-05,
"loss": 1.4498,
"step": 116
},
{
"epoch": 4.0,
"eval_accuracy": 0.38636363636363635,
"eval_f1_macro": 0.29356560009870114,
"eval_f1_micro": 0.38636363636363635,
"eval_f1_weighted": 0.3437889854048391,
"eval_loss": 1.579649806022644,
"eval_precision_macro": 0.43422035480859006,
"eval_precision_micro": 0.38636363636363635,
"eval_precision_weighted": 0.4527406417112299,
"eval_recall_macro": 0.3179440665154951,
"eval_recall_micro": 0.38636363636363635,
"eval_recall_weighted": 0.38636363636363635,
"eval_runtime": 2.2188,
"eval_samples_per_second": 59.491,
"eval_steps_per_second": 7.662,
"step": 116
},
{
"epoch": 4.068965517241379,
"grad_norm": 11.127625465393066,
"learning_rate": 9.824753559693319e-05,
"loss": 1.5055,
"step": 118
},
{
"epoch": 4.137931034482759,
"grad_norm": 7.331289768218994,
"learning_rate": 9.802847754654983e-05,
"loss": 1.3358,
"step": 120
},
{
"epoch": 4.206896551724138,
"grad_norm": 8.787720680236816,
"learning_rate": 9.78094194961665e-05,
"loss": 1.6528,
"step": 122
},
{
"epoch": 4.275862068965517,
"grad_norm": 8.822704315185547,
"learning_rate": 9.759036144578314e-05,
"loss": 1.2185,
"step": 124
},
{
"epoch": 4.344827586206897,
"grad_norm": 7.718049049377441,
"learning_rate": 9.737130339539979e-05,
"loss": 1.376,
"step": 126
},
{
"epoch": 4.413793103448276,
"grad_norm": 6.089763641357422,
"learning_rate": 9.715224534501643e-05,
"loss": 1.2114,
"step": 128
},
{
"epoch": 4.482758620689655,
"grad_norm": 11.10245418548584,
"learning_rate": 9.693318729463309e-05,
"loss": 1.5952,
"step": 130
},
{
"epoch": 4.551724137931035,
"grad_norm": 9.193964958190918,
"learning_rate": 9.671412924424972e-05,
"loss": 1.691,
"step": 132
},
{
"epoch": 4.620689655172414,
"grad_norm": 7.160553455352783,
"learning_rate": 9.649507119386638e-05,
"loss": 1.7942,
"step": 134
},
{
"epoch": 4.689655172413794,
"grad_norm": 10.51407527923584,
"learning_rate": 9.627601314348302e-05,
"loss": 1.541,
"step": 136
},
{
"epoch": 4.758620689655173,
"grad_norm": 7.315539836883545,
"learning_rate": 9.605695509309968e-05,
"loss": 1.4463,
"step": 138
},
{
"epoch": 4.827586206896552,
"grad_norm": 5.069845676422119,
"learning_rate": 9.583789704271632e-05,
"loss": 1.2094,
"step": 140
},
{
"epoch": 4.896551724137931,
"grad_norm": 7.61802864074707,
"learning_rate": 9.561883899233297e-05,
"loss": 1.3717,
"step": 142
},
{
"epoch": 4.9655172413793105,
"grad_norm": 5.559959888458252,
"learning_rate": 9.539978094194963e-05,
"loss": 1.2166,
"step": 144
},
{
"epoch": 5.0,
"eval_accuracy": 0.4772727272727273,
"eval_f1_macro": 0.3867993464710291,
"eval_f1_micro": 0.4772727272727273,
"eval_f1_weighted": 0.4441923804921774,
"eval_loss": 1.3484843969345093,
"eval_precision_macro": 0.5067572493188256,
"eval_precision_micro": 0.4772727272727273,
"eval_precision_weighted": 0.5372770333115161,
"eval_recall_macro": 0.4076719576719577,
"eval_recall_micro": 0.4772727272727273,
"eval_recall_weighted": 0.4772727272727273,
"eval_runtime": 2.192,
"eval_samples_per_second": 60.219,
"eval_steps_per_second": 7.756,
"step": 145
},
{
"epoch": 5.0344827586206895,
"grad_norm": 6.8597941398620605,
"learning_rate": 9.518072289156626e-05,
"loss": 1.5412,
"step": 146
},
{
"epoch": 5.103448275862069,
"grad_norm": 5.627894401550293,
"learning_rate": 9.496166484118292e-05,
"loss": 1.0841,
"step": 148
},
{
"epoch": 5.172413793103448,
"grad_norm": 10.26051139831543,
"learning_rate": 9.474260679079957e-05,
"loss": 1.1118,
"step": 150
},
{
"epoch": 5.241379310344827,
"grad_norm": 7.57983922958374,
"learning_rate": 9.452354874041621e-05,
"loss": 1.1301,
"step": 152
},
{
"epoch": 5.310344827586207,
"grad_norm": 7.729267120361328,
"learning_rate": 9.430449069003286e-05,
"loss": 1.3092,
"step": 154
},
{
"epoch": 5.379310344827586,
"grad_norm": 5.462944507598877,
"learning_rate": 9.40854326396495e-05,
"loss": 1.0817,
"step": 156
},
{
"epoch": 5.448275862068965,
"grad_norm": 11.766180992126465,
"learning_rate": 9.386637458926615e-05,
"loss": 1.747,
"step": 158
},
{
"epoch": 5.517241379310345,
"grad_norm": 8.114238739013672,
"learning_rate": 9.364731653888281e-05,
"loss": 1.1059,
"step": 160
},
{
"epoch": 5.586206896551724,
"grad_norm": 7.437196254730225,
"learning_rate": 9.342825848849946e-05,
"loss": 1.2654,
"step": 162
},
{
"epoch": 5.655172413793103,
"grad_norm": 9.772777557373047,
"learning_rate": 9.32092004381161e-05,
"loss": 1.1972,
"step": 164
},
{
"epoch": 5.724137931034483,
"grad_norm": 5.461746692657471,
"learning_rate": 9.299014238773275e-05,
"loss": 1.1775,
"step": 166
},
{
"epoch": 5.793103448275862,
"grad_norm": 6.507452964782715,
"learning_rate": 9.27710843373494e-05,
"loss": 1.0831,
"step": 168
},
{
"epoch": 5.862068965517241,
"grad_norm": 10.11184024810791,
"learning_rate": 9.255202628696606e-05,
"loss": 1.002,
"step": 170
},
{
"epoch": 5.931034482758621,
"grad_norm": 9.433456420898438,
"learning_rate": 9.233296823658269e-05,
"loss": 1.5848,
"step": 172
},
{
"epoch": 6.0,
"grad_norm": 9.081482887268066,
"learning_rate": 9.211391018619935e-05,
"loss": 1.5704,
"step": 174
},
{
"epoch": 6.0,
"eval_accuracy": 0.5606060606060606,
"eval_f1_macro": 0.48525454733832823,
"eval_f1_micro": 0.5606060606060606,
"eval_f1_weighted": 0.5509911571429003,
"eval_loss": 1.2560298442840576,
"eval_precision_macro": 0.4905962384953981,
"eval_precision_micro": 0.5606060606060606,
"eval_precision_weighted": 0.5678953399541635,
"eval_recall_macro": 0.5025774754346183,
"eval_recall_micro": 0.5606060606060606,
"eval_recall_weighted": 0.5606060606060606,
"eval_runtime": 2.2113,
"eval_samples_per_second": 59.694,
"eval_steps_per_second": 7.688,
"step": 174
},
{
"epoch": 6.068965517241379,
"grad_norm": 7.783880710601807,
"learning_rate": 9.1894852135816e-05,
"loss": 1.1088,
"step": 176
},
{
"epoch": 6.137931034482759,
"grad_norm": 8.41838550567627,
"learning_rate": 9.167579408543264e-05,
"loss": 1.2991,
"step": 178
},
{
"epoch": 6.206896551724138,
"grad_norm": 7.9511799812316895,
"learning_rate": 9.14567360350493e-05,
"loss": 1.14,
"step": 180
},
{
"epoch": 6.275862068965517,
"grad_norm": 8.29940128326416,
"learning_rate": 9.123767798466593e-05,
"loss": 1.5207,
"step": 182
},
{
"epoch": 6.344827586206897,
"grad_norm": 7.130605220794678,
"learning_rate": 9.10186199342826e-05,
"loss": 1.2116,
"step": 184
},
{
"epoch": 6.413793103448276,
"grad_norm": 6.235842227935791,
"learning_rate": 9.079956188389924e-05,
"loss": 0.9688,
"step": 186
},
{
"epoch": 6.482758620689655,
"grad_norm": 4.8011980056762695,
"learning_rate": 9.058050383351589e-05,
"loss": 0.8892,
"step": 188
},
{
"epoch": 6.551724137931035,
"grad_norm": 7.332155227661133,
"learning_rate": 9.036144578313253e-05,
"loss": 1.0801,
"step": 190
},
{
"epoch": 6.620689655172414,
"grad_norm": 6.098062515258789,
"learning_rate": 9.014238773274918e-05,
"loss": 0.9725,
"step": 192
},
{
"epoch": 6.689655172413794,
"grad_norm": 7.8977813720703125,
"learning_rate": 8.992332968236583e-05,
"loss": 1.1209,
"step": 194
},
{
"epoch": 6.758620689655173,
"grad_norm": 6.769626617431641,
"learning_rate": 8.970427163198248e-05,
"loss": 1.305,
"step": 196
},
{
"epoch": 6.827586206896552,
"grad_norm": 6.7080793380737305,
"learning_rate": 8.948521358159913e-05,
"loss": 1.097,
"step": 198
},
{
"epoch": 6.896551724137931,
"grad_norm": 6.5601806640625,
"learning_rate": 8.926615553121578e-05,
"loss": 0.9768,
"step": 200
},
{
"epoch": 6.9655172413793105,
"grad_norm": 6.294341564178467,
"learning_rate": 8.904709748083242e-05,
"loss": 1.2465,
"step": 202
},
{
"epoch": 7.0,
"eval_accuracy": 0.49242424242424243,
"eval_f1_macro": 0.38536093384329645,
"eval_f1_micro": 0.49242424242424243,
"eval_f1_weighted": 0.4392739197478552,
"eval_loss": 1.4968072175979614,
"eval_precision_macro": 0.5611372180451127,
"eval_precision_micro": 0.49242424242424243,
"eval_precision_weighted": 0.5975304027113237,
"eval_recall_macro": 0.4107180650037793,
"eval_recall_micro": 0.49242424242424243,
"eval_recall_weighted": 0.49242424242424243,
"eval_runtime": 2.2083,
"eval_samples_per_second": 59.774,
"eval_steps_per_second": 7.698,
"step": 203
},
{
"epoch": 7.0344827586206895,
"grad_norm": 6.664538383483887,
"learning_rate": 8.882803943044907e-05,
"loss": 1.0662,
"step": 204
},
{
"epoch": 7.103448275862069,
"grad_norm": 8.425392150878906,
"learning_rate": 8.860898138006573e-05,
"loss": 1.034,
"step": 206
},
{
"epoch": 7.172413793103448,
"grad_norm": 8.941866874694824,
"learning_rate": 8.838992332968236e-05,
"loss": 1.3566,
"step": 208
},
{
"epoch": 7.241379310344827,
"grad_norm": 7.900031089782715,
"learning_rate": 8.817086527929902e-05,
"loss": 1.3013,
"step": 210
},
{
"epoch": 7.310344827586207,
"grad_norm": 7.721550464630127,
"learning_rate": 8.795180722891567e-05,
"loss": 1.1345,
"step": 212
},
{
"epoch": 7.379310344827586,
"grad_norm": 6.119128704071045,
"learning_rate": 8.773274917853231e-05,
"loss": 0.7996,
"step": 214
},
{
"epoch": 7.448275862068965,
"grad_norm": 6.922367095947266,
"learning_rate": 8.751369112814896e-05,
"loss": 1.1724,
"step": 216
},
{
"epoch": 7.517241379310345,
"grad_norm": 7.787768840789795,
"learning_rate": 8.72946330777656e-05,
"loss": 1.0874,
"step": 218
},
{
"epoch": 7.586206896551724,
"grad_norm": 5.789196014404297,
"learning_rate": 8.707557502738227e-05,
"loss": 0.7744,
"step": 220
},
{
"epoch": 7.655172413793103,
"grad_norm": 8.260876655578613,
"learning_rate": 8.685651697699891e-05,
"loss": 0.9799,
"step": 222
},
{
"epoch": 7.724137931034483,
"grad_norm": 5.3789520263671875,
"learning_rate": 8.663745892661556e-05,
"loss": 0.8668,
"step": 224
},
{
"epoch": 7.793103448275862,
"grad_norm": 10.147786140441895,
"learning_rate": 8.64184008762322e-05,
"loss": 1.1608,
"step": 226
},
{
"epoch": 7.862068965517241,
"grad_norm": 5.489473342895508,
"learning_rate": 8.619934282584885e-05,
"loss": 0.8116,
"step": 228
},
{
"epoch": 7.931034482758621,
"grad_norm": 7.813507080078125,
"learning_rate": 8.59802847754655e-05,
"loss": 1.1165,
"step": 230
},
{
"epoch": 8.0,
"grad_norm": 9.428513526916504,
"learning_rate": 8.576122672508216e-05,
"loss": 1.2531,
"step": 232
},
{
"epoch": 8.0,
"eval_accuracy": 0.5,
"eval_f1_macro": 0.4380420530832049,
"eval_f1_micro": 0.5,
"eval_f1_weighted": 0.48411055093350336,
"eval_loss": 1.4662878513336182,
"eval_precision_macro": 0.46228529523343914,
"eval_precision_micro": 0.5,
"eval_precision_weighted": 0.5301592857204586,
"eval_recall_macro": 0.46928949357520783,
"eval_recall_micro": 0.5,
"eval_recall_weighted": 0.5,
"eval_runtime": 2.2486,
"eval_samples_per_second": 58.704,
"eval_steps_per_second": 7.56,
"step": 232
},
{
"epoch": 8.068965517241379,
"grad_norm": 8.723676681518555,
"learning_rate": 8.55421686746988e-05,
"loss": 0.7241,
"step": 234
},
{
"epoch": 8.137931034482758,
"grad_norm": 5.1509904861450195,
"learning_rate": 8.532311062431545e-05,
"loss": 1.1178,
"step": 236
},
{
"epoch": 8.206896551724139,
"grad_norm": 9.173816680908203,
"learning_rate": 8.51040525739321e-05,
"loss": 1.1048,
"step": 238
},
{
"epoch": 8.275862068965518,
"grad_norm": 5.16646146774292,
"learning_rate": 8.488499452354874e-05,
"loss": 0.8407,
"step": 240
},
{
"epoch": 8.344827586206897,
"grad_norm": 7.476856708526611,
"learning_rate": 8.46659364731654e-05,
"loss": 0.7104,
"step": 242
},
{
"epoch": 8.413793103448276,
"grad_norm": 4.857934474945068,
"learning_rate": 8.444687842278203e-05,
"loss": 0.8153,
"step": 244
},
{
"epoch": 8.482758620689655,
"grad_norm": 4.849685192108154,
"learning_rate": 8.42278203723987e-05,
"loss": 0.7941,
"step": 246
},
{
"epoch": 8.551724137931034,
"grad_norm": 6.878391265869141,
"learning_rate": 8.400876232201533e-05,
"loss": 0.8408,
"step": 248
},
{
"epoch": 8.620689655172415,
"grad_norm": 9.568788528442383,
"learning_rate": 8.378970427163199e-05,
"loss": 1.1011,
"step": 250
},
{
"epoch": 8.689655172413794,
"grad_norm": 6.0624284744262695,
"learning_rate": 8.357064622124863e-05,
"loss": 0.6055,
"step": 252
},
{
"epoch": 8.758620689655173,
"grad_norm": 8.931193351745605,
"learning_rate": 8.335158817086528e-05,
"loss": 1.1554,
"step": 254
},
{
"epoch": 8.827586206896552,
"grad_norm": 9.992157936096191,
"learning_rate": 8.313253012048194e-05,
"loss": 0.9706,
"step": 256
},
{
"epoch": 8.89655172413793,
"grad_norm": 7.536012649536133,
"learning_rate": 8.291347207009858e-05,
"loss": 0.9864,
"step": 258
},
{
"epoch": 8.96551724137931,
"grad_norm": 5.046841144561768,
"learning_rate": 8.269441401971523e-05,
"loss": 0.5318,
"step": 260
},
{
"epoch": 9.0,
"eval_accuracy": 0.5909090909090909,
"eval_f1_macro": 0.4938130613497124,
"eval_f1_micro": 0.5909090909090909,
"eval_f1_weighted": 0.564608679657,
"eval_loss": 1.1161140203475952,
"eval_precision_macro": 0.48919183057838517,
"eval_precision_micro": 0.5909090909090909,
"eval_precision_weighted": 0.5594657793187205,
"eval_recall_macro": 0.5175661375661376,
"eval_recall_micro": 0.5909090909090909,
"eval_recall_weighted": 0.5909090909090909,
"eval_runtime": 2.2385,
"eval_samples_per_second": 58.969,
"eval_steps_per_second": 7.594,
"step": 261
},
{
"epoch": 9.03448275862069,
"grad_norm": 8.074467658996582,
"learning_rate": 8.247535596933188e-05,
"loss": 0.9298,
"step": 262
},
{
"epoch": 9.10344827586207,
"grad_norm": 5.22785758972168,
"learning_rate": 8.225629791894852e-05,
"loss": 0.7831,
"step": 264
},
{
"epoch": 9.172413793103448,
"grad_norm": 9.326375007629395,
"learning_rate": 8.203723986856517e-05,
"loss": 0.8289,
"step": 266
},
{
"epoch": 9.241379310344827,
"grad_norm": 5.424740791320801,
"learning_rate": 8.181818181818183e-05,
"loss": 0.5332,
"step": 268
},
{
"epoch": 9.310344827586206,
"grad_norm": 8.164321899414062,
"learning_rate": 8.159912376779846e-05,
"loss": 0.7783,
"step": 270
},
{
"epoch": 9.379310344827585,
"grad_norm": 7.742315769195557,
"learning_rate": 8.138006571741512e-05,
"loss": 0.7159,
"step": 272
},
{
"epoch": 9.448275862068966,
"grad_norm": 6.30488920211792,
"learning_rate": 8.116100766703177e-05,
"loss": 0.9866,
"step": 274
},
{
"epoch": 9.517241379310345,
"grad_norm": 7.696253776550293,
"learning_rate": 8.094194961664841e-05,
"loss": 0.521,
"step": 276
},
{
"epoch": 9.586206896551724,
"grad_norm": 4.019304275512695,
"learning_rate": 8.072289156626507e-05,
"loss": 0.4281,
"step": 278
},
{
"epoch": 9.655172413793103,
"grad_norm": 4.379205703735352,
"learning_rate": 8.05038335158817e-05,
"loss": 0.3542,
"step": 280
},
{
"epoch": 9.724137931034482,
"grad_norm": 7.670277118682861,
"learning_rate": 8.028477546549837e-05,
"loss": 0.9465,
"step": 282
},
{
"epoch": 9.793103448275861,
"grad_norm": 8.019712448120117,
"learning_rate": 8.0065717415115e-05,
"loss": 1.087,
"step": 284
},
{
"epoch": 9.862068965517242,
"grad_norm": 8.645779609680176,
"learning_rate": 7.984665936473166e-05,
"loss": 0.88,
"step": 286
},
{
"epoch": 9.931034482758621,
"grad_norm": 5.542499542236328,
"learning_rate": 7.96276013143483e-05,
"loss": 0.7297,
"step": 288
},
{
"epoch": 10.0,
"grad_norm": 5.367166042327881,
"learning_rate": 7.940854326396495e-05,
"loss": 0.6824,
"step": 290
},
{
"epoch": 10.0,
"eval_accuracy": 0.5909090909090909,
"eval_f1_macro": 0.48022039225046736,
"eval_f1_micro": 0.5909090909090909,
"eval_f1_weighted": 0.5515227462595883,
"eval_loss": 1.1811466217041016,
"eval_precision_macro": 0.4813612313612314,
"eval_precision_micro": 0.5909090909090909,
"eval_precision_weighted": 0.549845041322314,
"eval_recall_macro": 0.5147770219198791,
"eval_recall_micro": 0.5909090909090909,
"eval_recall_weighted": 0.5909090909090909,
"eval_runtime": 2.1897,
"eval_samples_per_second": 60.283,
"eval_steps_per_second": 7.764,
"step": 290
},
{
"epoch": 10.068965517241379,
"grad_norm": 8.408239364624023,
"learning_rate": 7.918948521358161e-05,
"loss": 0.5506,
"step": 292
},
{
"epoch": 10.137931034482758,
"grad_norm": 4.473087787628174,
"learning_rate": 7.897042716319824e-05,
"loss": 0.4722,
"step": 294
},
{
"epoch": 10.206896551724139,
"grad_norm": 5.755477428436279,
"learning_rate": 7.87513691128149e-05,
"loss": 0.6369,
"step": 296
},
{
"epoch": 10.275862068965518,
"grad_norm": 9.0516939163208,
"learning_rate": 7.853231106243155e-05,
"loss": 0.6167,
"step": 298
},
{
"epoch": 10.344827586206897,
"grad_norm": 5.995102405548096,
"learning_rate": 7.83132530120482e-05,
"loss": 0.6051,
"step": 300
},
{
"epoch": 10.413793103448276,
"grad_norm": 7.3448805809021,
"learning_rate": 7.809419496166484e-05,
"loss": 0.5321,
"step": 302
},
{
"epoch": 10.482758620689655,
"grad_norm": 8.903775215148926,
"learning_rate": 7.78751369112815e-05,
"loss": 0.6208,
"step": 304
},
{
"epoch": 10.551724137931034,
"grad_norm": 9.240314483642578,
"learning_rate": 7.765607886089813e-05,
"loss": 0.9838,
"step": 306
},
{
"epoch": 10.620689655172415,
"grad_norm": 10.112192153930664,
"learning_rate": 7.74370208105148e-05,
"loss": 0.905,
"step": 308
},
{
"epoch": 10.689655172413794,
"grad_norm": 9.252533912658691,
"learning_rate": 7.721796276013144e-05,
"loss": 1.002,
"step": 310
},
{
"epoch": 10.758620689655173,
"grad_norm": 7.741162300109863,
"learning_rate": 7.699890470974809e-05,
"loss": 1.0869,
"step": 312
},
{
"epoch": 10.827586206896552,
"grad_norm": 9.742755889892578,
"learning_rate": 7.677984665936475e-05,
"loss": 0.5421,
"step": 314
},
{
"epoch": 10.89655172413793,
"grad_norm": 8.84914493560791,
"learning_rate": 7.656078860898138e-05,
"loss": 0.523,
"step": 316
},
{
"epoch": 10.96551724137931,
"grad_norm": 7.173616409301758,
"learning_rate": 7.634173055859804e-05,
"loss": 0.6324,
"step": 318
},
{
"epoch": 11.0,
"eval_accuracy": 0.5757575757575758,
"eval_f1_macro": 0.4926929392446634,
"eval_f1_micro": 0.5757575757575758,
"eval_f1_weighted": 0.5506095437129921,
"eval_loss": 1.2358123064041138,
"eval_precision_macro": 0.5015354104024055,
"eval_precision_micro": 0.5757575757575758,
"eval_precision_weighted": 0.5689625015643824,
"eval_recall_macro": 0.5226228269085412,
"eval_recall_micro": 0.5757575757575758,
"eval_recall_weighted": 0.5757575757575758,
"eval_runtime": 2.2118,
"eval_samples_per_second": 59.681,
"eval_steps_per_second": 7.686,
"step": 319
},
{
"epoch": 11.03448275862069,
"grad_norm": 8.598217964172363,
"learning_rate": 7.612267250821467e-05,
"loss": 0.703,
"step": 320
},
{
"epoch": 11.10344827586207,
"grad_norm": 4.415513038635254,
"learning_rate": 7.590361445783133e-05,
"loss": 0.5108,
"step": 322
},
{
"epoch": 11.172413793103448,
"grad_norm": 6.3496479988098145,
"learning_rate": 7.568455640744798e-05,
"loss": 0.673,
"step": 324
},
{
"epoch": 11.241379310344827,
"grad_norm": 5.767419338226318,
"learning_rate": 7.546549835706462e-05,
"loss": 0.3173,
"step": 326
},
{
"epoch": 11.310344827586206,
"grad_norm": 5.924855709075928,
"learning_rate": 7.524644030668127e-05,
"loss": 0.4236,
"step": 328
},
{
"epoch": 11.379310344827585,
"grad_norm": 6.807033538818359,
"learning_rate": 7.502738225629792e-05,
"loss": 0.7664,
"step": 330
},
{
"epoch": 11.448275862068966,
"grad_norm": 11.941972732543945,
"learning_rate": 7.480832420591458e-05,
"loss": 0.8078,
"step": 332
},
{
"epoch": 11.517241379310345,
"grad_norm": 5.140421390533447,
"learning_rate": 7.458926615553122e-05,
"loss": 0.4366,
"step": 334
},
{
"epoch": 11.586206896551724,
"grad_norm": 4.931862831115723,
"learning_rate": 7.437020810514787e-05,
"loss": 0.552,
"step": 336
},
{
"epoch": 11.655172413793103,
"grad_norm": 6.9343647956848145,
"learning_rate": 7.415115005476451e-05,
"loss": 0.5526,
"step": 338
},
{
"epoch": 11.724137931034482,
"grad_norm": 4.292028903961182,
"learning_rate": 7.393209200438116e-05,
"loss": 0.5248,
"step": 340
},
{
"epoch": 11.793103448275861,
"grad_norm": 6.613484859466553,
"learning_rate": 7.371303395399781e-05,
"loss": 0.7224,
"step": 342
},
{
"epoch": 11.862068965517242,
"grad_norm": 5.9594502449035645,
"learning_rate": 7.349397590361447e-05,
"loss": 0.4696,
"step": 344
},
{
"epoch": 11.931034482758621,
"grad_norm": 5.859204292297363,
"learning_rate": 7.327491785323111e-05,
"loss": 0.3449,
"step": 346
},
{
"epoch": 12.0,
"grad_norm": 5.6179046630859375,
"learning_rate": 7.305585980284776e-05,
"loss": 0.4145,
"step": 348
},
{
"epoch": 12.0,
"eval_accuracy": 0.6742424242424242,
"eval_f1_macro": 0.5845780796908616,
"eval_f1_micro": 0.6742424242424242,
"eval_f1_weighted": 0.6643483452693979,
"eval_loss": 1.160757064819336,
"eval_precision_macro": 0.5822360668405294,
"eval_precision_micro": 0.6742424242424242,
"eval_precision_weighted": 0.6680857766304014,
"eval_recall_macro": 0.6004686318972033,
"eval_recall_micro": 0.6742424242424242,
"eval_recall_weighted": 0.6742424242424242,
"eval_runtime": 2.2477,
"eval_samples_per_second": 58.726,
"eval_steps_per_second": 7.563,
"step": 348
},
{
"epoch": 12.068965517241379,
"grad_norm": 8.443059921264648,
"learning_rate": 7.28368017524644e-05,
"loss": 0.5175,
"step": 350
},
{
"epoch": 12.137931034482758,
"grad_norm": 9.789414405822754,
"learning_rate": 7.261774370208105e-05,
"loss": 0.8298,
"step": 352
},
{
"epoch": 12.206896551724139,
"grad_norm": 4.579267978668213,
"learning_rate": 7.239868565169771e-05,
"loss": 0.372,
"step": 354
},
{
"epoch": 12.275862068965518,
"grad_norm": 7.095308303833008,
"learning_rate": 7.217962760131434e-05,
"loss": 0.4208,
"step": 356
},
{
"epoch": 12.344827586206897,
"grad_norm": 2.6553964614868164,
"learning_rate": 7.1960569550931e-05,
"loss": 0.2939,
"step": 358
},
{
"epoch": 12.413793103448276,
"grad_norm": 6.320093631744385,
"learning_rate": 7.174151150054765e-05,
"loss": 0.556,
"step": 360
},
{
"epoch": 12.482758620689655,
"grad_norm": 8.005858421325684,
"learning_rate": 7.15224534501643e-05,
"loss": 0.4923,
"step": 362
},
{
"epoch": 12.551724137931034,
"grad_norm": 4.577536106109619,
"learning_rate": 7.130339539978094e-05,
"loss": 0.2823,
"step": 364
},
{
"epoch": 12.620689655172415,
"grad_norm": 7.0326008796691895,
"learning_rate": 7.108433734939759e-05,
"loss": 0.3633,
"step": 366
},
{
"epoch": 12.689655172413794,
"grad_norm": 8.876154899597168,
"learning_rate": 7.086527929901425e-05,
"loss": 0.5113,
"step": 368
},
{
"epoch": 12.758620689655173,
"grad_norm": 9.319496154785156,
"learning_rate": 7.06462212486309e-05,
"loss": 0.9363,
"step": 370
},
{
"epoch": 12.827586206896552,
"grad_norm": 4.129659175872803,
"learning_rate": 7.042716319824754e-05,
"loss": 0.43,
"step": 372
},
{
"epoch": 12.89655172413793,
"grad_norm": 8.008423805236816,
"learning_rate": 7.020810514786419e-05,
"loss": 0.4344,
"step": 374
},
{
"epoch": 12.96551724137931,
"grad_norm": 6.894300937652588,
"learning_rate": 6.998904709748083e-05,
"loss": 0.4805,
"step": 376
},
{
"epoch": 13.0,
"eval_accuracy": 0.5757575757575758,
"eval_f1_macro": 0.527595781401402,
"eval_f1_micro": 0.5757575757575758,
"eval_f1_weighted": 0.5689098612906363,
"eval_loss": 1.319955587387085,
"eval_precision_macro": 0.5767229968910641,
"eval_precision_micro": 0.5757575757575758,
"eval_precision_weighted": 0.6137522608110844,
"eval_recall_macro": 0.5268707482993198,
"eval_recall_micro": 0.5757575757575758,
"eval_recall_weighted": 0.5757575757575758,
"eval_runtime": 2.206,
"eval_samples_per_second": 59.836,
"eval_steps_per_second": 7.706,
"step": 377
},
{
"epoch": 13.03448275862069,
"grad_norm": 7.305329322814941,
"learning_rate": 6.976998904709748e-05,
"loss": 0.4888,
"step": 378
},
{
"epoch": 13.10344827586207,
"grad_norm": 7.0207624435424805,
"learning_rate": 6.955093099671414e-05,
"loss": 0.4489,
"step": 380
},
{
"epoch": 13.172413793103448,
"grad_norm": 3.134613513946533,
"learning_rate": 6.933187294633077e-05,
"loss": 0.2369,
"step": 382
},
{
"epoch": 13.241379310344827,
"grad_norm": 4.9292097091674805,
"learning_rate": 6.911281489594743e-05,
"loss": 0.4839,
"step": 384
},
{
"epoch": 13.310344827586206,
"grad_norm": 2.2589919567108154,
"learning_rate": 6.889375684556408e-05,
"loss": 0.222,
"step": 386
},
{
"epoch": 13.379310344827585,
"grad_norm": 4.867913246154785,
"learning_rate": 6.867469879518072e-05,
"loss": 0.2502,
"step": 388
},
{
"epoch": 13.448275862068966,
"grad_norm": 3.433598756790161,
"learning_rate": 6.845564074479738e-05,
"loss": 0.2846,
"step": 390
},
{
"epoch": 13.517241379310345,
"grad_norm": 4.033895492553711,
"learning_rate": 6.823658269441402e-05,
"loss": 0.2156,
"step": 392
},
{
"epoch": 13.586206896551724,
"grad_norm": 6.298670768737793,
"learning_rate": 6.801752464403068e-05,
"loss": 0.2056,
"step": 394
},
{
"epoch": 13.655172413793103,
"grad_norm": 5.606608867645264,
"learning_rate": 6.779846659364732e-05,
"loss": 0.5755,
"step": 396
},
{
"epoch": 13.724137931034482,
"grad_norm": 4.751099109649658,
"learning_rate": 6.757940854326397e-05,
"loss": 0.3081,
"step": 398
},
{
"epoch": 13.793103448275861,
"grad_norm": 6.851717472076416,
"learning_rate": 6.736035049288061e-05,
"loss": 0.512,
"step": 400
},
{
"epoch": 13.862068965517242,
"grad_norm": 6.983868598937988,
"learning_rate": 6.714129244249726e-05,
"loss": 0.5849,
"step": 402
},
{
"epoch": 13.931034482758621,
"grad_norm": 9.133752822875977,
"learning_rate": 6.692223439211392e-05,
"loss": 0.7721,
"step": 404
},
{
"epoch": 14.0,
"grad_norm": 9.328068733215332,
"learning_rate": 6.670317634173057e-05,
"loss": 0.6232,
"step": 406
},
{
"epoch": 14.0,
"eval_accuracy": 0.5757575757575758,
"eval_f1_macro": 0.4789696951253122,
"eval_f1_micro": 0.5757575757575758,
"eval_f1_weighted": 0.5516716249691459,
"eval_loss": 1.319008231163025,
"eval_precision_macro": 0.502492644655116,
"eval_precision_micro": 0.5757575757575758,
"eval_precision_weighted": 0.5734135715543037,
"eval_recall_macro": 0.5006122448979592,
"eval_recall_micro": 0.5757575757575758,
"eval_recall_weighted": 0.5757575757575758,
"eval_runtime": 2.1818,
"eval_samples_per_second": 60.499,
"eval_steps_per_second": 7.792,
"step": 406
},
{
"epoch": 14.068965517241379,
"grad_norm": 3.8055107593536377,
"learning_rate": 6.648411829134721e-05,
"loss": 0.2276,
"step": 408
},
{
"epoch": 14.137931034482758,
"grad_norm": 3.0821352005004883,
"learning_rate": 6.626506024096386e-05,
"loss": 0.2451,
"step": 410
},
{
"epoch": 14.206896551724139,
"grad_norm": 7.605597972869873,
"learning_rate": 6.60460021905805e-05,
"loss": 0.3332,
"step": 412
},
{
"epoch": 14.275862068965518,
"grad_norm": 7.357143402099609,
"learning_rate": 6.582694414019715e-05,
"loss": 0.5178,
"step": 414
},
{
"epoch": 14.344827586206897,
"grad_norm": 2.5872600078582764,
"learning_rate": 6.560788608981381e-05,
"loss": 0.1075,
"step": 416
},
{
"epoch": 14.413793103448276,
"grad_norm": 5.9071879386901855,
"learning_rate": 6.538882803943044e-05,
"loss": 0.2012,
"step": 418
},
{
"epoch": 14.482758620689655,
"grad_norm": 6.262528419494629,
"learning_rate": 6.51697699890471e-05,
"loss": 0.219,
"step": 420
},
{
"epoch": 14.551724137931034,
"grad_norm": 4.72699499130249,
"learning_rate": 6.495071193866375e-05,
"loss": 0.4705,
"step": 422
},
{
"epoch": 14.620689655172415,
"grad_norm": 2.8275880813598633,
"learning_rate": 6.47316538882804e-05,
"loss": 0.259,
"step": 424
},
{
"epoch": 14.689655172413794,
"grad_norm": 4.1800312995910645,
"learning_rate": 6.451259583789706e-05,
"loss": 0.2478,
"step": 426
},
{
"epoch": 14.758620689655173,
"grad_norm": 6.540757179260254,
"learning_rate": 6.429353778751369e-05,
"loss": 0.3005,
"step": 428
},
{
"epoch": 14.827586206896552,
"grad_norm": 3.0680577754974365,
"learning_rate": 6.407447973713035e-05,
"loss": 0.3351,
"step": 430
},
{
"epoch": 14.89655172413793,
"grad_norm": 12.751289367675781,
"learning_rate": 6.385542168674698e-05,
"loss": 0.4988,
"step": 432
},
{
"epoch": 14.96551724137931,
"grad_norm": 7.108068466186523,
"learning_rate": 6.363636363636364e-05,
"loss": 0.3475,
"step": 434
},
{
"epoch": 15.0,
"eval_accuracy": 0.696969696969697,
"eval_f1_macro": 0.630347810119719,
"eval_f1_micro": 0.696969696969697,
"eval_f1_weighted": 0.6894476798984056,
"eval_loss": 1.185251235961914,
"eval_precision_macro": 0.6716845878136201,
"eval_precision_micro": 0.696969696969697,
"eval_precision_weighted": 0.7087732160312806,
"eval_recall_macro": 0.6311791383219955,
"eval_recall_micro": 0.696969696969697,
"eval_recall_weighted": 0.696969696969697,
"eval_runtime": 2.1666,
"eval_samples_per_second": 60.925,
"eval_steps_per_second": 7.846,
"step": 435
},
{
"epoch": 15.03448275862069,
"grad_norm": 6.197813510894775,
"learning_rate": 6.341730558598029e-05,
"loss": 0.162,
"step": 436
},
{
"epoch": 15.10344827586207,
"grad_norm": 3.9454376697540283,
"learning_rate": 6.319824753559693e-05,
"loss": 0.3396,
"step": 438
},
{
"epoch": 15.172413793103448,
"grad_norm": 8.980201721191406,
"learning_rate": 6.297918948521358e-05,
"loss": 0.2316,
"step": 440
},
{
"epoch": 15.241379310344827,
"grad_norm": 4.091892719268799,
"learning_rate": 6.276013143483024e-05,
"loss": 0.277,
"step": 442
},
{
"epoch": 15.310344827586206,
"grad_norm": 7.498462200164795,
"learning_rate": 6.254107338444689e-05,
"loss": 0.3305,
"step": 444
},
{
"epoch": 15.379310344827585,
"grad_norm": 6.023470401763916,
"learning_rate": 6.232201533406353e-05,
"loss": 0.1605,
"step": 446
},
{
"epoch": 15.448275862068966,
"grad_norm": 4.88850212097168,
"learning_rate": 6.210295728368018e-05,
"loss": 0.1803,
"step": 448
},
{
"epoch": 15.517241379310345,
"grad_norm": 2.798743724822998,
"learning_rate": 6.188389923329682e-05,
"loss": 0.1585,
"step": 450
},
{
"epoch": 15.586206896551724,
"grad_norm": 6.272281646728516,
"learning_rate": 6.166484118291348e-05,
"loss": 0.1494,
"step": 452
},
{
"epoch": 15.655172413793103,
"grad_norm": 7.970227241516113,
"learning_rate": 6.144578313253012e-05,
"loss": 0.3169,
"step": 454
},
{
"epoch": 15.724137931034482,
"grad_norm": 6.0759406089782715,
"learning_rate": 6.122672508214678e-05,
"loss": 0.3508,
"step": 456
},
{
"epoch": 15.793103448275861,
"grad_norm": 4.981871128082275,
"learning_rate": 6.1007667031763415e-05,
"loss": 0.1469,
"step": 458
},
{
"epoch": 15.862068965517242,
"grad_norm": 8.259228706359863,
"learning_rate": 6.078860898138007e-05,
"loss": 0.3808,
"step": 460
},
{
"epoch": 15.931034482758621,
"grad_norm": 5.493587017059326,
"learning_rate": 6.056955093099672e-05,
"loss": 0.218,
"step": 462
},
{
"epoch": 16.0,
"grad_norm": 5.321525573730469,
"learning_rate": 6.035049288061336e-05,
"loss": 0.1956,
"step": 464
},
{
"epoch": 16.0,
"eval_accuracy": 0.5151515151515151,
"eval_f1_macro": 0.4323315041705846,
"eval_f1_micro": 0.5151515151515151,
"eval_f1_weighted": 0.4974051721657085,
"eval_loss": 1.569486141204834,
"eval_precision_macro": 0.47551801581876774,
"eval_precision_micro": 0.5151515151515151,
"eval_precision_weighted": 0.5333931937281219,
"eval_recall_macro": 0.4357898715041572,
"eval_recall_micro": 0.5151515151515151,
"eval_recall_weighted": 0.5151515151515151,
"eval_runtime": 2.1474,
"eval_samples_per_second": 61.468,
"eval_steps_per_second": 7.916,
"step": 464
},
{
"epoch": 16.06896551724138,
"grad_norm": 7.035810947418213,
"learning_rate": 6.0131434830230014e-05,
"loss": 0.4266,
"step": 466
},
{
"epoch": 16.137931034482758,
"grad_norm": 3.2283682823181152,
"learning_rate": 5.991237677984666e-05,
"loss": 0.2042,
"step": 468
},
{
"epoch": 16.20689655172414,
"grad_norm": 3.6779544353485107,
"learning_rate": 5.969331872946331e-05,
"loss": 0.1058,
"step": 470
},
{
"epoch": 16.275862068965516,
"grad_norm": 1.8620399236679077,
"learning_rate": 5.9474260679079966e-05,
"loss": 0.1272,
"step": 472
},
{
"epoch": 16.344827586206897,
"grad_norm": 2.111825942993164,
"learning_rate": 5.9255202628696605e-05,
"loss": 0.1298,
"step": 474
},
{
"epoch": 16.413793103448278,
"grad_norm": 6.74976110458374,
"learning_rate": 5.903614457831326e-05,
"loss": 0.3262,
"step": 476
},
{
"epoch": 16.482758620689655,
"grad_norm": 5.992347240447998,
"learning_rate": 5.88170865279299e-05,
"loss": 0.3271,
"step": 478
},
{
"epoch": 16.551724137931036,
"grad_norm": 2.5913877487182617,
"learning_rate": 5.859802847754655e-05,
"loss": 0.1947,
"step": 480
},
{
"epoch": 16.620689655172413,
"grad_norm": 7.1807403564453125,
"learning_rate": 5.8378970427163204e-05,
"loss": 0.1701,
"step": 482
},
{
"epoch": 16.689655172413794,
"grad_norm": 5.666691303253174,
"learning_rate": 5.815991237677984e-05,
"loss": 0.2304,
"step": 484
},
{
"epoch": 16.75862068965517,
"grad_norm": 6.325366973876953,
"learning_rate": 5.7940854326396496e-05,
"loss": 0.3751,
"step": 486
},
{
"epoch": 16.82758620689655,
"grad_norm": 4.312324523925781,
"learning_rate": 5.772179627601315e-05,
"loss": 0.16,
"step": 488
},
{
"epoch": 16.896551724137932,
"grad_norm": 4.738943576812744,
"learning_rate": 5.7502738225629795e-05,
"loss": 0.2339,
"step": 490
},
{
"epoch": 16.96551724137931,
"grad_norm": 4.70164155960083,
"learning_rate": 5.728368017524645e-05,
"loss": 0.1519,
"step": 492
},
{
"epoch": 17.0,
"eval_accuracy": 0.6439393939393939,
"eval_f1_macro": 0.5818818031106167,
"eval_f1_micro": 0.6439393939393939,
"eval_f1_weighted": 0.6317482833372663,
"eval_loss": 1.440421223640442,
"eval_precision_macro": 0.6438369250139081,
"eval_precision_micro": 0.6439393939393939,
"eval_precision_weighted": 0.657660361816567,
"eval_recall_macro": 0.5705744520030234,
"eval_recall_micro": 0.6439393939393939,
"eval_recall_weighted": 0.6439393939393939,
"eval_runtime": 2.213,
"eval_samples_per_second": 59.648,
"eval_steps_per_second": 7.682,
"step": 493
},
{
"epoch": 17.03448275862069,
"grad_norm": 5.552275657653809,
"learning_rate": 5.706462212486309e-05,
"loss": 0.1605,
"step": 494
},
{
"epoch": 17.103448275862068,
"grad_norm": 1.6765620708465576,
"learning_rate": 5.684556407447974e-05,
"loss": 0.057,
"step": 496
},
{
"epoch": 17.17241379310345,
"grad_norm": 2.917738437652588,
"learning_rate": 5.6626506024096394e-05,
"loss": 0.0525,
"step": 498
},
{
"epoch": 17.24137931034483,
"grad_norm": 4.772071838378906,
"learning_rate": 5.640744797371303e-05,
"loss": 0.0594,
"step": 500
},
{
"epoch": 17.310344827586206,
"grad_norm": 5.168885231018066,
"learning_rate": 5.6188389923329686e-05,
"loss": 0.1819,
"step": 502
},
{
"epoch": 17.379310344827587,
"grad_norm": 6.547173976898193,
"learning_rate": 5.596933187294633e-05,
"loss": 0.1747,
"step": 504
},
{
"epoch": 17.448275862068964,
"grad_norm": 2.393808364868164,
"learning_rate": 5.575027382256298e-05,
"loss": 0.1172,
"step": 506
},
{
"epoch": 17.517241379310345,
"grad_norm": 6.183032512664795,
"learning_rate": 5.553121577217963e-05,
"loss": 0.1184,
"step": 508
},
{
"epoch": 17.586206896551722,
"grad_norm": 12.154343605041504,
"learning_rate": 5.531215772179628e-05,
"loss": 0.2882,
"step": 510
},
{
"epoch": 17.655172413793103,
"grad_norm": 7.199910640716553,
"learning_rate": 5.509309967141293e-05,
"loss": 0.3635,
"step": 512
},
{
"epoch": 17.724137931034484,
"grad_norm": 1.0473498106002808,
"learning_rate": 5.487404162102957e-05,
"loss": 0.0773,
"step": 514
},
{
"epoch": 17.79310344827586,
"grad_norm": 1.043884038925171,
"learning_rate": 5.465498357064622e-05,
"loss": 0.0349,
"step": 516
},
{
"epoch": 17.862068965517242,
"grad_norm": 0.591170072555542,
"learning_rate": 5.4435925520262876e-05,
"loss": 0.1192,
"step": 518
},
{
"epoch": 17.93103448275862,
"grad_norm": 4.3712477684021,
"learning_rate": 5.4216867469879516e-05,
"loss": 0.1702,
"step": 520
},
{
"epoch": 18.0,
"grad_norm": 5.553340911865234,
"learning_rate": 5.399780941949617e-05,
"loss": 0.1031,
"step": 522
},
{
"epoch": 18.0,
"eval_accuracy": 0.6136363636363636,
"eval_f1_macro": 0.5370450788240546,
"eval_f1_micro": 0.6136363636363636,
"eval_f1_weighted": 0.6040564132330857,
"eval_loss": 1.4877225160598755,
"eval_precision_macro": 0.5351284054291573,
"eval_precision_micro": 0.6136363636363636,
"eval_precision_weighted": 0.5975074566581743,
"eval_recall_macro": 0.5421919879062737,
"eval_recall_micro": 0.6136363636363636,
"eval_recall_weighted": 0.6136363636363636,
"eval_runtime": 2.2122,
"eval_samples_per_second": 59.668,
"eval_steps_per_second": 7.685,
"step": 522
},
{
"epoch": 18.06896551724138,
"grad_norm": 2.231707811355591,
"learning_rate": 5.3778751369112815e-05,
"loss": 0.0562,
"step": 524
},
{
"epoch": 18.137931034482758,
"grad_norm": 1.9797624349594116,
"learning_rate": 5.355969331872947e-05,
"loss": 0.1513,
"step": 526
},
{
"epoch": 18.20689655172414,
"grad_norm": 4.362570285797119,
"learning_rate": 5.334063526834612e-05,
"loss": 0.1637,
"step": 528
},
{
"epoch": 18.275862068965516,
"grad_norm": 5.458191871643066,
"learning_rate": 5.312157721796276e-05,
"loss": 0.1761,
"step": 530
},
{
"epoch": 18.344827586206897,
"grad_norm": 12.664368629455566,
"learning_rate": 5.290251916757941e-05,
"loss": 0.2171,
"step": 532
},
{
"epoch": 18.413793103448278,
"grad_norm": 4.849126815795898,
"learning_rate": 5.2683461117196066e-05,
"loss": 0.1213,
"step": 534
},
{
"epoch": 18.482758620689655,
"grad_norm": 1.9513343572616577,
"learning_rate": 5.2464403066812705e-05,
"loss": 0.0647,
"step": 536
},
{
"epoch": 18.551724137931036,
"grad_norm": 5.6937642097473145,
"learning_rate": 5.224534501642936e-05,
"loss": 0.1924,
"step": 538
},
{
"epoch": 18.620689655172413,
"grad_norm": 6.157546043395996,
"learning_rate": 5.2026286966046e-05,
"loss": 0.1621,
"step": 540
},
{
"epoch": 18.689655172413794,
"grad_norm": 3.375688076019287,
"learning_rate": 5.180722891566265e-05,
"loss": 0.0725,
"step": 542
},
{
"epoch": 18.75862068965517,
"grad_norm": 1.283026099205017,
"learning_rate": 5.1588170865279304e-05,
"loss": 0.1705,
"step": 544
},
{
"epoch": 18.82758620689655,
"grad_norm": 6.894308090209961,
"learning_rate": 5.136911281489595e-05,
"loss": 0.2579,
"step": 546
},
{
"epoch": 18.896551724137932,
"grad_norm": 7.978748321533203,
"learning_rate": 5.11500547645126e-05,
"loss": 0.1522,
"step": 548
},
{
"epoch": 18.96551724137931,
"grad_norm": 3.8156979084014893,
"learning_rate": 5.093099671412924e-05,
"loss": 0.0615,
"step": 550
},
{
"epoch": 19.0,
"eval_accuracy": 0.6060606060606061,
"eval_f1_macro": 0.6012825511436246,
"eval_f1_micro": 0.6060606060606061,
"eval_f1_weighted": 0.6106316401527286,
"eval_loss": 1.4801414012908936,
"eval_precision_macro": 0.6475544200111578,
"eval_precision_micro": 0.6060606060606061,
"eval_precision_weighted": 0.6581095440160177,
"eval_recall_macro": 0.5951398337112623,
"eval_recall_micro": 0.6060606060606061,
"eval_recall_weighted": 0.6060606060606061,
"eval_runtime": 2.2056,
"eval_samples_per_second": 59.847,
"eval_steps_per_second": 7.708,
"step": 551
},
{
"epoch": 19.03448275862069,
"grad_norm": 1.505656361579895,
"learning_rate": 5.0711938663745895e-05,
"loss": 0.0365,
"step": 552
},
{
"epoch": 19.103448275862068,
"grad_norm": 1.1916121244430542,
"learning_rate": 5.049288061336255e-05,
"loss": 0.099,
"step": 554
},
{
"epoch": 19.17241379310345,
"grad_norm": 3.222411632537842,
"learning_rate": 5.027382256297919e-05,
"loss": 0.0374,
"step": 556
},
{
"epoch": 19.24137931034483,
"grad_norm": 4.7354536056518555,
"learning_rate": 5.005476451259584e-05,
"loss": 0.0901,
"step": 558
},
{
"epoch": 19.310344827586206,
"grad_norm": 0.37637993693351746,
"learning_rate": 4.983570646221249e-05,
"loss": 0.0087,
"step": 560
},
{
"epoch": 19.379310344827587,
"grad_norm": 1.4076848030090332,
"learning_rate": 4.961664841182913e-05,
"loss": 0.2459,
"step": 562
},
{
"epoch": 19.448275862068964,
"grad_norm": 7.020608901977539,
"learning_rate": 4.9397590361445786e-05,
"loss": 0.1859,
"step": 564
},
{
"epoch": 19.517241379310345,
"grad_norm": 1.6740795373916626,
"learning_rate": 4.917853231106244e-05,
"loss": 0.0443,
"step": 566
},
{
"epoch": 19.586206896551722,
"grad_norm": 1.2484346628189087,
"learning_rate": 4.8959474260679085e-05,
"loss": 0.026,
"step": 568
},
{
"epoch": 19.655172413793103,
"grad_norm": 0.44467589259147644,
"learning_rate": 4.874041621029573e-05,
"loss": 0.0704,
"step": 570
},
{
"epoch": 19.724137931034484,
"grad_norm": 1.164262056350708,
"learning_rate": 4.852135815991238e-05,
"loss": 0.0843,
"step": 572
},
{
"epoch": 19.79310344827586,
"grad_norm": 2.4461233615875244,
"learning_rate": 4.8302300109529024e-05,
"loss": 0.1391,
"step": 574
},
{
"epoch": 19.862068965517242,
"grad_norm": 0.4338299334049225,
"learning_rate": 4.808324205914568e-05,
"loss": 0.0174,
"step": 576
},
{
"epoch": 19.93103448275862,
"grad_norm": 14.353382110595703,
"learning_rate": 4.786418400876232e-05,
"loss": 0.0887,
"step": 578
},
{
"epoch": 20.0,
"grad_norm": 0.43623942136764526,
"learning_rate": 4.764512595837897e-05,
"loss": 0.0249,
"step": 580
},
{
"epoch": 20.0,
"eval_accuracy": 0.5909090909090909,
"eval_f1_macro": 0.5197787455591448,
"eval_f1_micro": 0.5909090909090909,
"eval_f1_weighted": 0.5825078945882032,
"eval_loss": 1.6081513166427612,
"eval_precision_macro": 0.5148994878087059,
"eval_precision_micro": 0.5909090909090909,
"eval_precision_weighted": 0.5769802287329502,
"eval_recall_macro": 0.5272184429327286,
"eval_recall_micro": 0.5909090909090909,
"eval_recall_weighted": 0.5909090909090909,
"eval_runtime": 2.1745,
"eval_samples_per_second": 60.703,
"eval_steps_per_second": 7.818,
"step": 580
},
{
"epoch": 20.06896551724138,
"grad_norm": 0.3680793046951294,
"learning_rate": 4.742606790799562e-05,
"loss": 0.0216,
"step": 582
},
{
"epoch": 20.137931034482758,
"grad_norm": 2.0422375202178955,
"learning_rate": 4.7207009857612275e-05,
"loss": 0.0177,
"step": 584
},
{
"epoch": 20.20689655172414,
"grad_norm": 6.626030445098877,
"learning_rate": 4.698795180722892e-05,
"loss": 0.2056,
"step": 586
},
{
"epoch": 20.275862068965516,
"grad_norm": 0.7436681389808655,
"learning_rate": 4.676889375684557e-05,
"loss": 0.0327,
"step": 588
},
{
"epoch": 20.344827586206897,
"grad_norm": 0.9783719182014465,
"learning_rate": 4.6549835706462214e-05,
"loss": 0.044,
"step": 590
},
{
"epoch": 20.413793103448278,
"grad_norm": 0.8457818627357483,
"learning_rate": 4.633077765607886e-05,
"loss": 0.1102,
"step": 592
},
{
"epoch": 20.482758620689655,
"grad_norm": 3.060871124267578,
"learning_rate": 4.611171960569551e-05,
"loss": 0.0726,
"step": 594
},
{
"epoch": 20.551724137931036,
"grad_norm": 7.481118679046631,
"learning_rate": 4.589266155531216e-05,
"loss": 0.1447,
"step": 596
},
{
"epoch": 20.620689655172413,
"grad_norm": 8.65415096282959,
"learning_rate": 4.5673603504928806e-05,
"loss": 0.2099,
"step": 598
},
{
"epoch": 20.689655172413794,
"grad_norm": 3.2042698860168457,
"learning_rate": 4.545454545454546e-05,
"loss": 0.0497,
"step": 600
},
{
"epoch": 20.75862068965517,
"grad_norm": 0.5125285983085632,
"learning_rate": 4.5235487404162105e-05,
"loss": 0.069,
"step": 602
},
{
"epoch": 20.82758620689655,
"grad_norm": 0.8691998720169067,
"learning_rate": 4.501642935377876e-05,
"loss": 0.0354,
"step": 604
},
{
"epoch": 20.896551724137932,
"grad_norm": 10.247215270996094,
"learning_rate": 4.4797371303395404e-05,
"loss": 0.1541,
"step": 606
},
{
"epoch": 20.96551724137931,
"grad_norm": 11.587034225463867,
"learning_rate": 4.457831325301205e-05,
"loss": 0.374,
"step": 608
},
{
"epoch": 21.0,
"eval_accuracy": 0.6287878787878788,
"eval_f1_macro": 0.6084137522037308,
"eval_f1_micro": 0.6287878787878788,
"eval_f1_weighted": 0.6185060346144132,
"eval_loss": 1.7593897581100464,
"eval_precision_macro": 0.6711527035056447,
"eval_precision_micro": 0.6287878787878788,
"eval_precision_weighted": 0.6679164641063037,
"eval_recall_macro": 0.6049433106575963,
"eval_recall_micro": 0.6287878787878788,
"eval_recall_weighted": 0.6287878787878788,
"eval_runtime": 2.1955,
"eval_samples_per_second": 60.123,
"eval_steps_per_second": 7.743,
"step": 609
},
{
"epoch": 21.03448275862069,
"grad_norm": 1.38335120677948,
"learning_rate": 4.4359255202628696e-05,
"loss": 0.079,
"step": 610
},
{
"epoch": 21.103448275862068,
"grad_norm": 5.99662446975708,
"learning_rate": 4.414019715224535e-05,
"loss": 0.0648,
"step": 612
},
{
"epoch": 21.17241379310345,
"grad_norm": 1.0241988897323608,
"learning_rate": 4.3921139101861996e-05,
"loss": 0.1807,
"step": 614
},
{
"epoch": 21.24137931034483,
"grad_norm": 0.5548591017723083,
"learning_rate": 4.370208105147864e-05,
"loss": 0.0375,
"step": 616
},
{
"epoch": 21.310344827586206,
"grad_norm": 0.7137009501457214,
"learning_rate": 4.348302300109529e-05,
"loss": 0.0694,
"step": 618
},
{
"epoch": 21.379310344827587,
"grad_norm": 7.560571193695068,
"learning_rate": 4.326396495071194e-05,
"loss": 0.2715,
"step": 620
},
{
"epoch": 21.448275862068964,
"grad_norm": 7.067291736602783,
"learning_rate": 4.3044906900328594e-05,
"loss": 0.1016,
"step": 622
},
{
"epoch": 21.517241379310345,
"grad_norm": 4.622091770172119,
"learning_rate": 4.282584884994524e-05,
"loss": 0.0621,
"step": 624
},
{
"epoch": 21.586206896551722,
"grad_norm": 5.788636207580566,
"learning_rate": 4.2606790799561886e-05,
"loss": 0.1775,
"step": 626
},
{
"epoch": 21.655172413793103,
"grad_norm": 3.3069419860839844,
"learning_rate": 4.238773274917853e-05,
"loss": 0.0586,
"step": 628
},
{
"epoch": 21.724137931034484,
"grad_norm": 0.65139240026474,
"learning_rate": 4.2168674698795186e-05,
"loss": 0.0672,
"step": 630
},
{
"epoch": 21.79310344827586,
"grad_norm": 2.4793200492858887,
"learning_rate": 4.194961664841183e-05,
"loss": 0.1428,
"step": 632
},
{
"epoch": 21.862068965517242,
"grad_norm": 2.988377809524536,
"learning_rate": 4.173055859802848e-05,
"loss": 0.1759,
"step": 634
},
{
"epoch": 21.93103448275862,
"grad_norm": 5.487617015838623,
"learning_rate": 4.1511500547645124e-05,
"loss": 0.0849,
"step": 636
},
{
"epoch": 22.0,
"grad_norm": 2.8238141536712646,
"learning_rate": 4.129244249726178e-05,
"loss": 0.025,
"step": 638
},
{
"epoch": 22.0,
"eval_accuracy": 0.6515151515151515,
"eval_f1_macro": 0.6445815393183814,
"eval_f1_micro": 0.6515151515151515,
"eval_f1_weighted": 0.6520235479565623,
"eval_loss": 1.4723178148269653,
"eval_precision_macro": 0.6542847694633409,
"eval_precision_micro": 0.6515151515151515,
"eval_precision_weighted": 0.6660361050986052,
"eval_recall_macro": 0.6478760393046107,
"eval_recall_micro": 0.6515151515151515,
"eval_recall_weighted": 0.6515151515151515,
"eval_runtime": 2.2232,
"eval_samples_per_second": 59.375,
"eval_steps_per_second": 7.647,
"step": 638
},
{
"epoch": 22.06896551724138,
"grad_norm": 6.143444061279297,
"learning_rate": 4.107338444687843e-05,
"loss": 0.0913,
"step": 640
},
{
"epoch": 22.137931034482758,
"grad_norm": 2.971240997314453,
"learning_rate": 4.0854326396495076e-05,
"loss": 0.0312,
"step": 642
},
{
"epoch": 22.20689655172414,
"grad_norm": 0.27099546790122986,
"learning_rate": 4.063526834611172e-05,
"loss": 0.045,
"step": 644
},
{
"epoch": 22.275862068965516,
"grad_norm": 0.35845986008644104,
"learning_rate": 4.041621029572837e-05,
"loss": 0.013,
"step": 646
},
{
"epoch": 22.344827586206897,
"grad_norm": 2.0845632553100586,
"learning_rate": 4.019715224534502e-05,
"loss": 0.0523,
"step": 648
},
{
"epoch": 22.413793103448278,
"grad_norm": 3.686854362487793,
"learning_rate": 3.997809419496167e-05,
"loss": 0.0292,
"step": 650
},
{
"epoch": 22.482758620689655,
"grad_norm": 0.846224844455719,
"learning_rate": 3.9759036144578314e-05,
"loss": 0.1901,
"step": 652
},
{
"epoch": 22.551724137931036,
"grad_norm": 0.3240630626678467,
"learning_rate": 3.953997809419496e-05,
"loss": 0.0187,
"step": 654
},
{
"epoch": 22.620689655172413,
"grad_norm": 7.635501861572266,
"learning_rate": 3.9320920043811607e-05,
"loss": 0.1124,
"step": 656
},
{
"epoch": 22.689655172413794,
"grad_norm": 2.2201285362243652,
"learning_rate": 3.910186199342826e-05,
"loss": 0.03,
"step": 658
},
{
"epoch": 22.75862068965517,
"grad_norm": 8.953709602355957,
"learning_rate": 3.888280394304491e-05,
"loss": 0.0588,
"step": 660
},
{
"epoch": 22.82758620689655,
"grad_norm": 7.417150497436523,
"learning_rate": 3.866374589266156e-05,
"loss": 0.0751,
"step": 662
},
{
"epoch": 22.896551724137932,
"grad_norm": 1.5134751796722412,
"learning_rate": 3.8444687842278205e-05,
"loss": 0.0166,
"step": 664
},
{
"epoch": 22.96551724137931,
"grad_norm": 0.537891685962677,
"learning_rate": 3.822562979189485e-05,
"loss": 0.0096,
"step": 666
},
{
"epoch": 23.0,
"eval_accuracy": 0.6136363636363636,
"eval_f1_macro": 0.5899262553677856,
"eval_f1_micro": 0.6136363636363636,
"eval_f1_weighted": 0.6088674363985942,
"eval_loss": 1.5689215660095215,
"eval_precision_macro": 0.616981329954019,
"eval_precision_micro": 0.6136363636363636,
"eval_precision_weighted": 0.6315426797963563,
"eval_recall_macro": 0.5878231292517008,
"eval_recall_micro": 0.6136363636363636,
"eval_recall_weighted": 0.6136363636363636,
"eval_runtime": 2.2085,
"eval_samples_per_second": 59.769,
"eval_steps_per_second": 7.697,
"step": 667
},
{
"epoch": 23.03448275862069,
"grad_norm": 0.5641638040542603,
"learning_rate": 3.8006571741511504e-05,
"loss": 0.0089,
"step": 668
},
{
"epoch": 23.103448275862068,
"grad_norm": 3.1353189945220947,
"learning_rate": 3.778751369112815e-05,
"loss": 0.0292,
"step": 670
},
{
"epoch": 23.17241379310345,
"grad_norm": 0.7373493313789368,
"learning_rate": 3.7568455640744796e-05,
"loss": 0.0612,
"step": 672
},
{
"epoch": 23.24137931034483,
"grad_norm": 2.668566942214966,
"learning_rate": 3.734939759036144e-05,
"loss": 0.0416,
"step": 674
},
{
"epoch": 23.310344827586206,
"grad_norm": 4.210921287536621,
"learning_rate": 3.7130339539978096e-05,
"loss": 0.0405,
"step": 676
},
{
"epoch": 23.379310344827587,
"grad_norm": 0.31117522716522217,
"learning_rate": 3.691128148959475e-05,
"loss": 0.0162,
"step": 678
},
{
"epoch": 23.448275862068964,
"grad_norm": 8.15129280090332,
"learning_rate": 3.6692223439211395e-05,
"loss": 0.0723,
"step": 680
},
{
"epoch": 23.517241379310345,
"grad_norm": 2.1367807388305664,
"learning_rate": 3.647316538882804e-05,
"loss": 0.0501,
"step": 682
},
{
"epoch": 23.586206896551722,
"grad_norm": 1.1246554851531982,
"learning_rate": 3.625410733844469e-05,
"loss": 0.0115,
"step": 684
},
{
"epoch": 23.655172413793103,
"grad_norm": 1.3772636651992798,
"learning_rate": 3.603504928806134e-05,
"loss": 0.033,
"step": 686
},
{
"epoch": 23.724137931034484,
"grad_norm": 0.15142899751663208,
"learning_rate": 3.5815991237677986e-05,
"loss": 0.0613,
"step": 688
},
{
"epoch": 23.79310344827586,
"grad_norm": 13.712115287780762,
"learning_rate": 3.559693318729463e-05,
"loss": 0.2608,
"step": 690
},
{
"epoch": 23.862068965517242,
"grad_norm": 6.292361259460449,
"learning_rate": 3.537787513691128e-05,
"loss": 0.1134,
"step": 692
},
{
"epoch": 23.93103448275862,
"grad_norm": 0.6719773411750793,
"learning_rate": 3.515881708652793e-05,
"loss": 0.0106,
"step": 694
},
{
"epoch": 24.0,
"grad_norm": 1.0674413442611694,
"learning_rate": 3.4939759036144585e-05,
"loss": 0.0661,
"step": 696
},
{
"epoch": 24.0,
"eval_accuracy": 0.6666666666666666,
"eval_f1_macro": 0.6056024708734068,
"eval_f1_micro": 0.6666666666666666,
"eval_f1_weighted": 0.657565805841668,
"eval_loss": 1.6276419162750244,
"eval_precision_macro": 0.6690476190476191,
"eval_precision_micro": 0.6666666666666666,
"eval_precision_weighted": 0.6866965105601469,
"eval_recall_macro": 0.5948677248677248,
"eval_recall_micro": 0.6666666666666666,
"eval_recall_weighted": 0.6666666666666666,
"eval_runtime": 2.2477,
"eval_samples_per_second": 58.726,
"eval_steps_per_second": 7.563,
"step": 696
},
{
"epoch": 24.06896551724138,
"grad_norm": 0.15591685473918915,
"learning_rate": 3.472070098576123e-05,
"loss": 0.0109,
"step": 698
},
{
"epoch": 24.137931034482758,
"grad_norm": 1.411007046699524,
"learning_rate": 3.450164293537788e-05,
"loss": 0.0806,
"step": 700
},
{
"epoch": 24.20689655172414,
"grad_norm": 6.958545684814453,
"learning_rate": 3.4282584884994523e-05,
"loss": 0.2285,
"step": 702
},
{
"epoch": 24.275862068965516,
"grad_norm": 0.38558292388916016,
"learning_rate": 3.4063526834611176e-05,
"loss": 0.0268,
"step": 704
},
{
"epoch": 24.344827586206897,
"grad_norm": 6.778842926025391,
"learning_rate": 3.384446878422782e-05,
"loss": 0.1081,
"step": 706
},
{
"epoch": 24.413793103448278,
"grad_norm": 0.2550676465034485,
"learning_rate": 3.362541073384447e-05,
"loss": 0.0058,
"step": 708
},
{
"epoch": 24.482758620689655,
"grad_norm": 0.24779938161373138,
"learning_rate": 3.3406352683461115e-05,
"loss": 0.0252,
"step": 710
},
{
"epoch": 24.551724137931036,
"grad_norm": 0.1385107785463333,
"learning_rate": 3.318729463307776e-05,
"loss": 0.0057,
"step": 712
},
{
"epoch": 24.620689655172413,
"grad_norm": 3.3009445667266846,
"learning_rate": 3.2968236582694414e-05,
"loss": 0.0338,
"step": 714
},
{
"epoch": 24.689655172413794,
"grad_norm": 0.6270205974578857,
"learning_rate": 3.274917853231107e-05,
"loss": 0.0112,
"step": 716
},
{
"epoch": 24.75862068965517,
"grad_norm": 0.24541209638118744,
"learning_rate": 3.253012048192771e-05,
"loss": 0.0098,
"step": 718
},
{
"epoch": 24.82758620689655,
"grad_norm": 0.5051412582397461,
"learning_rate": 3.231106243154436e-05,
"loss": 0.0616,
"step": 720
},
{
"epoch": 24.896551724137932,
"grad_norm": 0.21808616816997528,
"learning_rate": 3.209200438116101e-05,
"loss": 0.0238,
"step": 722
},
{
"epoch": 24.96551724137931,
"grad_norm": 0.19809569418430328,
"learning_rate": 3.187294633077766e-05,
"loss": 0.0463,
"step": 724
},
{
"epoch": 25.0,
"eval_accuracy": 0.6136363636363636,
"eval_f1_macro": 0.5591332103178793,
"eval_f1_micro": 0.6136363636363636,
"eval_f1_weighted": 0.6084641064500635,
"eval_loss": 1.6760780811309814,
"eval_precision_macro": 0.6192834056699603,
"eval_precision_micro": 0.6136363636363636,
"eval_precision_weighted": 0.6400907915613798,
"eval_recall_macro": 0.5521088435374149,
"eval_recall_micro": 0.6136363636363636,
"eval_recall_weighted": 0.6136363636363636,
"eval_runtime": 2.1953,
"eval_samples_per_second": 60.128,
"eval_steps_per_second": 7.744,
"step": 725
},
{
"epoch": 25.03448275862069,
"grad_norm": 0.9460155367851257,
"learning_rate": 3.1653888280394305e-05,
"loss": 0.0328,
"step": 726
},
{
"epoch": 25.103448275862068,
"grad_norm": 0.34770432114601135,
"learning_rate": 3.143483023001095e-05,
"loss": 0.0169,
"step": 728
},
{
"epoch": 25.17241379310345,
"grad_norm": 0.6745150089263916,
"learning_rate": 3.12157721796276e-05,
"loss": 0.0292,
"step": 730
},
{
"epoch": 25.24137931034483,
"grad_norm": 0.14288195967674255,
"learning_rate": 3.099671412924425e-05,
"loss": 0.0672,
"step": 732
},
{
"epoch": 25.310344827586206,
"grad_norm": 0.1784912347793579,
"learning_rate": 3.07776560788609e-05,
"loss": 0.0046,
"step": 734
},
{
"epoch": 25.379310344827587,
"grad_norm": 0.7752932906150818,
"learning_rate": 3.055859802847755e-05,
"loss": 0.0089,
"step": 736
},
{
"epoch": 25.448275862068964,
"grad_norm": 8.310676574707031,
"learning_rate": 3.0339539978094196e-05,
"loss": 0.0854,
"step": 738
},
{
"epoch": 25.517241379310345,
"grad_norm": 1.2783715724945068,
"learning_rate": 3.012048192771085e-05,
"loss": 0.0091,
"step": 740
},
{
"epoch": 25.586206896551722,
"grad_norm": 1.2155754566192627,
"learning_rate": 2.9901423877327495e-05,
"loss": 0.0182,
"step": 742
},
{
"epoch": 25.655172413793103,
"grad_norm": 0.10511256754398346,
"learning_rate": 2.968236582694414e-05,
"loss": 0.0043,
"step": 744
},
{
"epoch": 25.724137931034484,
"grad_norm": 0.1467219889163971,
"learning_rate": 2.9463307776560787e-05,
"loss": 0.0041,
"step": 746
},
{
"epoch": 25.79310344827586,
"grad_norm": 0.06411899626255035,
"learning_rate": 2.9244249726177437e-05,
"loss": 0.0043,
"step": 748
},
{
"epoch": 25.862068965517242,
"grad_norm": 9.776043891906738,
"learning_rate": 2.902519167579409e-05,
"loss": 0.0456,
"step": 750
},
{
"epoch": 25.93103448275862,
"grad_norm": 0.1743546575307846,
"learning_rate": 2.8806133625410736e-05,
"loss": 0.0029,
"step": 752
},
{
"epoch": 26.0,
"grad_norm": 0.9783799648284912,
"learning_rate": 2.8587075575027382e-05,
"loss": 0.0118,
"step": 754
},
{
"epoch": 26.0,
"eval_accuracy": 0.6287878787878788,
"eval_f1_macro": 0.5353365735453567,
"eval_f1_micro": 0.6287878787878788,
"eval_f1_weighted": 0.6074711236094882,
"eval_loss": 1.6210349798202515,
"eval_precision_macro": 0.5715752748253354,
"eval_precision_micro": 0.6287878787878788,
"eval_precision_weighted": 0.6263481846840905,
"eval_recall_macro": 0.5410279667422525,
"eval_recall_micro": 0.6287878787878788,
"eval_recall_weighted": 0.6287878787878788,
"eval_runtime": 2.2013,
"eval_samples_per_second": 59.964,
"eval_steps_per_second": 7.723,
"step": 754
},
{
"epoch": 26.06896551724138,
"grad_norm": 0.2862379252910614,
"learning_rate": 2.8368017524644032e-05,
"loss": 0.0041,
"step": 756
},
{
"epoch": 26.137931034482758,
"grad_norm": 1.7093660831451416,
"learning_rate": 2.8148959474260678e-05,
"loss": 0.0129,
"step": 758
},
{
"epoch": 26.20689655172414,
"grad_norm": 2.0235061645507812,
"learning_rate": 2.792990142387733e-05,
"loss": 0.0107,
"step": 760
},
{
"epoch": 26.275862068965516,
"grad_norm": 0.19022098183631897,
"learning_rate": 2.7710843373493977e-05,
"loss": 0.0044,
"step": 762
},
{
"epoch": 26.344827586206897,
"grad_norm": 0.09240903705358505,
"learning_rate": 2.7491785323110624e-05,
"loss": 0.0023,
"step": 764
},
{
"epoch": 26.413793103448278,
"grad_norm": 0.08767610788345337,
"learning_rate": 2.7272727272727273e-05,
"loss": 0.0026,
"step": 766
},
{
"epoch": 26.482758620689655,
"grad_norm": 0.3399060368537903,
"learning_rate": 2.7053669222343926e-05,
"loss": 0.0129,
"step": 768
},
{
"epoch": 26.551724137931036,
"grad_norm": 0.16420547664165497,
"learning_rate": 2.6834611171960572e-05,
"loss": 0.0315,
"step": 770
},
{
"epoch": 26.620689655172413,
"grad_norm": 0.07277621328830719,
"learning_rate": 2.661555312157722e-05,
"loss": 0.1677,
"step": 772
},
{
"epoch": 26.689655172413794,
"grad_norm": 0.0779278352856636,
"learning_rate": 2.6396495071193865e-05,
"loss": 0.0034,
"step": 774
},
{
"epoch": 26.75862068965517,
"grad_norm": 0.030221056193113327,
"learning_rate": 2.6177437020810514e-05,
"loss": 0.009,
"step": 776
},
{
"epoch": 26.82758620689655,
"grad_norm": 0.7204201221466064,
"learning_rate": 2.5958378970427167e-05,
"loss": 0.0242,
"step": 778
},
{
"epoch": 26.896551724137932,
"grad_norm": 2.2107677459716797,
"learning_rate": 2.5739320920043813e-05,
"loss": 0.0818,
"step": 780
},
{
"epoch": 26.96551724137931,
"grad_norm": 0.04545823484659195,
"learning_rate": 2.552026286966046e-05,
"loss": 0.0018,
"step": 782
},
{
"epoch": 27.0,
"eval_accuracy": 0.6742424242424242,
"eval_f1_macro": 0.5860085994600535,
"eval_f1_micro": 0.6742424242424242,
"eval_f1_weighted": 0.6574843706054311,
"eval_loss": 1.607276201248169,
"eval_precision_macro": 0.5955862562810968,
"eval_precision_micro": 0.6742424242424242,
"eval_precision_weighted": 0.6586732219548587,
"eval_recall_macro": 0.5929327286470143,
"eval_recall_micro": 0.6742424242424242,
"eval_recall_weighted": 0.6742424242424242,
"eval_runtime": 2.1629,
"eval_samples_per_second": 61.028,
"eval_steps_per_second": 7.86,
"step": 783
},
{
"epoch": 27.03448275862069,
"grad_norm": 1.2520081996917725,
"learning_rate": 2.530120481927711e-05,
"loss": 0.0145,
"step": 784
},
{
"epoch": 27.103448275862068,
"grad_norm": 0.4600828289985657,
"learning_rate": 2.5082146768893762e-05,
"loss": 0.0133,
"step": 786
},
{
"epoch": 27.17241379310345,
"grad_norm": 4.692933082580566,
"learning_rate": 2.486308871851041e-05,
"loss": 0.0382,
"step": 788
},
{
"epoch": 27.24137931034483,
"grad_norm": 0.3261309862136841,
"learning_rate": 2.4644030668127055e-05,
"loss": 0.0035,
"step": 790
},
{
"epoch": 27.310344827586206,
"grad_norm": 0.028574170544743538,
"learning_rate": 2.44249726177437e-05,
"loss": 0.0084,
"step": 792
},
{
"epoch": 27.379310344827587,
"grad_norm": 0.4913921356201172,
"learning_rate": 2.420591456736035e-05,
"loss": 0.0183,
"step": 794
},
{
"epoch": 27.448275862068964,
"grad_norm": 1.5067977905273438,
"learning_rate": 2.3986856516977e-05,
"loss": 0.0387,
"step": 796
},
{
"epoch": 27.517241379310345,
"grad_norm": 0.8277406096458435,
"learning_rate": 2.376779846659365e-05,
"loss": 0.0233,
"step": 798
},
{
"epoch": 27.586206896551722,
"grad_norm": 5.168019771575928,
"learning_rate": 2.3548740416210296e-05,
"loss": 0.0222,
"step": 800
},
{
"epoch": 27.655172413793103,
"grad_norm": 0.5925205945968628,
"learning_rate": 2.3329682365826945e-05,
"loss": 0.0066,
"step": 802
},
{
"epoch": 27.724137931034484,
"grad_norm": 0.7455288767814636,
"learning_rate": 2.3110624315443595e-05,
"loss": 0.0044,
"step": 804
},
{
"epoch": 27.79310344827586,
"grad_norm": 0.029589757323265076,
"learning_rate": 2.289156626506024e-05,
"loss": 0.0055,
"step": 806
},
{
"epoch": 27.862068965517242,
"grad_norm": 10.534521102905273,
"learning_rate": 2.267250821467689e-05,
"loss": 0.0515,
"step": 808
},
{
"epoch": 27.93103448275862,
"grad_norm": 0.19806796312332153,
"learning_rate": 2.2453450164293537e-05,
"loss": 0.0157,
"step": 810
},
{
"epoch": 28.0,
"grad_norm": 5.914359092712402,
"learning_rate": 2.2234392113910187e-05,
"loss": 0.0336,
"step": 812
},
{
"epoch": 28.0,
"eval_accuracy": 0.6439393939393939,
"eval_f1_macro": 0.6085667254819397,
"eval_f1_micro": 0.6439393939393939,
"eval_f1_weighted": 0.6411146413787409,
"eval_loss": 1.5964038372039795,
"eval_precision_macro": 0.6379142750217496,
"eval_precision_micro": 0.6439393939393939,
"eval_precision_weighted": 0.6565570652257958,
"eval_recall_macro": 0.5978533635676493,
"eval_recall_micro": 0.6439393939393939,
"eval_recall_weighted": 0.6439393939393939,
"eval_runtime": 2.2198,
"eval_samples_per_second": 59.466,
"eval_steps_per_second": 7.659,
"step": 812
},
{
"epoch": 28.06896551724138,
"grad_norm": 1.2526508569717407,
"learning_rate": 2.2015334063526836e-05,
"loss": 0.0304,
"step": 814
},
{
"epoch": 28.137931034482758,
"grad_norm": 1.441461443901062,
"learning_rate": 2.1796276013143486e-05,
"loss": 0.0071,
"step": 816
},
{
"epoch": 28.20689655172414,
"grad_norm": 0.2004363089799881,
"learning_rate": 2.1577217962760132e-05,
"loss": 0.0261,
"step": 818
},
{
"epoch": 28.275862068965516,
"grad_norm": 0.16825991868972778,
"learning_rate": 2.1358159912376778e-05,
"loss": 0.0297,
"step": 820
},
{
"epoch": 28.344827586206897,
"grad_norm": 0.8127052783966064,
"learning_rate": 2.1139101861993428e-05,
"loss": 0.005,
"step": 822
},
{
"epoch": 28.413793103448278,
"grad_norm": 0.2261103391647339,
"learning_rate": 2.0920043811610077e-05,
"loss": 0.0056,
"step": 824
},
{
"epoch": 28.482758620689655,
"grad_norm": 0.04766825586557388,
"learning_rate": 2.0700985761226727e-05,
"loss": 0.0013,
"step": 826
},
{
"epoch": 28.551724137931036,
"grad_norm": 0.036670394241809845,
"learning_rate": 2.0481927710843373e-05,
"loss": 0.0028,
"step": 828
},
{
"epoch": 28.620689655172413,
"grad_norm": 0.41730597615242004,
"learning_rate": 2.0262869660460023e-05,
"loss": 0.0028,
"step": 830
},
{
"epoch": 28.689655172413794,
"grad_norm": 0.04215677082538605,
"learning_rate": 2.0043811610076672e-05,
"loss": 0.0018,
"step": 832
},
{
"epoch": 28.75862068965517,
"grad_norm": 0.08167728036642075,
"learning_rate": 1.9824753559693322e-05,
"loss": 0.0016,
"step": 834
},
{
"epoch": 28.82758620689655,
"grad_norm": 0.031280118972063065,
"learning_rate": 1.9605695509309968e-05,
"loss": 0.002,
"step": 836
},
{
"epoch": 28.896551724137932,
"grad_norm": 1.7285773754119873,
"learning_rate": 1.9386637458926614e-05,
"loss": 0.0117,
"step": 838
},
{
"epoch": 28.96551724137931,
"grad_norm": 0.06211957335472107,
"learning_rate": 1.9167579408543264e-05,
"loss": 0.0014,
"step": 840
},
{
"epoch": 29.0,
"eval_accuracy": 0.7121212121212122,
"eval_f1_macro": 0.6872675353596543,
"eval_f1_micro": 0.7121212121212122,
"eval_f1_weighted": 0.7082990442199542,
"eval_loss": 1.5290158987045288,
"eval_precision_macro": 0.7262781667691,
"eval_precision_micro": 0.7121212121212122,
"eval_precision_weighted": 0.7308177925367624,
"eval_recall_macro": 0.6733560090702948,
"eval_recall_micro": 0.7121212121212122,
"eval_recall_weighted": 0.7121212121212122,
"eval_runtime": 2.1608,
"eval_samples_per_second": 61.088,
"eval_steps_per_second": 7.867,
"step": 841
},
{
"epoch": 29.03448275862069,
"grad_norm": 0.01278562843799591,
"learning_rate": 1.8948521358159914e-05,
"loss": 0.003,
"step": 842
},
{
"epoch": 29.103448275862068,
"grad_norm": 0.06935442239046097,
"learning_rate": 1.8729463307776563e-05,
"loss": 0.0014,
"step": 844
},
{
"epoch": 29.17241379310345,
"grad_norm": 0.6586639285087585,
"learning_rate": 1.851040525739321e-05,
"loss": 0.0023,
"step": 846
},
{
"epoch": 29.24137931034483,
"grad_norm": 3.5995774269104004,
"learning_rate": 1.829134720700986e-05,
"loss": 0.018,
"step": 848
},
{
"epoch": 29.310344827586206,
"grad_norm": 0.08816396445035934,
"learning_rate": 1.8072289156626505e-05,
"loss": 0.0031,
"step": 850
},
{
"epoch": 29.379310344827587,
"grad_norm": 1.209425926208496,
"learning_rate": 1.7853231106243155e-05,
"loss": 0.0282,
"step": 852
},
{
"epoch": 29.448275862068964,
"grad_norm": 3.262197732925415,
"learning_rate": 1.7634173055859804e-05,
"loss": 0.0246,
"step": 854
},
{
"epoch": 29.517241379310345,
"grad_norm": 0.7129732966423035,
"learning_rate": 1.741511500547645e-05,
"loss": 0.0085,
"step": 856
},
{
"epoch": 29.586206896551722,
"grad_norm": 0.15869493782520294,
"learning_rate": 1.71960569550931e-05,
"loss": 0.0015,
"step": 858
},
{
"epoch": 29.655172413793103,
"grad_norm": 0.07819876074790955,
"learning_rate": 1.697699890470975e-05,
"loss": 0.0476,
"step": 860
},
{
"epoch": 29.724137931034484,
"grad_norm": 0.12180998921394348,
"learning_rate": 1.67579408543264e-05,
"loss": 0.0032,
"step": 862
},
{
"epoch": 29.79310344827586,
"grad_norm": 0.034297507256269455,
"learning_rate": 1.6538882803943046e-05,
"loss": 0.0008,
"step": 864
},
{
"epoch": 29.862068965517242,
"grad_norm": 0.14563943445682526,
"learning_rate": 1.6319824753559695e-05,
"loss": 0.0087,
"step": 866
},
{
"epoch": 29.93103448275862,
"grad_norm": 0.23122666776180267,
"learning_rate": 1.610076670317634e-05,
"loss": 0.0416,
"step": 868
},
{
"epoch": 30.0,
"grad_norm": 0.06973911821842194,
"learning_rate": 1.588170865279299e-05,
"loss": 0.021,
"step": 870
},
{
"epoch": 30.0,
"eval_accuracy": 0.696969696969697,
"eval_f1_macro": 0.6982068677202545,
"eval_f1_micro": 0.696969696969697,
"eval_f1_weighted": 0.6973740805478152,
"eval_loss": 1.5439778566360474,
"eval_precision_macro": 0.7076152020847177,
"eval_precision_micro": 0.696969696969697,
"eval_precision_weighted": 0.7169868679432659,
"eval_recall_macro": 0.7086167800453513,
"eval_recall_micro": 0.696969696969697,
"eval_recall_weighted": 0.696969696969697,
"eval_runtime": 2.1633,
"eval_samples_per_second": 61.018,
"eval_steps_per_second": 7.858,
"step": 870
},
{
"epoch": 30.06896551724138,
"grad_norm": 1.7453641891479492,
"learning_rate": 1.566265060240964e-05,
"loss": 0.0083,
"step": 872
},
{
"epoch": 30.137931034482758,
"grad_norm": 0.3179946541786194,
"learning_rate": 1.5443592552026287e-05,
"loss": 0.0024,
"step": 874
},
{
"epoch": 30.20689655172414,
"grad_norm": 1.7078912258148193,
"learning_rate": 1.5224534501642936e-05,
"loss": 0.0082,
"step": 876
},
{
"epoch": 30.275862068965516,
"grad_norm": 0.32421720027923584,
"learning_rate": 1.5005476451259584e-05,
"loss": 0.0032,
"step": 878
},
{
"epoch": 30.344827586206897,
"grad_norm": 0.04044501855969429,
"learning_rate": 1.4786418400876234e-05,
"loss": 0.0444,
"step": 880
},
{
"epoch": 30.413793103448278,
"grad_norm": 0.06957350671291351,
"learning_rate": 1.4567360350492882e-05,
"loss": 0.0226,
"step": 882
},
{
"epoch": 30.482758620689655,
"grad_norm": 0.5221211314201355,
"learning_rate": 1.4348302300109528e-05,
"loss": 0.0041,
"step": 884
},
{
"epoch": 30.551724137931036,
"grad_norm": 0.017598647624254227,
"learning_rate": 1.412924424972618e-05,
"loss": 0.0013,
"step": 886
},
{
"epoch": 30.620689655172413,
"grad_norm": 0.47180604934692383,
"learning_rate": 1.3910186199342825e-05,
"loss": 0.003,
"step": 888
},
{
"epoch": 30.689655172413794,
"grad_norm": 0.03941981866955757,
"learning_rate": 1.3691128148959475e-05,
"loss": 0.0112,
"step": 890
},
{
"epoch": 30.75862068965517,
"grad_norm": 0.023555099964141846,
"learning_rate": 1.3472070098576123e-05,
"loss": 0.03,
"step": 892
},
{
"epoch": 30.82758620689655,
"grad_norm": 0.48139652609825134,
"learning_rate": 1.3253012048192772e-05,
"loss": 0.0052,
"step": 894
},
{
"epoch": 30.896551724137932,
"grad_norm": 0.1581326723098755,
"learning_rate": 1.303395399780942e-05,
"loss": 0.0156,
"step": 896
},
{
"epoch": 30.96551724137931,
"grad_norm": 1.283683180809021,
"learning_rate": 1.2814895947426067e-05,
"loss": 0.0065,
"step": 898
},
{
"epoch": 31.0,
"eval_accuracy": 0.696969696969697,
"eval_f1_macro": 0.6868812120235886,
"eval_f1_micro": 0.696969696969697,
"eval_f1_weighted": 0.6914990595967736,
"eval_loss": 1.6575924158096313,
"eval_precision_macro": 0.7429775738046415,
"eval_precision_micro": 0.696969696969697,
"eval_precision_weighted": 0.7269597892803633,
"eval_recall_macro": 0.6698866213151928,
"eval_recall_micro": 0.696969696969697,
"eval_recall_weighted": 0.696969696969697,
"eval_runtime": 2.1493,
"eval_samples_per_second": 61.416,
"eval_steps_per_second": 7.91,
"step": 899
},
{
"epoch": 31.03448275862069,
"grad_norm": 0.018120741471648216,
"learning_rate": 1.2595837897042718e-05,
"loss": 0.0011,
"step": 900
},
{
"epoch": 31.103448275862068,
"grad_norm": 0.1496172994375229,
"learning_rate": 1.2376779846659366e-05,
"loss": 0.0031,
"step": 902
},
{
"epoch": 31.17241379310345,
"grad_norm": 1.2407957315444946,
"learning_rate": 1.2157721796276014e-05,
"loss": 0.0278,
"step": 904
},
{
"epoch": 31.24137931034483,
"grad_norm": 0.14974364638328552,
"learning_rate": 1.1938663745892662e-05,
"loss": 0.0236,
"step": 906
},
{
"epoch": 31.310344827586206,
"grad_norm": 4.202882766723633,
"learning_rate": 1.171960569550931e-05,
"loss": 0.0241,
"step": 908
},
{
"epoch": 31.379310344827587,
"grad_norm": 0.05061774700880051,
"learning_rate": 1.1500547645125959e-05,
"loss": 0.001,
"step": 910
},
{
"epoch": 31.448275862068964,
"grad_norm": 0.019295161589980125,
"learning_rate": 1.1281489594742607e-05,
"loss": 0.0012,
"step": 912
},
{
"epoch": 31.517241379310345,
"grad_norm": 0.1430915743112564,
"learning_rate": 1.1062431544359257e-05,
"loss": 0.0041,
"step": 914
},
{
"epoch": 31.586206896551722,
"grad_norm": 0.03306346759200096,
"learning_rate": 1.0843373493975904e-05,
"loss": 0.0013,
"step": 916
},
{
"epoch": 31.655172413793103,
"grad_norm": 0.42486900091171265,
"learning_rate": 1.0624315443592552e-05,
"loss": 0.006,
"step": 918
},
{
"epoch": 31.724137931034484,
"grad_norm": 0.058433897793293,
"learning_rate": 1.0405257393209202e-05,
"loss": 0.0018,
"step": 920
},
{
"epoch": 31.79310344827586,
"grad_norm": 0.027252651751041412,
"learning_rate": 1.0186199342825848e-05,
"loss": 0.004,
"step": 922
},
{
"epoch": 31.862068965517242,
"grad_norm": 0.13943073153495789,
"learning_rate": 9.967141292442498e-06,
"loss": 0.0016,
"step": 924
},
{
"epoch": 31.93103448275862,
"grad_norm": 0.048901163041591644,
"learning_rate": 9.748083242059146e-06,
"loss": 0.0019,
"step": 926
},
{
"epoch": 32.0,
"grad_norm": 0.019549880176782608,
"learning_rate": 9.529025191675795e-06,
"loss": 0.0013,
"step": 928
},
{
"epoch": 32.0,
"eval_accuracy": 0.7196969696969697,
"eval_f1_macro": 0.7124096518979722,
"eval_f1_micro": 0.7196969696969697,
"eval_f1_weighted": 0.7172944138815308,
"eval_loss": 1.560258388519287,
"eval_precision_macro": 0.750803957946815,
"eval_precision_micro": 0.7196969696969697,
"eval_precision_weighted": 0.7410763478945297,
"eval_recall_macro": 0.6987226001511715,
"eval_recall_micro": 0.7196969696969697,
"eval_recall_weighted": 0.7196969696969697,
"eval_runtime": 2.1728,
"eval_samples_per_second": 60.752,
"eval_steps_per_second": 7.824,
"step": 928
},
{
"epoch": 32.06896551724138,
"grad_norm": 0.05375111103057861,
"learning_rate": 9.309967141292443e-06,
"loss": 0.0024,
"step": 930
},
{
"epoch": 32.13793103448276,
"grad_norm": 0.3470950424671173,
"learning_rate": 9.090909090909091e-06,
"loss": 0.0035,
"step": 932
},
{
"epoch": 32.206896551724135,
"grad_norm": 0.02533532679080963,
"learning_rate": 8.87185104052574e-06,
"loss": 0.001,
"step": 934
},
{
"epoch": 32.275862068965516,
"grad_norm": 1.6475239992141724,
"learning_rate": 8.652792990142389e-06,
"loss": 0.0343,
"step": 936
},
{
"epoch": 32.3448275862069,
"grad_norm": 0.13403227925300598,
"learning_rate": 8.433734939759036e-06,
"loss": 0.0019,
"step": 938
},
{
"epoch": 32.41379310344828,
"grad_norm": 0.13201530277729034,
"learning_rate": 8.214676889375684e-06,
"loss": 0.0019,
"step": 940
},
{
"epoch": 32.48275862068966,
"grad_norm": 2.441126823425293,
"learning_rate": 7.995618838992334e-06,
"loss": 0.0152,
"step": 942
},
{
"epoch": 32.55172413793103,
"grad_norm": 0.3257850408554077,
"learning_rate": 7.776560788608982e-06,
"loss": 0.0019,
"step": 944
},
{
"epoch": 32.62068965517241,
"grad_norm": 0.10473517328500748,
"learning_rate": 7.5575027382256306e-06,
"loss": 0.0027,
"step": 946
},
{
"epoch": 32.689655172413794,
"grad_norm": 0.04300970956683159,
"learning_rate": 7.3384446878422785e-06,
"loss": 0.0008,
"step": 948
},
{
"epoch": 32.758620689655174,
"grad_norm": 0.12258446961641312,
"learning_rate": 7.119386637458927e-06,
"loss": 0.0028,
"step": 950
},
{
"epoch": 32.827586206896555,
"grad_norm": 0.14553672075271606,
"learning_rate": 6.900328587075576e-06,
"loss": 0.0013,
"step": 952
},
{
"epoch": 32.89655172413793,
"grad_norm": 1.198081612586975,
"learning_rate": 6.681270536692223e-06,
"loss": 0.0276,
"step": 954
},
{
"epoch": 32.96551724137931,
"grad_norm": 0.4895022511482239,
"learning_rate": 6.462212486308872e-06,
"loss": 0.0129,
"step": 956
},
{
"epoch": 33.0,
"eval_accuracy": 0.6893939393939394,
"eval_f1_macro": 0.6841929992352647,
"eval_f1_micro": 0.6893939393939394,
"eval_f1_weighted": 0.6870473377924375,
"eval_loss": 1.6027860641479492,
"eval_precision_macro": 0.7152604691775198,
"eval_precision_micro": 0.6893939393939394,
"eval_precision_weighted": 0.7059007626456307,
"eval_recall_macro": 0.673061224489796,
"eval_recall_micro": 0.6893939393939394,
"eval_recall_weighted": 0.6893939393939394,
"eval_runtime": 2.1875,
"eval_samples_per_second": 60.343,
"eval_steps_per_second": 7.771,
"step": 957
},
{
"epoch": 33.03448275862069,
"grad_norm": 0.18283292651176453,
"learning_rate": 6.2431544359255205e-06,
"loss": 0.0023,
"step": 958
},
{
"epoch": 33.10344827586207,
"grad_norm": 0.09138727933168411,
"learning_rate": 6.024096385542169e-06,
"loss": 0.0018,
"step": 960
},
{
"epoch": 33.172413793103445,
"grad_norm": 0.27659812569618225,
"learning_rate": 5.805038335158817e-06,
"loss": 0.004,
"step": 962
},
{
"epoch": 33.241379310344826,
"grad_norm": 1.0702749490737915,
"learning_rate": 5.585980284775466e-06,
"loss": 0.0228,
"step": 964
},
{
"epoch": 33.310344827586206,
"grad_norm": 0.10491228103637695,
"learning_rate": 5.366922234392114e-06,
"loss": 0.0023,
"step": 966
},
{
"epoch": 33.37931034482759,
"grad_norm": 0.33777573704719543,
"learning_rate": 5.1478641840087625e-06,
"loss": 0.0034,
"step": 968
},
{
"epoch": 33.44827586206897,
"grad_norm": 0.06301871687173843,
"learning_rate": 4.928806133625411e-06,
"loss": 0.0023,
"step": 970
},
{
"epoch": 33.51724137931034,
"grad_norm": 0.09340860694646835,
"learning_rate": 4.70974808324206e-06,
"loss": 0.0205,
"step": 972
},
{
"epoch": 33.58620689655172,
"grad_norm": 0.020821426063776016,
"learning_rate": 4.490690032858708e-06,
"loss": 0.0009,
"step": 974
},
{
"epoch": 33.6551724137931,
"grad_norm": 0.04694080352783203,
"learning_rate": 4.271631982475356e-06,
"loss": 0.001,
"step": 976
},
{
"epoch": 33.724137931034484,
"grad_norm": 0.056120615452528,
"learning_rate": 4.0525739320920046e-06,
"loss": 0.0018,
"step": 978
},
{
"epoch": 33.793103448275865,
"grad_norm": 1.5992101430892944,
"learning_rate": 3.8335158817086525e-06,
"loss": 0.0073,
"step": 980
},
{
"epoch": 33.86206896551724,
"grad_norm": 0.044718772172927856,
"learning_rate": 3.614457831325301e-06,
"loss": 0.012,
"step": 982
},
{
"epoch": 33.93103448275862,
"grad_norm": 0.019480116665363312,
"learning_rate": 3.39539978094195e-06,
"loss": 0.0012,
"step": 984
},
{
"epoch": 34.0,
"grad_norm": 0.01623496413230896,
"learning_rate": 3.1763417305585983e-06,
"loss": 0.0006,
"step": 986
},
{
"epoch": 34.0,
"eval_accuracy": 0.6818181818181818,
"eval_f1_macro": 0.6786724695679507,
"eval_f1_micro": 0.6818181818181818,
"eval_f1_weighted": 0.6800052243915973,
"eval_loss": 1.6074531078338623,
"eval_precision_macro": 0.7093953665382237,
"eval_precision_micro": 0.6818181818181818,
"eval_precision_weighted": 0.6991099809281627,
"eval_recall_macro": 0.6677702191987906,
"eval_recall_micro": 0.6818181818181818,
"eval_recall_weighted": 0.6818181818181818,
"eval_runtime": 2.1756,
"eval_samples_per_second": 60.674,
"eval_steps_per_second": 7.814,
"step": 986
},
{
"epoch": 34.06896551724138,
"grad_norm": 0.02141761966049671,
"learning_rate": 2.9572836801752466e-06,
"loss": 0.0015,
"step": 988
},
{
"epoch": 34.13793103448276,
"grad_norm": 0.03435864299535751,
"learning_rate": 2.738225629791895e-06,
"loss": 0.0009,
"step": 990
},
{
"epoch": 34.206896551724135,
"grad_norm": 0.03004680573940277,
"learning_rate": 2.5191675794085432e-06,
"loss": 0.0011,
"step": 992
},
{
"epoch": 34.275862068965516,
"grad_norm": 0.027880065143108368,
"learning_rate": 2.3001095290251916e-06,
"loss": 0.0008,
"step": 994
},
{
"epoch": 34.3448275862069,
"grad_norm": 1.3690009117126465,
"learning_rate": 2.0810514786418403e-06,
"loss": 0.0308,
"step": 996
},
{
"epoch": 34.41379310344828,
"grad_norm": 0.04672781005501747,
"learning_rate": 1.8619934282584884e-06,
"loss": 0.0012,
"step": 998
},
{
"epoch": 34.48275862068966,
"grad_norm": 0.06204487010836601,
"learning_rate": 1.642935377875137e-06,
"loss": 0.0017,
"step": 1000
},
{
"epoch": 34.55172413793103,
"grad_norm": 0.03130810335278511,
"learning_rate": 1.4238773274917855e-06,
"loss": 0.0015,
"step": 1002
},
{
"epoch": 34.62068965517241,
"grad_norm": 0.06150972098112106,
"learning_rate": 1.2048192771084338e-06,
"loss": 0.0026,
"step": 1004
},
{
"epoch": 34.689655172413794,
"grad_norm": 0.1301676630973816,
"learning_rate": 9.857612267250823e-07,
"loss": 0.0023,
"step": 1006
},
{
"epoch": 34.758620689655174,
"grad_norm": 0.11791616678237915,
"learning_rate": 7.667031763417306e-07,
"loss": 0.0019,
"step": 1008
},
{
"epoch": 34.827586206896555,
"grad_norm": 2.598999500274658,
"learning_rate": 5.47645125958379e-07,
"loss": 0.0082,
"step": 1010
},
{
"epoch": 34.89655172413793,
"grad_norm": 1.0025254487991333,
"learning_rate": 3.285870755750274e-07,
"loss": 0.0207,
"step": 1012
},
{
"epoch": 34.96551724137931,
"grad_norm": 0.543045699596405,
"learning_rate": 1.095290251916758e-07,
"loss": 0.0022,
"step": 1014
},
{
"epoch": 35.0,
"eval_accuracy": 0.6893939393939394,
"eval_f1_macro": 0.6848257838882342,
"eval_f1_micro": 0.6893939393939394,
"eval_f1_weighted": 0.6869347739708745,
"eval_loss": 1.6008917093276978,
"eval_precision_macro": 0.7170539138281073,
"eval_precision_micro": 0.6893939393939394,
"eval_precision_weighted": 0.7061795242880288,
"eval_recall_macro": 0.673061224489796,
"eval_recall_micro": 0.6893939393939394,
"eval_recall_weighted": 0.6893939393939394,
"eval_runtime": 2.1921,
"eval_samples_per_second": 60.216,
"eval_steps_per_second": 7.755,
"step": 1015
},
{
"epoch": 35.0,
"step": 1015,
"total_flos": 1.2531016253190758e+18,
"train_loss": 0.5009635013656627,
"train_runtime": 1007.7292,
"train_samples_per_second": 16.046,
"train_steps_per_second": 1.007
}
],
"logging_steps": 2,
"max_steps": 1015,
"num_input_tokens_seen": 0,
"num_train_epochs": 35,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.2531016253190758e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}