{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.6089309878213802,
"eval_steps": 20,
"global_step": 900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0,
"eval_accuracy": 0.726605504587156,
"eval_f1": 0.11834319526627218,
"eval_loss": 0.6266470551490784,
"eval_precision": 0.5,
"eval_recall": 0.06711409395973154,
"eval_runtime": 53.0676,
"eval_samples_per_second": 5.615,
"eval_steps_per_second": 0.188,
"step": 0
},
{
"epoch": 0.0006765899864682003,
"grad_norm": 1.9097040891647339,
"learning_rate": 1.3513513513513515e-07,
"loss": 0.5346,
"step": 1
},
{
"epoch": 0.0013531799729364006,
"grad_norm": 2.262101173400879,
"learning_rate": 2.702702702702703e-07,
"loss": 0.6432,
"step": 2
},
{
"epoch": 0.0020297699594046007,
"grad_norm": 2.2351596355438232,
"learning_rate": 4.0540540540540546e-07,
"loss": 0.6418,
"step": 3
},
{
"epoch": 0.0027063599458728013,
"grad_norm": 2.1407454013824463,
"learning_rate": 5.405405405405406e-07,
"loss": 0.681,
"step": 4
},
{
"epoch": 0.0033829499323410014,
"grad_norm": 1.836843729019165,
"learning_rate": 6.756756756756758e-07,
"loss": 0.6663,
"step": 5
},
{
"epoch": 0.0040595399188092015,
"grad_norm": 2.4660489559173584,
"learning_rate": 8.108108108108109e-07,
"loss": 0.6643,
"step": 6
},
{
"epoch": 0.004736129905277402,
"grad_norm": 2.2095065116882324,
"learning_rate": 9.459459459459461e-07,
"loss": 0.6107,
"step": 7
},
{
"epoch": 0.005412719891745603,
"grad_norm": 2.3385086059570312,
"learning_rate": 1.0810810810810812e-06,
"loss": 0.6332,
"step": 8
},
{
"epoch": 0.006089309878213802,
"grad_norm": 2.0470025539398193,
"learning_rate": 1.2162162162162164e-06,
"loss": 0.6645,
"step": 9
},
{
"epoch": 0.006765899864682003,
"grad_norm": 2.1129884719848633,
"learning_rate": 1.3513513513513515e-06,
"loss": 0.5937,
"step": 10
},
{
"epoch": 0.007442489851150203,
"grad_norm": 2.343991994857788,
"learning_rate": 1.4864864864864868e-06,
"loss": 0.6274,
"step": 11
},
{
"epoch": 0.008119079837618403,
"grad_norm": 2.254518508911133,
"learning_rate": 1.6216216216216219e-06,
"loss": 0.6133,
"step": 12
},
{
"epoch": 0.008795669824086604,
"grad_norm": 2.3268182277679443,
"learning_rate": 1.756756756756757e-06,
"loss": 0.5994,
"step": 13
},
{
"epoch": 0.009472259810554804,
"grad_norm": 2.1147611141204834,
"learning_rate": 1.8918918918918922e-06,
"loss": 0.6043,
"step": 14
},
{
"epoch": 0.010148849797023005,
"grad_norm": 3.140791654586792,
"learning_rate": 2.0270270270270273e-06,
"loss": 0.6447,
"step": 15
},
{
"epoch": 0.010825439783491205,
"grad_norm": 2.154975175857544,
"learning_rate": 2.1621621621621623e-06,
"loss": 0.6472,
"step": 16
},
{
"epoch": 0.011502029769959404,
"grad_norm": 2.405954599380493,
"learning_rate": 2.297297297297298e-06,
"loss": 0.6622,
"step": 17
},
{
"epoch": 0.012178619756427604,
"grad_norm": 1.8810043334960938,
"learning_rate": 2.432432432432433e-06,
"loss": 0.6463,
"step": 18
},
{
"epoch": 0.012855209742895805,
"grad_norm": 2.251763105392456,
"learning_rate": 2.5675675675675675e-06,
"loss": 0.6118,
"step": 19
},
{
"epoch": 0.013531799729364006,
"grad_norm": 2.2010996341705322,
"learning_rate": 2.702702702702703e-06,
"loss": 0.6566,
"step": 20
},
{
"epoch": 0.013531799729364006,
"eval_accuracy": 0.726605504587156,
"eval_f1": 0.11834319526627218,
"eval_loss": 0.6250319480895996,
"eval_precision": 0.5,
"eval_recall": 0.06711409395973154,
"eval_runtime": 51.8026,
"eval_samples_per_second": 5.753,
"eval_steps_per_second": 0.193,
"step": 20
},
{
"epoch": 0.014208389715832206,
"grad_norm": 2.1348178386688232,
"learning_rate": 2.837837837837838e-06,
"loss": 0.6593,
"step": 21
},
{
"epoch": 0.014884979702300407,
"grad_norm": 2.461346387863159,
"learning_rate": 2.9729729729729736e-06,
"loss": 0.5665,
"step": 22
},
{
"epoch": 0.015561569688768605,
"grad_norm": 1.7864395380020142,
"learning_rate": 3.1081081081081082e-06,
"loss": 0.6044,
"step": 23
},
{
"epoch": 0.016238159675236806,
"grad_norm": 2.120920419692993,
"learning_rate": 3.2432432432432437e-06,
"loss": 0.6494,
"step": 24
},
{
"epoch": 0.016914749661705007,
"grad_norm": 2.293957233428955,
"learning_rate": 3.3783783783783788e-06,
"loss": 0.6729,
"step": 25
},
{
"epoch": 0.017591339648173207,
"grad_norm": 1.9928455352783203,
"learning_rate": 3.513513513513514e-06,
"loss": 0.606,
"step": 26
},
{
"epoch": 0.018267929634641408,
"grad_norm": 1.8565198183059692,
"learning_rate": 3.648648648648649e-06,
"loss": 0.571,
"step": 27
},
{
"epoch": 0.018944519621109608,
"grad_norm": 1.8976123332977295,
"learning_rate": 3.7837837837837844e-06,
"loss": 0.5702,
"step": 28
},
{
"epoch": 0.01962110960757781,
"grad_norm": 2.2150862216949463,
"learning_rate": 3.918918918918919e-06,
"loss": 0.5535,
"step": 29
},
{
"epoch": 0.02029769959404601,
"grad_norm": 2.0916941165924072,
"learning_rate": 4.0540540540540545e-06,
"loss": 0.6707,
"step": 30
},
{
"epoch": 0.02097428958051421,
"grad_norm": 2.0436134338378906,
"learning_rate": 4.189189189189189e-06,
"loss": 0.5966,
"step": 31
},
{
"epoch": 0.02165087956698241,
"grad_norm": 1.8890984058380127,
"learning_rate": 4.324324324324325e-06,
"loss": 0.5533,
"step": 32
},
{
"epoch": 0.022327469553450607,
"grad_norm": 2.0738587379455566,
"learning_rate": 4.45945945945946e-06,
"loss": 0.6128,
"step": 33
},
{
"epoch": 0.023004059539918808,
"grad_norm": 1.9424076080322266,
"learning_rate": 4.594594594594596e-06,
"loss": 0.5763,
"step": 34
},
{
"epoch": 0.02368064952638701,
"grad_norm": 1.7840420007705688,
"learning_rate": 4.72972972972973e-06,
"loss": 0.5632,
"step": 35
},
{
"epoch": 0.02435723951285521,
"grad_norm": 2.2191755771636963,
"learning_rate": 4.864864864864866e-06,
"loss": 0.6482,
"step": 36
},
{
"epoch": 0.02503382949932341,
"grad_norm": 1.925732970237732,
"learning_rate": 5e-06,
"loss": 0.6266,
"step": 37
},
{
"epoch": 0.02571041948579161,
"grad_norm": 1.7854461669921875,
"learning_rate": 5.135135135135135e-06,
"loss": 0.5505,
"step": 38
},
{
"epoch": 0.02638700947225981,
"grad_norm": 1.9672614336013794,
"learning_rate": 5.2702702702702705e-06,
"loss": 0.5851,
"step": 39
},
{
"epoch": 0.02706359945872801,
"grad_norm": 1.713619589805603,
"learning_rate": 5.405405405405406e-06,
"loss": 0.5066,
"step": 40
},
{
"epoch": 0.02706359945872801,
"eval_accuracy": 0.728440366972477,
"eval_f1": 0.11904761904761904,
"eval_loss": 0.6119223237037659,
"eval_precision": 0.5263157894736842,
"eval_recall": 0.06711409395973154,
"eval_runtime": 52.1134,
"eval_samples_per_second": 5.718,
"eval_steps_per_second": 0.192,
"step": 40
},
{
"epoch": 0.02774018944519621,
"grad_norm": 2.512800455093384,
"learning_rate": 5.540540540540541e-06,
"loss": 0.6359,
"step": 41
},
{
"epoch": 0.028416779431664412,
"grad_norm": 2.311678647994995,
"learning_rate": 5.675675675675676e-06,
"loss": 0.5823,
"step": 42
},
{
"epoch": 0.029093369418132613,
"grad_norm": 1.8111237287521362,
"learning_rate": 5.810810810810811e-06,
"loss": 0.5194,
"step": 43
},
{
"epoch": 0.029769959404600813,
"grad_norm": 2.3231632709503174,
"learning_rate": 5.945945945945947e-06,
"loss": 0.6335,
"step": 44
},
{
"epoch": 0.030446549391069014,
"grad_norm": 1.9767159223556519,
"learning_rate": 6.081081081081082e-06,
"loss": 0.5406,
"step": 45
},
{
"epoch": 0.03112313937753721,
"grad_norm": 1.8099788427352905,
"learning_rate": 6.2162162162162164e-06,
"loss": 0.5191,
"step": 46
},
{
"epoch": 0.031799729364005415,
"grad_norm": 1.9105194807052612,
"learning_rate": 6.351351351351351e-06,
"loss": 0.5575,
"step": 47
},
{
"epoch": 0.03247631935047361,
"grad_norm": 1.8297271728515625,
"learning_rate": 6.486486486486487e-06,
"loss": 0.51,
"step": 48
},
{
"epoch": 0.033152909336941816,
"grad_norm": 1.8884862661361694,
"learning_rate": 6.621621621621622e-06,
"loss": 0.5755,
"step": 49
},
{
"epoch": 0.03382949932341001,
"grad_norm": 2.0803935527801514,
"learning_rate": 6.7567567567567575e-06,
"loss": 0.5915,
"step": 50
},
{
"epoch": 0.03450608930987822,
"grad_norm": 2.036954164505005,
"learning_rate": 6.891891891891892e-06,
"loss": 0.5394,
"step": 51
},
{
"epoch": 0.035182679296346414,
"grad_norm": 2.0037217140197754,
"learning_rate": 7.027027027027028e-06,
"loss": 0.4967,
"step": 52
},
{
"epoch": 0.03585926928281461,
"grad_norm": 1.6572487354278564,
"learning_rate": 7.162162162162163e-06,
"loss": 0.5458,
"step": 53
},
{
"epoch": 0.036535859269282815,
"grad_norm": 1.8542054891586304,
"learning_rate": 7.297297297297298e-06,
"loss": 0.4571,
"step": 54
},
{
"epoch": 0.03721244925575101,
"grad_norm": 1.6970975399017334,
"learning_rate": 7.4324324324324324e-06,
"loss": 0.5125,
"step": 55
},
{
"epoch": 0.037889039242219216,
"grad_norm": 1.8225724697113037,
"learning_rate": 7.567567567567569e-06,
"loss": 0.549,
"step": 56
},
{
"epoch": 0.03856562922868741,
"grad_norm": 1.5912785530090332,
"learning_rate": 7.702702702702704e-06,
"loss": 0.4843,
"step": 57
},
{
"epoch": 0.03924221921515562,
"grad_norm": 1.694573998451233,
"learning_rate": 7.837837837837838e-06,
"loss": 0.5804,
"step": 58
},
{
"epoch": 0.039918809201623814,
"grad_norm": 1.6933585405349731,
"learning_rate": 7.972972972972974e-06,
"loss": 0.5306,
"step": 59
},
{
"epoch": 0.04059539918809202,
"grad_norm": 1.7225837707519531,
"learning_rate": 8.108108108108109e-06,
"loss": 0.4866,
"step": 60
},
{
"epoch": 0.04059539918809202,
"eval_accuracy": 0.7376146788990826,
"eval_f1": 0.2011173184357542,
"eval_loss": 0.581759512424469,
"eval_precision": 0.6,
"eval_recall": 0.12080536912751678,
"eval_runtime": 51.4731,
"eval_samples_per_second": 5.789,
"eval_steps_per_second": 0.194,
"step": 60
},
{
"epoch": 0.041271989174560215,
"grad_norm": 1.9804434776306152,
"learning_rate": 8.243243243243245e-06,
"loss": 0.5489,
"step": 61
},
{
"epoch": 0.04194857916102842,
"grad_norm": 2.3419950008392334,
"learning_rate": 8.378378378378378e-06,
"loss": 0.5551,
"step": 62
},
{
"epoch": 0.04262516914749662,
"grad_norm": 2.275982618331909,
"learning_rate": 8.513513513513514e-06,
"loss": 0.5127,
"step": 63
},
{
"epoch": 0.04330175913396482,
"grad_norm": 2.507098913192749,
"learning_rate": 8.64864864864865e-06,
"loss": 0.5736,
"step": 64
},
{
"epoch": 0.04397834912043302,
"grad_norm": 1.8046241998672485,
"learning_rate": 8.783783783783785e-06,
"loss": 0.4755,
"step": 65
},
{
"epoch": 0.044654939106901215,
"grad_norm": 1.8296290636062622,
"learning_rate": 8.91891891891892e-06,
"loss": 0.4999,
"step": 66
},
{
"epoch": 0.04533152909336942,
"grad_norm": 2.3316869735717773,
"learning_rate": 9.054054054054054e-06,
"loss": 0.4797,
"step": 67
},
{
"epoch": 0.046008119079837616,
"grad_norm": 1.6778762340545654,
"learning_rate": 9.189189189189191e-06,
"loss": 0.5238,
"step": 68
},
{
"epoch": 0.04668470906630582,
"grad_norm": 1.8217062950134277,
"learning_rate": 9.324324324324325e-06,
"loss": 0.526,
"step": 69
},
{
"epoch": 0.04736129905277402,
"grad_norm": 2.7135376930236816,
"learning_rate": 9.45945945945946e-06,
"loss": 0.5899,
"step": 70
},
{
"epoch": 0.04803788903924222,
"grad_norm": 1.841891884803772,
"learning_rate": 9.594594594594594e-06,
"loss": 0.5312,
"step": 71
},
{
"epoch": 0.04871447902571042,
"grad_norm": 1.9096564054489136,
"learning_rate": 9.729729729729732e-06,
"loss": 0.5277,
"step": 72
},
{
"epoch": 0.04939106901217862,
"grad_norm": 3.7141664028167725,
"learning_rate": 9.864864864864865e-06,
"loss": 0.5468,
"step": 73
},
{
"epoch": 0.05006765899864682,
"grad_norm": 2.147271156311035,
"learning_rate": 1e-05,
"loss": 0.4658,
"step": 74
},
{
"epoch": 0.05074424898511502,
"grad_norm": 3.2354440689086914,
"learning_rate": 1.0135135135135136e-05,
"loss": 0.4915,
"step": 75
},
{
"epoch": 0.05142083897158322,
"grad_norm": 2.6529741287231445,
"learning_rate": 1.027027027027027e-05,
"loss": 0.5009,
"step": 76
},
{
"epoch": 0.052097428958051424,
"grad_norm": 1.9220309257507324,
"learning_rate": 1.0405405405405407e-05,
"loss": 0.4614,
"step": 77
},
{
"epoch": 0.05277401894451962,
"grad_norm": 2.6269216537475586,
"learning_rate": 1.0540540540540541e-05,
"loss": 0.4909,
"step": 78
},
{
"epoch": 0.05345060893098782,
"grad_norm": 2.8617451190948486,
"learning_rate": 1.0675675675675677e-05,
"loss": 0.5087,
"step": 79
},
{
"epoch": 0.05412719891745602,
"grad_norm": 2.258033275604248,
"learning_rate": 1.0810810810810812e-05,
"loss": 0.4434,
"step": 80
},
{
"epoch": 0.05412719891745602,
"eval_accuracy": 0.7412844036697248,
"eval_f1": 0.3922413793103448,
"eval_loss": 0.5494486689567566,
"eval_precision": 0.5481927710843374,
"eval_recall": 0.3053691275167785,
"eval_runtime": 52.2043,
"eval_samples_per_second": 5.708,
"eval_steps_per_second": 0.192,
"step": 80
},
{
"epoch": 0.05480378890392422,
"grad_norm": 3.6041858196258545,
"learning_rate": 1.0945945945945946e-05,
"loss": 0.4269,
"step": 81
},
{
"epoch": 0.05548037889039242,
"grad_norm": 2.4709510803222656,
"learning_rate": 1.1081081081081081e-05,
"loss": 0.5329,
"step": 82
},
{
"epoch": 0.05615696887686062,
"grad_norm": 2.8416366577148438,
"learning_rate": 1.1216216216216219e-05,
"loss": 0.4599,
"step": 83
},
{
"epoch": 0.056833558863328824,
"grad_norm": 2.6396408081054688,
"learning_rate": 1.1351351351351352e-05,
"loss": 0.4452,
"step": 84
},
{
"epoch": 0.05751014884979702,
"grad_norm": 1.7931419610977173,
"learning_rate": 1.1486486486486488e-05,
"loss": 0.4034,
"step": 85
},
{
"epoch": 0.058186738836265225,
"grad_norm": 2.2836318016052246,
"learning_rate": 1.1621621621621622e-05,
"loss": 0.3732,
"step": 86
},
{
"epoch": 0.05886332882273342,
"grad_norm": 2.0475215911865234,
"learning_rate": 1.1756756756756757e-05,
"loss": 0.4186,
"step": 87
},
{
"epoch": 0.05953991880920163,
"grad_norm": 2.0375993251800537,
"learning_rate": 1.1891891891891894e-05,
"loss": 0.3456,
"step": 88
},
{
"epoch": 0.060216508795669824,
"grad_norm": 3.458310604095459,
"learning_rate": 1.2027027027027028e-05,
"loss": 0.3599,
"step": 89
},
{
"epoch": 0.06089309878213803,
"grad_norm": 2.087979555130005,
"learning_rate": 1.2162162162162164e-05,
"loss": 0.3591,
"step": 90
},
{
"epoch": 0.061569688768606225,
"grad_norm": 2.4800474643707275,
"learning_rate": 1.2297297297297299e-05,
"loss": 0.3947,
"step": 91
},
{
"epoch": 0.06224627875507442,
"grad_norm": 3.9390594959259033,
"learning_rate": 1.2432432432432433e-05,
"loss": 0.4404,
"step": 92
},
{
"epoch": 0.06292286874154263,
"grad_norm": 3.231876850128174,
"learning_rate": 1.2567567567567568e-05,
"loss": 0.4116,
"step": 93
},
{
"epoch": 0.06359945872801083,
"grad_norm": 5.661862373352051,
"learning_rate": 1.2702702702702702e-05,
"loss": 0.4991,
"step": 94
},
{
"epoch": 0.06427604871447902,
"grad_norm": 3.7746121883392334,
"learning_rate": 1.283783783783784e-05,
"loss": 0.5173,
"step": 95
},
{
"epoch": 0.06495263870094722,
"grad_norm": 2.9691073894500732,
"learning_rate": 1.2972972972972975e-05,
"loss": 0.377,
"step": 96
},
{
"epoch": 0.06562922868741543,
"grad_norm": 2.5602574348449707,
"learning_rate": 1.3108108108108109e-05,
"loss": 0.3232,
"step": 97
},
{
"epoch": 0.06630581867388363,
"grad_norm": 3.1697347164154053,
"learning_rate": 1.3243243243243244e-05,
"loss": 0.3596,
"step": 98
},
{
"epoch": 0.06698240866035182,
"grad_norm": 5.4793877601623535,
"learning_rate": 1.3378378378378381e-05,
"loss": 0.3252,
"step": 99
},
{
"epoch": 0.06765899864682003,
"grad_norm": 3.7010715007781982,
"learning_rate": 1.3513513513513515e-05,
"loss": 0.264,
"step": 100
},
{
"epoch": 0.06765899864682003,
"eval_accuracy": 0.7568807339449541,
"eval_f1": 0.40979955456570155,
"eval_loss": 0.5758041143417358,
"eval_precision": 0.609271523178808,
"eval_recall": 0.3087248322147651,
"eval_runtime": 51.8245,
"eval_samples_per_second": 5.75,
"eval_steps_per_second": 0.193,
"step": 100
},
{
"epoch": 0.06833558863328823,
"grad_norm": 2.3830792903900146,
"learning_rate": 1.364864864864865e-05,
"loss": 0.2756,
"step": 101
},
{
"epoch": 0.06901217861975643,
"grad_norm": 2.8554539680480957,
"learning_rate": 1.3783783783783784e-05,
"loss": 0.3233,
"step": 102
},
{
"epoch": 0.06968876860622462,
"grad_norm": 3.331234931945801,
"learning_rate": 1.391891891891892e-05,
"loss": 0.3524,
"step": 103
},
{
"epoch": 0.07036535859269283,
"grad_norm": 2.8779256343841553,
"learning_rate": 1.4054054054054055e-05,
"loss": 0.323,
"step": 104
},
{
"epoch": 0.07104194857916103,
"grad_norm": 2.8393092155456543,
"learning_rate": 1.4189189189189189e-05,
"loss": 0.328,
"step": 105
},
{
"epoch": 0.07171853856562922,
"grad_norm": 3.7622110843658447,
"learning_rate": 1.4324324324324326e-05,
"loss": 0.3449,
"step": 106
},
{
"epoch": 0.07239512855209743,
"grad_norm": 3.734447479248047,
"learning_rate": 1.4459459459459462e-05,
"loss": 0.3367,
"step": 107
},
{
"epoch": 0.07307171853856563,
"grad_norm": 4.105041980743408,
"learning_rate": 1.4594594594594596e-05,
"loss": 0.3038,
"step": 108
},
{
"epoch": 0.07374830852503383,
"grad_norm": 3.9254539012908936,
"learning_rate": 1.4729729729729731e-05,
"loss": 0.2617,
"step": 109
},
{
"epoch": 0.07442489851150202,
"grad_norm": 5.182884693145752,
"learning_rate": 1.4864864864864865e-05,
"loss": 0.3423,
"step": 110
},
{
"epoch": 0.07510148849797023,
"grad_norm": 3.852728843688965,
"learning_rate": 1.5000000000000002e-05,
"loss": 0.246,
"step": 111
},
{
"epoch": 0.07577807848443843,
"grad_norm": 3.291020631790161,
"learning_rate": 1.5135135135135138e-05,
"loss": 0.3383,
"step": 112
},
{
"epoch": 0.07645466847090664,
"grad_norm": 5.644819259643555,
"learning_rate": 1.527027027027027e-05,
"loss": 0.2452,
"step": 113
},
{
"epoch": 0.07713125845737483,
"grad_norm": 6.728042125701904,
"learning_rate": 1.540540540540541e-05,
"loss": 0.2767,
"step": 114
},
{
"epoch": 0.07780784844384303,
"grad_norm": 4.200859546661377,
"learning_rate": 1.554054054054054e-05,
"loss": 0.2707,
"step": 115
},
{
"epoch": 0.07848443843031123,
"grad_norm": 3.9574716091156006,
"learning_rate": 1.5675675675675676e-05,
"loss": 0.2733,
"step": 116
},
{
"epoch": 0.07916102841677942,
"grad_norm": 3.50284743309021,
"learning_rate": 1.581081081081081e-05,
"loss": 0.2615,
"step": 117
},
{
"epoch": 0.07983761840324763,
"grad_norm": 7.720501899719238,
"learning_rate": 1.5945945945945947e-05,
"loss": 0.2353,
"step": 118
},
{
"epoch": 0.08051420838971583,
"grad_norm": 5.794226169586182,
"learning_rate": 1.6081081081081083e-05,
"loss": 0.2454,
"step": 119
},
{
"epoch": 0.08119079837618404,
"grad_norm": 6.7274250984191895,
"learning_rate": 1.6216216216216218e-05,
"loss": 0.2948,
"step": 120
},
{
"epoch": 0.08119079837618404,
"eval_accuracy": 0.7678899082568807,
"eval_f1": 0.3990498812351544,
"eval_loss": 0.7271434664726257,
"eval_precision": 0.6829268292682927,
"eval_recall": 0.28187919463087246,
"eval_runtime": 51.8636,
"eval_samples_per_second": 5.746,
"eval_steps_per_second": 0.193,
"step": 120
},
{
"epoch": 0.08186738836265223,
"grad_norm": 4.321250915527344,
"learning_rate": 1.6351351351351354e-05,
"loss": 0.2774,
"step": 121
},
{
"epoch": 0.08254397834912043,
"grad_norm": 5.205666542053223,
"learning_rate": 1.648648648648649e-05,
"loss": 0.254,
"step": 122
},
{
"epoch": 0.08322056833558863,
"grad_norm": 4.166099548339844,
"learning_rate": 1.662162162162162e-05,
"loss": 0.2455,
"step": 123
},
{
"epoch": 0.08389715832205684,
"grad_norm": 5.376754283905029,
"learning_rate": 1.6756756756756757e-05,
"loss": 0.2982,
"step": 124
},
{
"epoch": 0.08457374830852503,
"grad_norm": 5.893986225128174,
"learning_rate": 1.6891891891891896e-05,
"loss": 0.2632,
"step": 125
},
{
"epoch": 0.08525033829499323,
"grad_norm": 5.461335182189941,
"learning_rate": 1.7027027027027028e-05,
"loss": 0.1964,
"step": 126
},
{
"epoch": 0.08592692828146144,
"grad_norm": 8.870018005371094,
"learning_rate": 1.7162162162162163e-05,
"loss": 0.3057,
"step": 127
},
{
"epoch": 0.08660351826792964,
"grad_norm": 3.8947367668151855,
"learning_rate": 1.72972972972973e-05,
"loss": 0.2715,
"step": 128
},
{
"epoch": 0.08728010825439783,
"grad_norm": 4.829451084136963,
"learning_rate": 1.7432432432432434e-05,
"loss": 0.2395,
"step": 129
},
{
"epoch": 0.08795669824086604,
"grad_norm": 3.4110400676727295,
"learning_rate": 1.756756756756757e-05,
"loss": 0.2363,
"step": 130
},
{
"epoch": 0.08863328822733424,
"grad_norm": 3.4218814373016357,
"learning_rate": 1.7702702702702702e-05,
"loss": 0.2343,
"step": 131
},
{
"epoch": 0.08930987821380243,
"grad_norm": 4.7118425369262695,
"learning_rate": 1.783783783783784e-05,
"loss": 0.2438,
"step": 132
},
{
"epoch": 0.08998646820027063,
"grad_norm": 5.201712608337402,
"learning_rate": 1.7972972972972976e-05,
"loss": 0.3213,
"step": 133
},
{
"epoch": 0.09066305818673884,
"grad_norm": 8.192056655883789,
"learning_rate": 1.8108108108108108e-05,
"loss": 0.3043,
"step": 134
},
{
"epoch": 0.09133964817320704,
"grad_norm": 4.3292694091796875,
"learning_rate": 1.8243243243243244e-05,
"loss": 0.3077,
"step": 135
},
{
"epoch": 0.09201623815967523,
"grad_norm": 9.865090370178223,
"learning_rate": 1.8378378378378383e-05,
"loss": 0.2584,
"step": 136
},
{
"epoch": 0.09269282814614344,
"grad_norm": 7.474611759185791,
"learning_rate": 1.8513513513513515e-05,
"loss": 0.2391,
"step": 137
},
{
"epoch": 0.09336941813261164,
"grad_norm": 3.1332149505615234,
"learning_rate": 1.864864864864865e-05,
"loss": 0.1335,
"step": 138
},
{
"epoch": 0.09404600811907984,
"grad_norm": 4.493014335632324,
"learning_rate": 1.8783783783783786e-05,
"loss": 0.2344,
"step": 139
},
{
"epoch": 0.09472259810554803,
"grad_norm": 5.90848970413208,
"learning_rate": 1.891891891891892e-05,
"loss": 0.2329,
"step": 140
},
{
"epoch": 0.09472259810554803,
"eval_accuracy": 0.7697247706422018,
"eval_f1": 0.35475578406169667,
"eval_loss": 0.5965576767921448,
"eval_precision": 0.7582417582417582,
"eval_recall": 0.23154362416107382,
"eval_runtime": 51.8498,
"eval_samples_per_second": 5.747,
"eval_steps_per_second": 0.193,
"step": 140
},
{
"epoch": 0.09539918809201624,
"grad_norm": 3.0857784748077393,
"learning_rate": 1.9054054054054057e-05,
"loss": 0.1699,
"step": 141
},
{
"epoch": 0.09607577807848444,
"grad_norm": 3.591951370239258,
"learning_rate": 1.918918918918919e-05,
"loss": 0.2166,
"step": 142
},
{
"epoch": 0.09675236806495263,
"grad_norm": 5.5953826904296875,
"learning_rate": 1.9324324324324328e-05,
"loss": 0.1826,
"step": 143
},
{
"epoch": 0.09742895805142084,
"grad_norm": 4.522704601287842,
"learning_rate": 1.9459459459459463e-05,
"loss": 0.2288,
"step": 144
},
{
"epoch": 0.09810554803788904,
"grad_norm": 2.501812219619751,
"learning_rate": 1.9594594594594595e-05,
"loss": 0.1412,
"step": 145
},
{
"epoch": 0.09878213802435724,
"grad_norm": 6.26653528213501,
"learning_rate": 1.972972972972973e-05,
"loss": 0.2609,
"step": 146
},
{
"epoch": 0.09945872801082543,
"grad_norm": 13.06122875213623,
"learning_rate": 1.9864864864864866e-05,
"loss": 0.3233,
"step": 147
},
{
"epoch": 0.10013531799729364,
"grad_norm": 4.477540493011475,
"learning_rate": 2e-05,
"loss": 0.2679,
"step": 148
},
{
"epoch": 0.10081190798376184,
"grad_norm": 5.897082328796387,
"learning_rate": 1.9999972102437076e-05,
"loss": 0.2136,
"step": 149
},
{
"epoch": 0.10148849797023005,
"grad_norm": 4.226516246795654,
"learning_rate": 1.9999888409903948e-05,
"loss": 0.22,
"step": 150
},
{
"epoch": 0.10216508795669824,
"grad_norm": 6.373837471008301,
"learning_rate": 1.9999748922867592e-05,
"loss": 0.2117,
"step": 151
},
{
"epoch": 0.10284167794316644,
"grad_norm": 4.057104110717773,
"learning_rate": 1.9999553642106267e-05,
"loss": 0.2398,
"step": 152
},
{
"epoch": 0.10351826792963464,
"grad_norm": 6.765925884246826,
"learning_rate": 1.9999302568709548e-05,
"loss": 0.2921,
"step": 153
},
{
"epoch": 0.10419485791610285,
"grad_norm": 11.143022537231445,
"learning_rate": 1.9998995704078305e-05,
"loss": 0.2496,
"step": 154
},
{
"epoch": 0.10487144790257104,
"grad_norm": 7.253014087677002,
"learning_rate": 1.9998633049924693e-05,
"loss": 0.1869,
"step": 155
},
{
"epoch": 0.10554803788903924,
"grad_norm": 9.102387428283691,
"learning_rate": 1.9998214608272136e-05,
"loss": 0.2344,
"step": 156
},
{
"epoch": 0.10622462787550745,
"grad_norm": 3.9514195919036865,
"learning_rate": 1.9997740381455348e-05,
"loss": 0.2364,
"step": 157
},
{
"epoch": 0.10690121786197564,
"grad_norm": 5.509130954742432,
"learning_rate": 1.9997210372120276e-05,
"loss": 0.2863,
"step": 158
},
{
"epoch": 0.10757780784844384,
"grad_norm": 3.954360246658325,
"learning_rate": 1.9996624583224112e-05,
"loss": 0.1248,
"step": 159
},
{
"epoch": 0.10825439783491204,
"grad_norm": 3.0605578422546387,
"learning_rate": 1.999598301803528e-05,
"loss": 0.1726,
"step": 160
},
{
"epoch": 0.10825439783491204,
"eval_accuracy": 0.763302752293578,
"eval_f1": 0.31382978723404253,
"eval_loss": 0.5946537256240845,
"eval_precision": 0.7564102564102564,
"eval_recall": 0.19798657718120805,
"eval_runtime": 51.9229,
"eval_samples_per_second": 5.739,
"eval_steps_per_second": 0.193,
"step": 160
},
{
"epoch": 0.10893098782138025,
"grad_norm": 4.9909281730651855,
"learning_rate": 1.9995285680133393e-05,
"loss": 0.2449,
"step": 161
},
{
"epoch": 0.10960757780784844,
"grad_norm": 2.7885420322418213,
"learning_rate": 1.999453257340926e-05,
"loss": 0.1239,
"step": 162
},
{
"epoch": 0.11028416779431664,
"grad_norm": 4.381866931915283,
"learning_rate": 1.9993723702064852e-05,
"loss": 0.146,
"step": 163
},
{
"epoch": 0.11096075778078485,
"grad_norm": 7.0832109451293945,
"learning_rate": 1.9992859070613275e-05,
"loss": 0.2178,
"step": 164
},
{
"epoch": 0.11163734776725305,
"grad_norm": 4.502629280090332,
"learning_rate": 1.9991938683878746e-05,
"loss": 0.2039,
"step": 165
},
{
"epoch": 0.11231393775372124,
"grad_norm": 3.46604323387146,
"learning_rate": 1.9990962546996583e-05,
"loss": 0.1235,
"step": 166
},
{
"epoch": 0.11299052774018944,
"grad_norm": 2.314317464828491,
"learning_rate": 1.9989930665413148e-05,
"loss": 0.1033,
"step": 167
},
{
"epoch": 0.11366711772665765,
"grad_norm": 5.851840019226074,
"learning_rate": 1.998884304488584e-05,
"loss": 0.2414,
"step": 168
},
{
"epoch": 0.11434370771312584,
"grad_norm": 6.2724714279174805,
"learning_rate": 1.998769969148305e-05,
"loss": 0.2474,
"step": 169
},
{
"epoch": 0.11502029769959404,
"grad_norm": 3.0591259002685547,
"learning_rate": 1.9986500611584133e-05,
"loss": 0.1661,
"step": 170
},
{
"epoch": 0.11569688768606225,
"grad_norm": 4.147556304931641,
"learning_rate": 1.9985245811879372e-05,
"loss": 0.1855,
"step": 171
},
{
"epoch": 0.11637347767253045,
"grad_norm": 4.872109413146973,
"learning_rate": 1.9983935299369934e-05,
"loss": 0.2505,
"step": 172
},
{
"epoch": 0.11705006765899864,
"grad_norm": 10.929080963134766,
"learning_rate": 1.9982569081367844e-05,
"loss": 0.238,
"step": 173
},
{
"epoch": 0.11772665764546685,
"grad_norm": 9.166586875915527,
"learning_rate": 1.998114716549593e-05,
"loss": 0.2415,
"step": 174
},
{
"epoch": 0.11840324763193505,
"grad_norm": 4.646167278289795,
"learning_rate": 1.997966955968779e-05,
"loss": 0.1264,
"step": 175
},
{
"epoch": 0.11907983761840325,
"grad_norm": 4.666916847229004,
"learning_rate": 1.9978136272187745e-05,
"loss": 0.178,
"step": 176
},
{
"epoch": 0.11975642760487144,
"grad_norm": 7.303848743438721,
"learning_rate": 1.9976547311550796e-05,
"loss": 0.2303,
"step": 177
},
{
"epoch": 0.12043301759133965,
"grad_norm": 5.617541313171387,
"learning_rate": 1.997490268664256e-05,
"loss": 0.1295,
"step": 178
},
{
"epoch": 0.12110960757780785,
"grad_norm": 7.912723541259766,
"learning_rate": 1.9973202406639247e-05,
"loss": 0.2137,
"step": 179
},
{
"epoch": 0.12178619756427606,
"grad_norm": 3.9384965896606445,
"learning_rate": 1.997144648102759e-05,
"loss": 0.1085,
"step": 180
},
{
"epoch": 0.12178619756427606,
"eval_accuracy": 0.7678899082568807,
"eval_f1": 0.3394255874673629,
"eval_loss": 0.558770477771759,
"eval_precision": 0.7647058823529411,
"eval_recall": 0.2181208053691275,
"eval_runtime": 52.2836,
"eval_samples_per_second": 5.7,
"eval_steps_per_second": 0.191,
"step": 180
},
{
"epoch": 0.12246278755074425,
"grad_norm": 4.896997928619385,
"learning_rate": 1.99696349196048e-05,
"loss": 0.2525,
"step": 181
},
{
"epoch": 0.12313937753721245,
"grad_norm": 2.2250826358795166,
"learning_rate": 1.9967767732478506e-05,
"loss": 0.1442,
"step": 182
},
{
"epoch": 0.12381596752368065,
"grad_norm": 5.748762607574463,
"learning_rate": 1.99658449300667e-05,
"loss": 0.3173,
"step": 183
},
{
"epoch": 0.12449255751014884,
"grad_norm": 3.4051263332366943,
"learning_rate": 1.9963866523097683e-05,
"loss": 0.2134,
"step": 184
},
{
"epoch": 0.12516914749661706,
"grad_norm": 3.8892011642456055,
"learning_rate": 1.9961832522610004e-05,
"loss": 0.2136,
"step": 185
},
{
"epoch": 0.12584573748308525,
"grad_norm": 5.042850017547607,
"learning_rate": 1.9959742939952393e-05,
"loss": 0.1986,
"step": 186
},
{
"epoch": 0.12652232746955344,
"grad_norm": 7.566000461578369,
"learning_rate": 1.99575977867837e-05,
"loss": 0.2481,
"step": 187
},
{
"epoch": 0.12719891745602166,
"grad_norm": 5.193778991699219,
"learning_rate": 1.995539707507284e-05,
"loss": 0.2304,
"step": 188
},
{
"epoch": 0.12787550744248985,
"grad_norm": 4.714810371398926,
"learning_rate": 1.99531408170987e-05,
"loss": 0.2234,
"step": 189
},
{
"epoch": 0.12855209742895804,
"grad_norm": 4.679834842681885,
"learning_rate": 1.9950829025450116e-05,
"loss": 0.2152,
"step": 190
},
{
"epoch": 0.12922868741542626,
"grad_norm": 2.8689143657684326,
"learning_rate": 1.994846171302575e-05,
"loss": 0.1938,
"step": 191
},
{
"epoch": 0.12990527740189445,
"grad_norm": 3.1976468563079834,
"learning_rate": 1.9946038893034045e-05,
"loss": 0.1858,
"step": 192
},
{
"epoch": 0.13058186738836267,
"grad_norm": 3.2573113441467285,
"learning_rate": 1.994356057899317e-05,
"loss": 0.1333,
"step": 193
},
{
"epoch": 0.13125845737483086,
"grad_norm": 6.062759876251221,
"learning_rate": 1.9941026784730898e-05,
"loss": 0.2143,
"step": 194
},
{
"epoch": 0.13193504736129905,
"grad_norm": 3.474382162094116,
"learning_rate": 1.9938437524384572e-05,
"loss": 0.2385,
"step": 195
},
{
"epoch": 0.13261163734776726,
"grad_norm": 4.171142101287842,
"learning_rate": 1.9935792812400997e-05,
"loss": 0.2212,
"step": 196
},
{
"epoch": 0.13328822733423545,
"grad_norm": 2.72599720954895,
"learning_rate": 1.9933092663536384e-05,
"loss": 0.1579,
"step": 197
},
{
"epoch": 0.13396481732070364,
"grad_norm": 6.6125102043151855,
"learning_rate": 1.9930337092856243e-05,
"loss": 0.2187,
"step": 198
},
{
"epoch": 0.13464140730717186,
"grad_norm": 2.2951035499572754,
"learning_rate": 1.9927526115735315e-05,
"loss": 0.1567,
"step": 199
},
{
"epoch": 0.13531799729364005,
"grad_norm": 4.760623931884766,
"learning_rate": 1.9924659747857485e-05,
"loss": 0.2326,
"step": 200
},
{
"epoch": 0.13531799729364005,
"eval_accuracy": 0.7623853211009174,
"eval_f1": 0.3508771929824561,
"eval_loss": 0.5019528865814209,
"eval_precision": 0.693069306930693,
"eval_recall": 0.2348993288590604,
"eval_runtime": 51.9146,
"eval_samples_per_second": 5.74,
"eval_steps_per_second": 0.193,
"step": 200
},
{
"epoch": 0.13599458728010824,
"grad_norm": 2.476130247116089,
"learning_rate": 1.9921738005215687e-05,
"loss": 0.0617,
"step": 201
},
{
"epoch": 0.13667117726657646,
"grad_norm": 7.735743045806885,
"learning_rate": 1.9918760904111818e-05,
"loss": 0.252,
"step": 202
},
{
"epoch": 0.13734776725304465,
"grad_norm": 3.769490957260132,
"learning_rate": 1.991572846115666e-05,
"loss": 0.1439,
"step": 203
},
{
"epoch": 0.13802435723951287,
"grad_norm": 4.381724834442139,
"learning_rate": 1.9912640693269754e-05,
"loss": 0.2143,
"step": 204
},
{
"epoch": 0.13870094722598106,
"grad_norm": 3.511615037918091,
"learning_rate": 1.990949761767935e-05,
"loss": 0.1747,
"step": 205
},
{
"epoch": 0.13937753721244925,
"grad_norm": 5.647243499755859,
"learning_rate": 1.9906299251922273e-05,
"loss": 0.1904,
"step": 206
},
{
"epoch": 0.14005412719891747,
"grad_norm": 3.9160757064819336,
"learning_rate": 1.9903045613843844e-05,
"loss": 0.1816,
"step": 207
},
{
"epoch": 0.14073071718538566,
"grad_norm": 5.033432483673096,
"learning_rate": 1.9899736721597787e-05,
"loss": 0.141,
"step": 208
},
{
"epoch": 0.14140730717185385,
"grad_norm": 4.042255401611328,
"learning_rate": 1.9896372593646095e-05,
"loss": 0.2043,
"step": 209
},
{
"epoch": 0.14208389715832206,
"grad_norm": 7.3899149894714355,
"learning_rate": 1.989295324875897e-05,
"loss": 0.2182,
"step": 210
},
{
"epoch": 0.14276048714479025,
"grad_norm": 9.090564727783203,
"learning_rate": 1.9889478706014687e-05,
"loss": 0.2333,
"step": 211
},
{
"epoch": 0.14343707713125844,
"grad_norm": 4.3934526443481445,
"learning_rate": 1.9885948984799502e-05,
"loss": 0.2836,
"step": 212
},
{
"epoch": 0.14411366711772666,
"grad_norm": 3.89416766166687,
"learning_rate": 1.9882364104807536e-05,
"loss": 0.2256,
"step": 213
},
{
"epoch": 0.14479025710419485,
"grad_norm": 6.6705241203308105,
"learning_rate": 1.987872408604068e-05,
"loss": 0.2007,
"step": 214
},
{
"epoch": 0.14546684709066307,
"grad_norm": 9.201639175415039,
"learning_rate": 1.9875028948808457e-05,
"loss": 0.217,
"step": 215
},
{
"epoch": 0.14614343707713126,
"grad_norm": 3.3696377277374268,
"learning_rate": 1.9871278713727932e-05,
"loss": 0.1132,
"step": 216
},
{
"epoch": 0.14682002706359945,
"grad_norm": 3.9588944911956787,
"learning_rate": 1.9867473401723595e-05,
"loss": 0.2086,
"step": 217
},
{
"epoch": 0.14749661705006767,
"grad_norm": 5.10556173324585,
"learning_rate": 1.9863613034027224e-05,
"loss": 0.1367,
"step": 218
},
{
"epoch": 0.14817320703653586,
"grad_norm": 3.8839104175567627,
"learning_rate": 1.9859697632177796e-05,
"loss": 0.1882,
"step": 219
},
{
"epoch": 0.14884979702300405,
"grad_norm": 3.7395753860473633,
"learning_rate": 1.985572721802134e-05,
"loss": 0.1228,
"step": 220
},
{
"epoch": 0.14884979702300405,
"eval_accuracy": 0.763302752293578,
"eval_f1": 0.3316062176165803,
"eval_loss": 0.49911028146743774,
"eval_precision": 0.7272727272727273,
"eval_recall": 0.21476510067114093,
"eval_runtime": 53.4724,
"eval_samples_per_second": 5.573,
"eval_steps_per_second": 0.187,
"step": 220
},
{
"epoch": 0.14952638700947227,
"grad_norm": 2.52254056930542,
"learning_rate": 1.9851701813710838e-05,
"loss": 0.1429,
"step": 221
},
{
"epoch": 0.15020297699594046,
"grad_norm": 2.212614059448242,
"learning_rate": 1.9847621441706076e-05,
"loss": 0.0924,
"step": 222
},
{
"epoch": 0.15087956698240865,
"grad_norm": 5.361288547515869,
"learning_rate": 1.9843486124773546e-05,
"loss": 0.1915,
"step": 223
},
{
"epoch": 0.15155615696887687,
"grad_norm": 5.06777286529541,
"learning_rate": 1.98392958859863e-05,
"loss": 0.2049,
"step": 224
},
{
"epoch": 0.15223274695534506,
"grad_norm": 2.5112369060516357,
"learning_rate": 1.9835050748723826e-05,
"loss": 0.1132,
"step": 225
},
{
"epoch": 0.15290933694181327,
"grad_norm": 3.829697847366333,
"learning_rate": 1.9830750736671923e-05,
"loss": 0.1766,
"step": 226
},
{
"epoch": 0.15358592692828146,
"grad_norm": 6.517053604125977,
"learning_rate": 1.982639587382256e-05,
"loss": 0.2742,
"step": 227
},
{
"epoch": 0.15426251691474965,
"grad_norm": 3.8287339210510254,
"learning_rate": 1.9821986184473757e-05,
"loss": 0.1686,
"step": 228
},
{
"epoch": 0.15493910690121787,
"grad_norm": 3.6989524364471436,
"learning_rate": 1.981752169322942e-05,
"loss": 0.1286,
"step": 229
},
{
"epoch": 0.15561569688768606,
"grad_norm": 4.2301788330078125,
"learning_rate": 1.981300242499924e-05,
"loss": 0.1242,
"step": 230
},
{
"epoch": 0.15629228687415425,
"grad_norm": 5.749031066894531,
"learning_rate": 1.9808428404998532e-05,
"loss": 0.2348,
"step": 231
},
{
"epoch": 0.15696887686062247,
"grad_norm": 4.1858744621276855,
"learning_rate": 1.9803799658748096e-05,
"loss": 0.1809,
"step": 232
},
{
"epoch": 0.15764546684709066,
"grad_norm": 2.808894157409668,
"learning_rate": 1.9799116212074077e-05,
"loss": 0.1228,
"step": 233
},
{
"epoch": 0.15832205683355885,
"grad_norm": 4.898924350738525,
"learning_rate": 1.9794378091107834e-05,
"loss": 0.1964,
"step": 234
},
{
"epoch": 0.15899864682002707,
"grad_norm": 4.328680038452148,
"learning_rate": 1.978958532228576e-05,
"loss": 0.1566,
"step": 235
},
{
"epoch": 0.15967523680649526,
"grad_norm": 4.020467758178711,
"learning_rate": 1.978473793234918e-05,
"loss": 0.2254,
"step": 236
},
{
"epoch": 0.16035182679296348,
"grad_norm": 2.9529521465301514,
"learning_rate": 1.977983594834416e-05,
"loss": 0.1425,
"step": 237
},
{
"epoch": 0.16102841677943167,
"grad_norm": 3.5832724571228027,
"learning_rate": 1.9774879397621387e-05,
"loss": 0.1848,
"step": 238
},
{
"epoch": 0.16170500676589986,
"grad_norm": 6.061310768127441,
"learning_rate": 1.9769868307835996e-05,
"loss": 0.1344,
"step": 239
},
{
"epoch": 0.16238159675236807,
"grad_norm": 4.559755325317383,
"learning_rate": 1.9764802706947423e-05,
"loss": 0.1678,
"step": 240
},
{
"epoch": 0.16238159675236807,
"eval_accuracy": 0.763302752293578,
"eval_f1": 0.32105263157894737,
"eval_loss": 0.5306172966957092,
"eval_precision": 0.7439024390243902,
"eval_recall": 0.20469798657718122,
"eval_runtime": 52.6664,
"eval_samples_per_second": 5.658,
"eval_steps_per_second": 0.19,
"step": 240
},
{
"epoch": 0.16305818673883626,
"grad_norm": 4.202253818511963,
"learning_rate": 1.975968262321925e-05,
"loss": 0.1828,
"step": 241
},
{
"epoch": 0.16373477672530445,
"grad_norm": 4.026851654052734,
"learning_rate": 1.9754508085219057e-05,
"loss": 0.2173,
"step": 242
},
{
"epoch": 0.16441136671177267,
"grad_norm": 3.136077404022217,
"learning_rate": 1.9749279121818235e-05,
"loss": 0.1614,
"step": 243
},
{
"epoch": 0.16508795669824086,
"grad_norm": 3.726810932159424,
"learning_rate": 1.974399576219186e-05,
"loss": 0.1667,
"step": 244
},
{
"epoch": 0.16576454668470908,
"grad_norm": 5.430721282958984,
"learning_rate": 1.9738658035818495e-05,
"loss": 0.2158,
"step": 245
},
{
"epoch": 0.16644113667117727,
"grad_norm": 2.940300226211548,
"learning_rate": 1.973326597248006e-05,
"loss": 0.1664,
"step": 246
},
{
"epoch": 0.16711772665764546,
"grad_norm": 3.436058282852173,
"learning_rate": 1.972781960226163e-05,
"loss": 0.1944,
"step": 247
},
{
"epoch": 0.16779431664411368,
"grad_norm": 3.708514928817749,
"learning_rate": 1.9722318955551307e-05,
"loss": 0.1869,
"step": 248
},
{
"epoch": 0.16847090663058187,
"grad_norm": 5.172970771789551,
"learning_rate": 1.971676406304001e-05,
"loss": 0.219,
"step": 249
},
{
"epoch": 0.16914749661705006,
"grad_norm": 4.198179721832275,
"learning_rate": 1.9711154955721338e-05,
"loss": 0.1606,
"step": 250
},
{
"epoch": 0.16982408660351828,
"grad_norm": 4.068305969238281,
"learning_rate": 1.9705491664891368e-05,
"loss": 0.1952,
"step": 251
},
{
"epoch": 0.17050067658998647,
"grad_norm": 4.898702144622803,
"learning_rate": 1.969977422214851e-05,
"loss": 0.1904,
"step": 252
},
{
"epoch": 0.17117726657645466,
"grad_norm": 3.666128158569336,
"learning_rate": 1.9694002659393306e-05,
"loss": 0.177,
"step": 253
},
{
"epoch": 0.17185385656292287,
"grad_norm": 3.7936861515045166,
"learning_rate": 1.968817700882826e-05,
"loss": 0.1003,
"step": 254
},
{
"epoch": 0.17253044654939106,
"grad_norm": 3.3366808891296387,
"learning_rate": 1.9682297302957666e-05,
"loss": 0.1729,
"step": 255
},
{
"epoch": 0.17320703653585928,
"grad_norm": 4.625013828277588,
"learning_rate": 1.9676363574587414e-05,
"loss": 0.2212,
"step": 256
},
{
"epoch": 0.17388362652232747,
"grad_norm": 4.048298358917236,
"learning_rate": 1.9670375856824823e-05,
"loss": 0.1301,
"step": 257
},
{
"epoch": 0.17456021650879566,
"grad_norm": 3.388268232345581,
"learning_rate": 1.966433418307843e-05,
"loss": 0.091,
"step": 258
},
{
"epoch": 0.17523680649526388,
"grad_norm": 3.287910223007202,
"learning_rate": 1.9658238587057832e-05,
"loss": 0.1748,
"step": 259
},
{
"epoch": 0.17591339648173207,
"grad_norm": 4.814307689666748,
"learning_rate": 1.9652089102773487e-05,
"loss": 0.2111,
"step": 260
},
{
"epoch": 0.17591339648173207,
"eval_accuracy": 0.763302752293578,
"eval_f1": 0.29120879120879123,
"eval_loss": 0.5758374333381653,
"eval_precision": 0.803030303030303,
"eval_recall": 0.17785234899328858,
"eval_runtime": 52.5901,
"eval_samples_per_second": 5.666,
"eval_steps_per_second": 0.19,
"step": 260
},
{
"epoch": 0.17658998646820026,
"grad_norm": 6.4831461906433105,
"learning_rate": 1.9645885764536522e-05,
"loss": 0.2182,
"step": 261
},
{
"epoch": 0.17726657645466848,
"grad_norm": 6.477516174316406,
"learning_rate": 1.9639628606958535e-05,
"loss": 0.2462,
"step": 262
},
{
"epoch": 0.17794316644113667,
"grad_norm": 3.73384690284729,
"learning_rate": 1.9633317664951418e-05,
"loss": 0.1425,
"step": 263
},
{
"epoch": 0.17861975642760486,
"grad_norm": 4.063915252685547,
"learning_rate": 1.962695297372715e-05,
"loss": 0.1388,
"step": 264
},
{
"epoch": 0.17929634641407308,
"grad_norm": 4.379425048828125,
"learning_rate": 1.962053456879761e-05,
"loss": 0.1929,
"step": 265
},
{
"epoch": 0.17997293640054127,
"grad_norm": 3.696601629257202,
"learning_rate": 1.9614062485974364e-05,
"loss": 0.1795,
"step": 266
},
{
"epoch": 0.18064952638700948,
"grad_norm": 4.814270973205566,
"learning_rate": 1.9607536761368484e-05,
"loss": 0.1906,
"step": 267
},
{
"epoch": 0.18132611637347767,
"grad_norm": 4.517858028411865,
"learning_rate": 1.960095743139033e-05,
"loss": 0.1902,
"step": 268
},
{
"epoch": 0.18200270635994586,
"grad_norm": 4.473437309265137,
"learning_rate": 1.9594324532749353e-05,
"loss": 0.1581,
"step": 269
},
{
"epoch": 0.18267929634641408,
"grad_norm": 6.359562873840332,
"learning_rate": 1.95876381024539e-05,
"loss": 0.2641,
"step": 270
},
{
"epoch": 0.18335588633288227,
"grad_norm": 5.020825386047363,
"learning_rate": 1.958089817781099e-05,
"loss": 0.1734,
"step": 271
},
{
"epoch": 0.18403247631935046,
"grad_norm": 5.725416660308838,
"learning_rate": 1.9574104796426124e-05,
"loss": 0.2771,
"step": 272
},
{
"epoch": 0.18470906630581868,
"grad_norm": 3.2466204166412354,
"learning_rate": 1.956725799620305e-05,
"loss": 0.1493,
"step": 273
},
{
"epoch": 0.18538565629228687,
"grad_norm": 4.315434455871582,
"learning_rate": 1.9560357815343577e-05,
"loss": 0.1879,
"step": 274
},
{
"epoch": 0.18606224627875506,
"grad_norm": 4.0321245193481445,
"learning_rate": 1.9553404292347356e-05,
"loss": 0.1276,
"step": 275
},
{
"epoch": 0.18673883626522328,
"grad_norm": 3.7112905979156494,
"learning_rate": 1.9546397466011654e-05,
"loss": 0.173,
"step": 276
},
{
"epoch": 0.18741542625169147,
"grad_norm": 5.85778284072876,
"learning_rate": 1.9539337375431144e-05,
"loss": 0.3178,
"step": 277
},
{
"epoch": 0.1880920162381597,
"grad_norm": 5.455870151519775,
"learning_rate": 1.9532224059997693e-05,
"loss": 0.1971,
"step": 278
},
{
"epoch": 0.18876860622462788,
"grad_norm": 3.5191891193389893,
"learning_rate": 1.9525057559400134e-05,
"loss": 0.165,
"step": 279
},
{
"epoch": 0.18944519621109607,
"grad_norm": 4.080350399017334,
"learning_rate": 1.9517837913624048e-05,
"loss": 0.115,
"step": 280
},
{
"epoch": 0.18944519621109607,
"eval_accuracy": 0.7706422018348624,
"eval_f1": 0.3315508021390374,
"eval_loss": 0.49153777956962585,
"eval_precision": 0.8157894736842105,
"eval_recall": 0.2080536912751678,
"eval_runtime": 53.3463,
"eval_samples_per_second": 5.586,
"eval_steps_per_second": 0.187,
"step": 280
},
{
"epoch": 0.19012178619756429,
"grad_norm": 3.4508748054504395,
"learning_rate": 1.9510565162951538e-05,
"loss": 0.1461,
"step": 281
},
{
"epoch": 0.19079837618403248,
"grad_norm": 5.258754253387451,
"learning_rate": 1.9503239347961006e-05,
"loss": 0.2396,
"step": 282
},
{
"epoch": 0.19147496617050067,
"grad_norm": 5.140385627746582,
"learning_rate": 1.9495860509526935e-05,
"loss": 0.1444,
"step": 283
},
{
"epoch": 0.19215155615696888,
"grad_norm": 1.9280897378921509,
"learning_rate": 1.948842868881964e-05,
"loss": 0.1426,
"step": 284
},
{
"epoch": 0.19282814614343707,
"grad_norm": 2.6845431327819824,
"learning_rate": 1.948094392730506e-05,
"loss": 0.1702,
"step": 285
},
{
"epoch": 0.19350473612990526,
"grad_norm": 2.7291038036346436,
"learning_rate": 1.9473406266744518e-05,
"loss": 0.1525,
"step": 286
},
{
"epoch": 0.19418132611637348,
"grad_norm": 2.8851161003112793,
"learning_rate": 1.9465815749194482e-05,
"loss": 0.1419,
"step": 287
},
{
"epoch": 0.19485791610284167,
"grad_norm": 3.973231554031372,
"learning_rate": 1.9458172417006347e-05,
"loss": 0.1782,
"step": 288
},
{
"epoch": 0.1955345060893099,
"grad_norm": 5.708676338195801,
"learning_rate": 1.9450476312826178e-05,
"loss": 0.1396,
"step": 289
},
{
"epoch": 0.19621109607577808,
"grad_norm": 3.4198830127716064,
"learning_rate": 1.9442727479594486e-05,
"loss": 0.1762,
"step": 290
},
{
"epoch": 0.19688768606224627,
"grad_norm": 5.898075103759766,
"learning_rate": 1.9434925960545978e-05,
"loss": 0.213,
"step": 291
},
{
"epoch": 0.1975642760487145,
"grad_norm": 5.121380805969238,
"learning_rate": 1.9427071799209335e-05,
"loss": 0.2684,
"step": 292
},
{
"epoch": 0.19824086603518268,
"grad_norm": 5.1736931800842285,
"learning_rate": 1.941916503940694e-05,
"loss": 0.2272,
"step": 293
},
{
"epoch": 0.19891745602165087,
"grad_norm": 3.988576650619507,
"learning_rate": 1.941120572525467e-05,
"loss": 0.2007,
"step": 294
},
{
"epoch": 0.19959404600811909,
"grad_norm": 6.444464683532715,
"learning_rate": 1.9403193901161614e-05,
"loss": 0.2243,
"step": 295
},
{
"epoch": 0.20027063599458728,
"grad_norm": 3.4448323249816895,
"learning_rate": 1.9395129611829844e-05,
"loss": 0.1175,
"step": 296
},
{
"epoch": 0.2009472259810555,
"grad_norm": 7.464962005615234,
"learning_rate": 1.9387012902254165e-05,
"loss": 0.2362,
"step": 297
},
{
"epoch": 0.20162381596752368,
"grad_norm": 4.778014183044434,
"learning_rate": 1.9378843817721856e-05,
"loss": 0.1657,
"step": 298
},
{
"epoch": 0.20230040595399187,
"grad_norm": 4.121883392333984,
"learning_rate": 1.937062240381243e-05,
"loss": 0.1339,
"step": 299
},
{
"epoch": 0.2029769959404601,
"grad_norm": 5.5182576179504395,
"learning_rate": 1.9362348706397374e-05,
"loss": 0.1785,
"step": 300
},
{
"epoch": 0.2029769959404601,
"eval_accuracy": 0.773394495412844,
"eval_f1": 0.35170603674540685,
"eval_loss": 0.5283112525939941,
"eval_precision": 0.8072289156626506,
"eval_recall": 0.22483221476510068,
"eval_runtime": 51.5451,
"eval_samples_per_second": 5.781,
"eval_steps_per_second": 0.194,
"step": 300
},
{
"epoch": 0.20365358592692828,
"grad_norm": 5.575674057006836,
"learning_rate": 1.935402277163988e-05,
"loss": 0.1904,
"step": 301
},
{
"epoch": 0.20433017591339647,
"grad_norm": 4.0572829246521,
"learning_rate": 1.934564464599461e-05,
"loss": 0.1882,
"step": 302
},
{
"epoch": 0.2050067658998647,
"grad_norm": 3.6547298431396484,
"learning_rate": 1.9337214376207417e-05,
"loss": 0.2039,
"step": 303
},
{
"epoch": 0.20568335588633288,
"grad_norm": 4.347687721252441,
"learning_rate": 1.9328732009315107e-05,
"loss": 0.169,
"step": 304
},
{
"epoch": 0.20635994587280107,
"grad_norm": 4.556861400604248,
"learning_rate": 1.932019759264514e-05,
"loss": 0.203,
"step": 305
},
{
"epoch": 0.2070365358592693,
"grad_norm": 5.285150527954102,
"learning_rate": 1.931161117381541e-05,
"loss": 0.1871,
"step": 306
},
{
"epoch": 0.20771312584573748,
"grad_norm": 3.5677342414855957,
"learning_rate": 1.9302972800733945e-05,
"loss": 0.1735,
"step": 307
},
{
"epoch": 0.2083897158322057,
"grad_norm": 2.768244743347168,
"learning_rate": 1.929428252159866e-05,
"loss": 0.1313,
"step": 308
},
{
"epoch": 0.2090663058186739,
"grad_norm": 3.212564706802368,
"learning_rate": 1.9285540384897073e-05,
"loss": 0.1884,
"step": 309
},
{
"epoch": 0.20974289580514208,
"grad_norm": 3.121872901916504,
"learning_rate": 1.9276746439406046e-05,
"loss": 0.2129,
"step": 310
},
{
"epoch": 0.2104194857916103,
"grad_norm": 3.6964709758758545,
"learning_rate": 1.9267900734191515e-05,
"loss": 0.2355,
"step": 311
},
{
"epoch": 0.21109607577807848,
"grad_norm": 3.5150861740112305,
"learning_rate": 1.9259003318608192e-05,
"loss": 0.1878,
"step": 312
},
{
"epoch": 0.21177266576454667,
"grad_norm": 3.9340460300445557,
"learning_rate": 1.925005424229933e-05,
"loss": 0.15,
"step": 313
},
{
"epoch": 0.2124492557510149,
"grad_norm": 8.675088882446289,
"learning_rate": 1.9241053555196405e-05,
"loss": 0.2527,
"step": 314
},
{
"epoch": 0.21312584573748308,
"grad_norm": 7.502621173858643,
"learning_rate": 1.923200130751887e-05,
"loss": 0.2089,
"step": 315
},
{
"epoch": 0.21380243572395127,
"grad_norm": 2.9825868606567383,
"learning_rate": 1.922289754977385e-05,
"loss": 0.1341,
"step": 316
},
{
"epoch": 0.2144790257104195,
"grad_norm": 5.308103084564209,
"learning_rate": 1.9213742332755877e-05,
"loss": 0.1872,
"step": 317
},
{
"epoch": 0.21515561569688768,
"grad_norm": 5.801865577697754,
"learning_rate": 1.9204535707546602e-05,
"loss": 0.2068,
"step": 318
},
{
"epoch": 0.2158322056833559,
"grad_norm": 4.229133605957031,
"learning_rate": 1.919527772551451e-05,
"loss": 0.1741,
"step": 319
},
{
"epoch": 0.2165087956698241,
"grad_norm": 2.5876822471618652,
"learning_rate": 1.918596843831462e-05,
"loss": 0.1221,
"step": 320
},
{
"epoch": 0.2165087956698241,
"eval_accuracy": 0.771559633027523,
"eval_f1": 0.3178082191780822,
"eval_loss": 0.49765169620513916,
"eval_precision": 0.8656716417910447,
"eval_recall": 0.19463087248322147,
"eval_runtime": 51.4716,
"eval_samples_per_second": 5.79,
"eval_steps_per_second": 0.194,
"step": 320
},
{
"epoch": 0.21718538565629228,
"grad_norm": 3.950688600540161,
"learning_rate": 1.9176607897888217e-05,
"loss": 0.1415,
"step": 321
},
{
"epoch": 0.2178619756427605,
"grad_norm": 2.8493704795837402,
"learning_rate": 1.916719615646256e-05,
"loss": 0.1171,
"step": 322
},
{
"epoch": 0.2185385656292287,
"grad_norm": 2.8084137439727783,
"learning_rate": 1.9157733266550577e-05,
"loss": 0.1774,
"step": 323
},
{
"epoch": 0.21921515561569688,
"grad_norm": 3.1478347778320312,
"learning_rate": 1.914821928095058e-05,
"loss": 0.1816,
"step": 324
},
{
"epoch": 0.2198917456021651,
"grad_norm": 3.8505070209503174,
"learning_rate": 1.913865425274597e-05,
"loss": 0.1413,
"step": 325
},
{
"epoch": 0.22056833558863329,
"grad_norm": 4.398627281188965,
"learning_rate": 1.9129038235304946e-05,
"loss": 0.1745,
"step": 326
},
{
"epoch": 0.22124492557510148,
"grad_norm": 4.207502841949463,
"learning_rate": 1.9119371282280197e-05,
"loss": 0.1996,
"step": 327
},
{
"epoch": 0.2219215155615697,
"grad_norm": 3.8576388359069824,
"learning_rate": 1.9109653447608607e-05,
"loss": 0.1522,
"step": 328
},
{
"epoch": 0.22259810554803788,
"grad_norm": 3.856382369995117,
"learning_rate": 1.909988478551096e-05,
"loss": 0.1642,
"step": 329
},
{
"epoch": 0.2232746955345061,
"grad_norm": 5.088944911956787,
"learning_rate": 1.909006535049163e-05,
"loss": 0.1496,
"step": 330
},
{
"epoch": 0.2239512855209743,
"grad_norm": 3.050905704498291,
"learning_rate": 1.908019519733827e-05,
"loss": 0.1263,
"step": 331
},
{
"epoch": 0.22462787550744248,
"grad_norm": 2.3252899646759033,
"learning_rate": 1.907027438112153e-05,
"loss": 0.1323,
"step": 332
},
{
"epoch": 0.2253044654939107,
"grad_norm": 3.3663437366485596,
"learning_rate": 1.9060302957194732e-05,
"loss": 0.1085,
"step": 333
},
{
"epoch": 0.2259810554803789,
"grad_norm": 6.574558258056641,
"learning_rate": 1.9050280981193555e-05,
"loss": 0.2637,
"step": 334
},
{
"epoch": 0.22665764546684708,
"grad_norm": 4.396321773529053,
"learning_rate": 1.9040208509035745e-05,
"loss": 0.1464,
"step": 335
},
{
"epoch": 0.2273342354533153,
"grad_norm": 6.94221305847168,
"learning_rate": 1.9030085596920786e-05,
"loss": 0.1608,
"step": 336
},
{
"epoch": 0.2280108254397835,
"grad_norm": 3.8373563289642334,
"learning_rate": 1.9019912301329593e-05,
"loss": 0.2373,
"step": 337
},
{
"epoch": 0.22868741542625168,
"grad_norm": 2.109513998031616,
"learning_rate": 1.900968867902419e-05,
"loss": 0.1219,
"step": 338
},
{
"epoch": 0.2293640054127199,
"grad_norm": 2.0259997844696045,
"learning_rate": 1.899941478704742e-05,
"loss": 0.1093,
"step": 339
},
{
"epoch": 0.23004059539918809,
"grad_norm": 2.7739081382751465,
"learning_rate": 1.8989090682722583e-05,
"loss": 0.1666,
"step": 340
},
{
"epoch": 0.23004059539918809,
"eval_accuracy": 0.7779816513761468,
"eval_f1": 0.36649214659685864,
"eval_loss": 0.48961758613586426,
"eval_precision": 0.8333333333333334,
"eval_recall": 0.2348993288590604,
"eval_runtime": 51.6267,
"eval_samples_per_second": 5.772,
"eval_steps_per_second": 0.194,
"step": 340
},
{
"epoch": 0.2307171853856563,
"grad_norm": 2.2220234870910645,
"learning_rate": 1.8978716423653153e-05,
"loss": 0.1309,
"step": 341
},
{
"epoch": 0.2313937753721245,
"grad_norm": 2.679076910018921,
"learning_rate": 1.8968292067722433e-05,
"loss": 0.1182,
"step": 342
},
{
"epoch": 0.23207036535859268,
"grad_norm": 2.6752357482910156,
"learning_rate": 1.8957817673093258e-05,
"loss": 0.0976,
"step": 343
},
{
"epoch": 0.2327469553450609,
"grad_norm": 3.9076900482177734,
"learning_rate": 1.8947293298207637e-05,
"loss": 0.2482,
"step": 344
},
{
"epoch": 0.2334235453315291,
"grad_norm": 2.4209744930267334,
"learning_rate": 1.8936719001786453e-05,
"loss": 0.1294,
"step": 345
},
{
"epoch": 0.23410013531799728,
"grad_norm": 2.483504056930542,
"learning_rate": 1.8926094842829128e-05,
"loss": 0.1349,
"step": 346
},
{
"epoch": 0.2347767253044655,
"grad_norm": 5.663891315460205,
"learning_rate": 1.891542088061329e-05,
"loss": 0.247,
"step": 347
},
{
"epoch": 0.2354533152909337,
"grad_norm": 3.8202829360961914,
"learning_rate": 1.8904697174694447e-05,
"loss": 0.1991,
"step": 348
},
{
"epoch": 0.2361299052774019,
"grad_norm": 4.326652526855469,
"learning_rate": 1.8893923784905647e-05,
"loss": 0.2144,
"step": 349
},
{
"epoch": 0.2368064952638701,
"grad_norm": 4.610856056213379,
"learning_rate": 1.888310077135716e-05,
"loss": 0.2581,
"step": 350
},
{
"epoch": 0.2374830852503383,
"grad_norm": 1.77244234085083,
"learning_rate": 1.887222819443612e-05,
"loss": 0.1308,
"step": 351
},
{
"epoch": 0.2381596752368065,
"grad_norm": 3.1713812351226807,
"learning_rate": 1.886130611480621e-05,
"loss": 0.1067,
"step": 352
},
{
"epoch": 0.2388362652232747,
"grad_norm": 3.379279136657715,
"learning_rate": 1.885033459340731e-05,
"loss": 0.1363,
"step": 353
},
{
"epoch": 0.2395128552097429,
"grad_norm": 3.649993896484375,
"learning_rate": 1.8839313691455163e-05,
"loss": 0.1521,
"step": 354
},
{
"epoch": 0.2401894451962111,
"grad_norm": 3.849156379699707,
"learning_rate": 1.8828243470441026e-05,
"loss": 0.2396,
"step": 355
},
{
"epoch": 0.2408660351826793,
"grad_norm": 2.089524745941162,
"learning_rate": 1.8817123992131344e-05,
"loss": 0.1016,
"step": 356
},
{
"epoch": 0.24154262516914748,
"grad_norm": 5.252878665924072,
"learning_rate": 1.880595531856738e-05,
"loss": 0.1606,
"step": 357
},
{
"epoch": 0.2422192151556157,
"grad_norm": 2.9322009086608887,
"learning_rate": 1.879473751206489e-05,
"loss": 0.1346,
"step": 358
},
{
"epoch": 0.2428958051420839,
"grad_norm": 3.819847822189331,
"learning_rate": 1.878347063521377e-05,
"loss": 0.2052,
"step": 359
},
{
"epoch": 0.2435723951285521,
"grad_norm": 3.4725139141082764,
"learning_rate": 1.8772154750877696e-05,
"loss": 0.1843,
"step": 360
},
{
"epoch": 0.2435723951285521,
"eval_accuracy": 0.8009174311926606,
"eval_f1": 0.49417249417249415,
"eval_loss": 0.4227532148361206,
"eval_precision": 0.8091603053435115,
"eval_recall": 0.35570469798657717,
"eval_runtime": 51.8917,
"eval_samples_per_second": 5.743,
"eval_steps_per_second": 0.193,
"step": 360
},
{
"epoch": 0.2442489851150203,
"grad_norm": 5.326034069061279,
"learning_rate": 1.876078992219379e-05,
"loss": 0.1945,
"step": 361
},
{
"epoch": 0.2449255751014885,
"grad_norm": 7.080038070678711,
"learning_rate": 1.8749376212572254e-05,
"loss": 0.2108,
"step": 362
},
{
"epoch": 0.2456021650879567,
"grad_norm": 4.56862211227417,
"learning_rate": 1.873791368569603e-05,
"loss": 0.1117,
"step": 363
},
{
"epoch": 0.2462787550744249,
"grad_norm": 5.154612064361572,
"learning_rate": 1.8726402405520425e-05,
"loss": 0.1771,
"step": 364
},
{
"epoch": 0.2469553450608931,
"grad_norm": 4.204885005950928,
"learning_rate": 1.8714842436272774e-05,
"loss": 0.1878,
"step": 365
},
{
"epoch": 0.2476319350473613,
"grad_norm": 3.4089162349700928,
"learning_rate": 1.8703233842452072e-05,
"loss": 0.1567,
"step": 366
},
{
"epoch": 0.2483085250338295,
"grad_norm": 3.831226348876953,
"learning_rate": 1.8691576688828613e-05,
"loss": 0.1782,
"step": 367
},
{
"epoch": 0.2489851150202977,
"grad_norm": 7.267455577850342,
"learning_rate": 1.8679871040443632e-05,
"loss": 0.2087,
"step": 368
},
{
"epoch": 0.2496617050067659,
"grad_norm": 4.274538993835449,
"learning_rate": 1.866811696260894e-05,
"loss": 0.1906,
"step": 369
},
{
"epoch": 0.2503382949932341,
"grad_norm": 5.741366863250732,
"learning_rate": 1.865631452090657e-05,
"loss": 0.2119,
"step": 370
},
{
"epoch": 0.2510148849797023,
"grad_norm": 3.3094899654388428,
"learning_rate": 1.8644463781188387e-05,
"loss": 0.1853,
"step": 371
},
{
"epoch": 0.2516914749661705,
"grad_norm": 3.474271774291992,
"learning_rate": 1.863256480957574e-05,
"loss": 0.1849,
"step": 372
},
{
"epoch": 0.2523680649526387,
"grad_norm": 2.9809741973876953,
"learning_rate": 1.8620617672459097e-05,
"loss": 0.1298,
"step": 373
},
{
"epoch": 0.2530446549391069,
"grad_norm": 6.16650915145874,
"learning_rate": 1.8608622436497657e-05,
"loss": 0.2405,
"step": 374
},
{
"epoch": 0.25372124492557513,
"grad_norm": 2.5345046520233154,
"learning_rate": 1.859657916861899e-05,
"loss": 0.1597,
"step": 375
},
{
"epoch": 0.2543978349120433,
"grad_norm": 4.540293216705322,
"learning_rate": 1.8584487936018663e-05,
"loss": 0.2112,
"step": 376
},
{
"epoch": 0.2550744248985115,
"grad_norm": 1.9297409057617188,
"learning_rate": 1.8572348806159857e-05,
"loss": 0.181,
"step": 377
},
{
"epoch": 0.2557510148849797,
"grad_norm": 2.586928606033325,
"learning_rate": 1.8560161846773002e-05,
"loss": 0.1348,
"step": 378
},
{
"epoch": 0.2564276048714479,
"grad_norm": 2.4230494499206543,
"learning_rate": 1.854792712585539e-05,
"loss": 0.1307,
"step": 379
},
{
"epoch": 0.2571041948579161,
"grad_norm": 3.1270833015441895,
"learning_rate": 1.8535644711670804e-05,
"loss": 0.1995,
"step": 380
},
{
"epoch": 0.2571041948579161,
"eval_accuracy": 0.773394495412844,
"eval_f1": 0.35509138381201044,
"eval_loss": 0.45505937933921814,
"eval_precision": 0.8,
"eval_recall": 0.22818791946308725,
"eval_runtime": 51.9893,
"eval_samples_per_second": 5.732,
"eval_steps_per_second": 0.192,
"step": 380
},
{
"epoch": 0.2577807848443843,
"grad_norm": 3.090973138809204,
"learning_rate": 1.8523314672749123e-05,
"loss": 0.1002,
"step": 381
},
{
"epoch": 0.2584573748308525,
"grad_norm": 2.197547197341919,
"learning_rate": 1.851093707788596e-05,
"loss": 0.1507,
"step": 382
},
{
"epoch": 0.2591339648173207,
"grad_norm": 3.1576650142669678,
"learning_rate": 1.8498511996142255e-05,
"loss": 0.1499,
"step": 383
},
{
"epoch": 0.2598105548037889,
"grad_norm": 4.379716396331787,
"learning_rate": 1.848603949684391e-05,
"loss": 0.1485,
"step": 384
},
{
"epoch": 0.2604871447902571,
"grad_norm": 3.0266523361206055,
"learning_rate": 1.8473519649581396e-05,
"loss": 0.1738,
"step": 385
},
{
"epoch": 0.26116373477672533,
"grad_norm": 4.019914627075195,
"learning_rate": 1.8460952524209355e-05,
"loss": 0.2077,
"step": 386
},
{
"epoch": 0.2618403247631935,
"grad_norm": 4.621005535125732,
"learning_rate": 1.844833819084622e-05,
"loss": 0.1384,
"step": 387
},
{
"epoch": 0.2625169147496617,
"grad_norm": 3.632671594619751,
"learning_rate": 1.8435676719873828e-05,
"loss": 0.1214,
"step": 388
},
{
"epoch": 0.2631935047361299,
"grad_norm": 6.161576271057129,
"learning_rate": 1.842296818193701e-05,
"loss": 0.2652,
"step": 389
},
{
"epoch": 0.2638700947225981,
"grad_norm": 2.499661922454834,
"learning_rate": 1.8410212647943215e-05,
"loss": 0.1611,
"step": 390
},
{
"epoch": 0.2645466847090663,
"grad_norm": 2.6793367862701416,
"learning_rate": 1.8397410189062106e-05,
"loss": 0.1301,
"step": 391
},
{
"epoch": 0.2652232746955345,
"grad_norm": 3.2850935459136963,
"learning_rate": 1.8384560876725163e-05,
"loss": 0.1692,
"step": 392
},
{
"epoch": 0.2658998646820027,
"grad_norm": 4.6707682609558105,
"learning_rate": 1.8371664782625287e-05,
"loss": 0.1819,
"step": 393
},
{
"epoch": 0.2665764546684709,
"grad_norm": 2.8113269805908203,
"learning_rate": 1.8358721978716398e-05,
"loss": 0.1587,
"step": 394
},
{
"epoch": 0.2672530446549391,
"grad_norm": 2.5062456130981445,
"learning_rate": 1.834573253721303e-05,
"loss": 0.1458,
"step": 395
},
{
"epoch": 0.2679296346414073,
"grad_norm": 4.038453102111816,
"learning_rate": 1.8332696530589936e-05,
"loss": 0.1954,
"step": 396
},
{
"epoch": 0.26860622462787553,
"grad_norm": 4.07120418548584,
"learning_rate": 1.831961403158168e-05,
"loss": 0.1479,
"step": 397
},
{
"epoch": 0.2692828146143437,
"grad_norm": 3.7134037017822266,
"learning_rate": 1.830648511318223e-05,
"loss": 0.1507,
"step": 398
},
{
"epoch": 0.2699594046008119,
"grad_norm": 4.160938739776611,
"learning_rate": 1.8293309848644554e-05,
"loss": 0.1709,
"step": 399
},
{
"epoch": 0.2706359945872801,
"grad_norm": 3.3158607482910156,
"learning_rate": 1.8280088311480203e-05,
"loss": 0.0761,
"step": 400
},
{
"epoch": 0.2706359945872801,
"eval_accuracy": 0.8073394495412844,
"eval_f1": 0.5161290322580645,
"eval_loss": 0.42147114872932434,
"eval_precision": 0.8235294117647058,
"eval_recall": 0.37583892617449666,
"eval_runtime": 51.945,
"eval_samples_per_second": 5.737,
"eval_steps_per_second": 0.193,
"step": 400
},
{
"epoch": 0.2713125845737483,
"grad_norm": 3.803469181060791,
"learning_rate": 1.8266820575458908e-05,
"loss": 0.1116,
"step": 401
},
{
"epoch": 0.2719891745602165,
"grad_norm": 4.727139949798584,
"learning_rate": 1.8253506714608176e-05,
"loss": 0.1973,
"step": 402
},
{
"epoch": 0.27266576454668473,
"grad_norm": 4.788311004638672,
"learning_rate": 1.8240146803212854e-05,
"loss": 0.2294,
"step": 403
},
{
"epoch": 0.2733423545331529,
"grad_norm": 9.147326469421387,
"learning_rate": 1.822674091581474e-05,
"loss": 0.1194,
"step": 404
},
{
"epoch": 0.2740189445196211,
"grad_norm": 3.2791059017181396,
"learning_rate": 1.8213289127212152e-05,
"loss": 0.0931,
"step": 405
},
{
"epoch": 0.2746955345060893,
"grad_norm": 4.282406330108643,
"learning_rate": 1.8199791512459507e-05,
"loss": 0.1893,
"step": 406
},
{
"epoch": 0.2753721244925575,
"grad_norm": 5.307563781738281,
"learning_rate": 1.8186248146866928e-05,
"loss": 0.1627,
"step": 407
},
{
"epoch": 0.27604871447902574,
"grad_norm": 3.126235246658325,
"learning_rate": 1.817265910599978e-05,
"loss": 0.1254,
"step": 408
},
{
"epoch": 0.2767253044654939,
"grad_norm": 3.751150369644165,
"learning_rate": 1.81590244656783e-05,
"loss": 0.1884,
"step": 409
},
{
"epoch": 0.2774018944519621,
"grad_norm": 4.908536434173584,
"learning_rate": 1.8145344301977126e-05,
"loss": 0.1522,
"step": 410
},
{
"epoch": 0.2780784844384303,
"grad_norm": 3.765190601348877,
"learning_rate": 1.8131618691224916e-05,
"loss": 0.1509,
"step": 411
},
{
"epoch": 0.2787550744248985,
"grad_norm": 4.558242321014404,
"learning_rate": 1.811784771000387e-05,
"loss": 0.1151,
"step": 412
},
{
"epoch": 0.2794316644113667,
"grad_norm": 3.2288334369659424,
"learning_rate": 1.8104031435149366e-05,
"loss": 0.1238,
"step": 413
},
{
"epoch": 0.28010825439783493,
"grad_norm": 3.91261625289917,
"learning_rate": 1.8090169943749477e-05,
"loss": 0.1979,
"step": 414
},
{
"epoch": 0.2807848443843031,
"grad_norm": 5.789203643798828,
"learning_rate": 1.8076263313144568e-05,
"loss": 0.1015,
"step": 415
},
{
"epoch": 0.2814614343707713,
"grad_norm": 3.4970619678497314,
"learning_rate": 1.806231162092686e-05,
"loss": 0.1626,
"step": 416
},
{
"epoch": 0.2821380243572395,
"grad_norm": 2.941303253173828,
"learning_rate": 1.804831494494e-05,
"loss": 0.1262,
"step": 417
},
{
"epoch": 0.2828146143437077,
"grad_norm": 3.1387312412261963,
"learning_rate": 1.8034273363278615e-05,
"loss": 0.112,
"step": 418
},
{
"epoch": 0.28349120433017594,
"grad_norm": 3.1637914180755615,
"learning_rate": 1.8020186954287883e-05,
"loss": 0.1387,
"step": 419
},
{
"epoch": 0.28416779431664413,
"grad_norm": 7.6281538009643555,
"learning_rate": 1.8006055796563103e-05,
"loss": 0.1498,
"step": 420
},
{
"epoch": 0.28416779431664413,
"eval_accuracy": 0.7880733944954128,
"eval_f1": 0.43795620437956206,
"eval_loss": 0.4631403982639313,
"eval_precision": 0.7964601769911505,
"eval_recall": 0.30201342281879195,
"eval_runtime": 53.6246,
"eval_samples_per_second": 5.557,
"eval_steps_per_second": 0.186,
"step": 420
},
{
"epoch": 0.2848443843031123,
"grad_norm": 7.874175071716309,
"learning_rate": 1.7991879968949248e-05,
"loss": 0.1542,
"step": 421
},
{
"epoch": 0.2855209742895805,
"grad_norm": 2.5916264057159424,
"learning_rate": 1.797765955054053e-05,
"loss": 0.1319,
"step": 422
},
{
"epoch": 0.2861975642760487,
"grad_norm": 9.027409553527832,
"learning_rate": 1.7963394620679945e-05,
"loss": 0.2224,
"step": 423
},
{
"epoch": 0.2868741542625169,
"grad_norm": 2.5223119258880615,
"learning_rate": 1.7949085258958853e-05,
"loss": 0.1183,
"step": 424
},
{
"epoch": 0.28755074424898514,
"grad_norm": 1.9095633029937744,
"learning_rate": 1.7934731545216515e-05,
"loss": 0.1178,
"step": 425
},
{
"epoch": 0.2882273342354533,
"grad_norm": 3.547039031982422,
"learning_rate": 1.792033355953966e-05,
"loss": 0.1246,
"step": 426
},
{
"epoch": 0.2889039242219215,
"grad_norm": 3.945955991744995,
"learning_rate": 1.790589138226203e-05,
"loss": 0.2155,
"step": 427
},
{
"epoch": 0.2895805142083897,
"grad_norm": 1.659956932067871,
"learning_rate": 1.789140509396394e-05,
"loss": 0.0721,
"step": 428
},
{
"epoch": 0.2902571041948579,
"grad_norm": 3.547576427459717,
"learning_rate": 1.7876874775471806e-05,
"loss": 0.1895,
"step": 429
},
{
"epoch": 0.29093369418132614,
"grad_norm": 5.191123008728027,
"learning_rate": 1.7862300507857733e-05,
"loss": 0.2105,
"step": 430
},
{
"epoch": 0.29161028416779433,
"grad_norm": 4.68615198135376,
"learning_rate": 1.7847682372439024e-05,
"loss": 0.2427,
"step": 431
},
{
"epoch": 0.2922868741542625,
"grad_norm": 7.467837333679199,
"learning_rate": 1.7833020450777756e-05,
"loss": 0.255,
"step": 432
},
{
"epoch": 0.2929634641407307,
"grad_norm": 4.769316673278809,
"learning_rate": 1.78183148246803e-05,
"loss": 0.2349,
"step": 433
},
{
"epoch": 0.2936400541271989,
"grad_norm": 2.3752694129943848,
"learning_rate": 1.7803565576196884e-05,
"loss": 0.1347,
"step": 434
},
{
"epoch": 0.2943166441136671,
"grad_norm": 2.9256367683410645,
"learning_rate": 1.7788772787621126e-05,
"loss": 0.19,
"step": 435
},
{
"epoch": 0.29499323410013534,
"grad_norm": 2.6127521991729736,
"learning_rate": 1.7773936541489577e-05,
"loss": 0.1579,
"step": 436
},
{
"epoch": 0.2956698240866035,
"grad_norm": 1.9983330965042114,
"learning_rate": 1.7759056920581256e-05,
"loss": 0.1109,
"step": 437
},
{
"epoch": 0.2963464140730717,
"grad_norm": 2.2543447017669678,
"learning_rate": 1.7744134007917195e-05,
"loss": 0.1244,
"step": 438
},
{
"epoch": 0.2970230040595399,
"grad_norm": 3.1791696548461914,
"learning_rate": 1.7729167886759974e-05,
"loss": 0.1867,
"step": 439
},
{
"epoch": 0.2976995940460081,
"grad_norm": 2.7958037853240967,
"learning_rate": 1.771415864061326e-05,
"loss": 0.1344,
"step": 440
},
{
"epoch": 0.2976995940460081,
"eval_accuracy": 0.7963302752293578,
"eval_f1": 0.47641509433962265,
"eval_loss": 0.42485642433166504,
"eval_precision": 0.8015873015873016,
"eval_recall": 0.3389261744966443,
"eval_runtime": 52.4984,
"eval_samples_per_second": 5.676,
"eval_steps_per_second": 0.19,
"step": 440
},
{
"epoch": 0.29837618403247634,
"grad_norm": 2.20705509185791,
"learning_rate": 1.7699106353221322e-05,
"loss": 0.1233,
"step": 441
},
{
"epoch": 0.29905277401894453,
"grad_norm": 2.328334331512451,
"learning_rate": 1.7684011108568593e-05,
"loss": 0.1142,
"step": 442
},
{
"epoch": 0.2997293640054127,
"grad_norm": 3.256822109222412,
"learning_rate": 1.7668872990879175e-05,
"loss": 0.1556,
"step": 443
},
{
"epoch": 0.3004059539918809,
"grad_norm": 2.4061648845672607,
"learning_rate": 1.765369208461639e-05,
"loss": 0.0828,
"step": 444
},
{
"epoch": 0.3010825439783491,
"grad_norm": 5.99202299118042,
"learning_rate": 1.7638468474482297e-05,
"loss": 0.157,
"step": 445
},
{
"epoch": 0.3017591339648173,
"grad_norm": 3.7360379695892334,
"learning_rate": 1.762320224541722e-05,
"loss": 0.1257,
"step": 446
},
{
"epoch": 0.30243572395128554,
"grad_norm": 3.2651238441467285,
"learning_rate": 1.760789348259927e-05,
"loss": 0.1732,
"step": 447
},
{
"epoch": 0.30311231393775373,
"grad_norm": 3.5508763790130615,
"learning_rate": 1.7592542271443888e-05,
"loss": 0.1644,
"step": 448
},
{
"epoch": 0.3037889039242219,
"grad_norm": 4.703643798828125,
"learning_rate": 1.757714869760335e-05,
"loss": 0.2103,
"step": 449
},
{
"epoch": 0.3044654939106901,
"grad_norm": 5.588313102722168,
"learning_rate": 1.756171284696629e-05,
"loss": 0.187,
"step": 450
},
{
"epoch": 0.3051420838971583,
"grad_norm": 1.9803051948547363,
"learning_rate": 1.7546234805657235e-05,
"loss": 0.0944,
"step": 451
},
{
"epoch": 0.30581867388362655,
"grad_norm": 3.1527740955352783,
"learning_rate": 1.7530714660036112e-05,
"loss": 0.1105,
"step": 452
},
{
"epoch": 0.30649526387009474,
"grad_norm": 4.078627109527588,
"learning_rate": 1.7515152496697765e-05,
"loss": 0.161,
"step": 453
},
{
"epoch": 0.3071718538565629,
"grad_norm": 4.430943489074707,
"learning_rate": 1.749954840247148e-05,
"loss": 0.1883,
"step": 454
},
{
"epoch": 0.3078484438430311,
"grad_norm": 3.115837335586548,
"learning_rate": 1.7483902464420507e-05,
"loss": 0.1122,
"step": 455
},
{
"epoch": 0.3085250338294993,
"grad_norm": 3.008695602416992,
"learning_rate": 1.7468214769841542e-05,
"loss": 0.1034,
"step": 456
},
{
"epoch": 0.3092016238159675,
"grad_norm": 6.273781776428223,
"learning_rate": 1.7452485406264278e-05,
"loss": 0.1709,
"step": 457
},
{
"epoch": 0.30987821380243574,
"grad_norm": 4.796054363250732,
"learning_rate": 1.74367144614509e-05,
"loss": 0.1932,
"step": 458
},
{
"epoch": 0.31055480378890393,
"grad_norm": 10.217569351196289,
"learning_rate": 1.742090202339559e-05,
"loss": 0.19,
"step": 459
},
{
"epoch": 0.3112313937753721,
"grad_norm": 3.481541395187378,
"learning_rate": 1.7405048180324046e-05,
"loss": 0.161,
"step": 460
},
{
"epoch": 0.3112313937753721,
"eval_accuracy": 0.8091743119266055,
"eval_f1": 0.5336322869955157,
"eval_loss": 0.42575448751449585,
"eval_precision": 0.8040540540540541,
"eval_recall": 0.39932885906040266,
"eval_runtime": 53.3596,
"eval_samples_per_second": 5.585,
"eval_steps_per_second": 0.187,
"step": 460
},
{
"epoch": 0.3119079837618403,
"grad_norm": 3.4007368087768555,
"learning_rate": 1.7389153020692985e-05,
"loss": 0.1502,
"step": 461
},
{
"epoch": 0.3125845737483085,
"grad_norm": 3.0644993782043457,
"learning_rate": 1.7373216633189653e-05,
"loss": 0.1749,
"step": 462
},
{
"epoch": 0.31326116373477675,
"grad_norm": 4.407646179199219,
"learning_rate": 1.735723910673132e-05,
"loss": 0.1703,
"step": 463
},
{
"epoch": 0.31393775372124494,
"grad_norm": 3.6031856536865234,
"learning_rate": 1.7341220530464796e-05,
"loss": 0.1745,
"step": 464
},
{
"epoch": 0.31461434370771313,
"grad_norm": 5.204887390136719,
"learning_rate": 1.7325160993765934e-05,
"loss": 0.0987,
"step": 465
},
{
"epoch": 0.3152909336941813,
"grad_norm": 2.864173173904419,
"learning_rate": 1.7309060586239117e-05,
"loss": 0.1985,
"step": 466
},
{
"epoch": 0.3159675236806495,
"grad_norm": 3.125213861465454,
"learning_rate": 1.7292919397716772e-05,
"loss": 0.1482,
"step": 467
},
{
"epoch": 0.3166441136671177,
"grad_norm": 5.636457920074463,
"learning_rate": 1.7276737518258865e-05,
"loss": 0.1882,
"step": 468
},
{
"epoch": 0.31732070365358594,
"grad_norm": 4.034516334533691,
"learning_rate": 1.7260515038152393e-05,
"loss": 0.2319,
"step": 469
},
{
"epoch": 0.31799729364005414,
"grad_norm": 2.29288911819458,
"learning_rate": 1.7244252047910893e-05,
"loss": 0.0806,
"step": 470
},
{
"epoch": 0.3186738836265223,
"grad_norm": 4.686462879180908,
"learning_rate": 1.7227948638273918e-05,
"loss": 0.192,
"step": 471
},
{
"epoch": 0.3193504736129905,
"grad_norm": 3.879487991333008,
"learning_rate": 1.7211604900206552e-05,
"loss": 0.1904,
"step": 472
},
{
"epoch": 0.3200270635994587,
"grad_norm": 4.023051738739014,
"learning_rate": 1.7195220924898883e-05,
"loss": 0.2034,
"step": 473
},
{
"epoch": 0.32070365358592695,
"grad_norm": 4.105659008026123,
"learning_rate": 1.717879680376551e-05,
"loss": 0.1803,
"step": 474
},
{
"epoch": 0.32138024357239514,
"grad_norm": 5.522044658660889,
"learning_rate": 1.7162332628445024e-05,
"loss": 0.2052,
"step": 475
},
{
"epoch": 0.32205683355886333,
"grad_norm": 4.441620349884033,
"learning_rate": 1.7145828490799497e-05,
"loss": 0.1982,
"step": 476
},
{
"epoch": 0.3227334235453315,
"grad_norm": 2.258070707321167,
"learning_rate": 1.7129284482913973e-05,
"loss": 0.1493,
"step": 477
},
{
"epoch": 0.3234100135317997,
"grad_norm": 4.115694522857666,
"learning_rate": 1.7112700697095955e-05,
"loss": 0.1957,
"step": 478
},
{
"epoch": 0.32408660351826796,
"grad_norm": 4.366945743560791,
"learning_rate": 1.709607722587488e-05,
"loss": 0.2066,
"step": 479
},
{
"epoch": 0.32476319350473615,
"grad_norm": 3.625458240509033,
"learning_rate": 1.7079414162001617e-05,
"loss": 0.2087,
"step": 480
},
{
"epoch": 0.32476319350473615,
"eval_accuracy": 0.810091743119266,
"eval_f1": 0.5152224824355972,
"eval_loss": 0.4003700017929077,
"eval_precision": 0.8527131782945736,
"eval_recall": 0.3691275167785235,
"eval_runtime": 52.4305,
"eval_samples_per_second": 5.684,
"eval_steps_per_second": 0.191,
"step": 480
},
{
"epoch": 0.32543978349120434,
"grad_norm": 4.504916667938232,
"learning_rate": 1.7062711598447936e-05,
"loss": 0.168,
"step": 481
},
{
"epoch": 0.3261163734776725,
"grad_norm": 3.6613380908966064,
"learning_rate": 1.7045969628406013e-05,
"loss": 0.1761,
"step": 482
},
{
"epoch": 0.3267929634641407,
"grad_norm": 5.603222846984863,
"learning_rate": 1.7029188345287868e-05,
"loss": 0.1618,
"step": 483
},
{
"epoch": 0.3274695534506089,
"grad_norm": 5.035453796386719,
"learning_rate": 1.7012367842724887e-05,
"loss": 0.1485,
"step": 484
},
{
"epoch": 0.32814614343707715,
"grad_norm": 3.6518185138702393,
"learning_rate": 1.6995508214567275e-05,
"loss": 0.1497,
"step": 485
},
{
"epoch": 0.32882273342354534,
"grad_norm": 3.421865463256836,
"learning_rate": 1.6978609554883544e-05,
"loss": 0.1269,
"step": 486
},
{
"epoch": 0.32949932341001353,
"grad_norm": 1.6450062990188599,
"learning_rate": 1.6961671957959967e-05,
"loss": 0.0814,
"step": 487
},
{
"epoch": 0.3301759133964817,
"grad_norm": 3.649200201034546,
"learning_rate": 1.6944695518300087e-05,
"loss": 0.0999,
"step": 488
},
{
"epoch": 0.3308525033829499,
"grad_norm": 5.043969631195068,
"learning_rate": 1.6927680330624165e-05,
"loss": 0.2421,
"step": 489
},
{
"epoch": 0.33152909336941816,
"grad_norm": 2.5395421981811523,
"learning_rate": 1.691062648986865e-05,
"loss": 0.1194,
"step": 490
},
{
"epoch": 0.33220568335588635,
"grad_norm": 3.3575992584228516,
"learning_rate": 1.6893534091185658e-05,
"loss": 0.1217,
"step": 491
},
{
"epoch": 0.33288227334235454,
"grad_norm": 3.968233823776245,
"learning_rate": 1.6876403229942453e-05,
"loss": 0.1636,
"step": 492
},
{
"epoch": 0.33355886332882273,
"grad_norm": 4.133052825927734,
"learning_rate": 1.6859234001720882e-05,
"loss": 0.0986,
"step": 493
},
{
"epoch": 0.3342354533152909,
"grad_norm": 6.324604034423828,
"learning_rate": 1.6842026502316874e-05,
"loss": 0.1145,
"step": 494
},
{
"epoch": 0.3349120433017591,
"grad_norm": 4.619051933288574,
"learning_rate": 1.682478082773989e-05,
"loss": 0.2143,
"step": 495
},
{
"epoch": 0.33558863328822736,
"grad_norm": 3.0241997241973877,
"learning_rate": 1.680749707421238e-05,
"loss": 0.1057,
"step": 496
},
{
"epoch": 0.33626522327469555,
"grad_norm": 1.7503517866134644,
"learning_rate": 1.6790175338169277e-05,
"loss": 0.0634,
"step": 497
},
{
"epoch": 0.33694181326116374,
"grad_norm": 4.2864990234375,
"learning_rate": 1.6772815716257414e-05,
"loss": 0.1524,
"step": 498
},
{
"epoch": 0.3376184032476319,
"grad_norm": 4.454866886138916,
"learning_rate": 1.6755418305335026e-05,
"loss": 0.1908,
"step": 499
},
{
"epoch": 0.3382949932341001,
"grad_norm": 5.34849739074707,
"learning_rate": 1.673798320247118e-05,
"loss": 0.1857,
"step": 500
},
{
"epoch": 0.3382949932341001,
"eval_accuracy": 0.7862385321100918,
"eval_f1": 0.391644908616188,
"eval_loss": 0.5401991605758667,
"eval_precision": 0.8823529411764706,
"eval_recall": 0.2516778523489933,
"eval_runtime": 52.9654,
"eval_samples_per_second": 5.626,
"eval_steps_per_second": 0.189,
"step": 500
},
{
"epoch": 0.33897158322056836,
"grad_norm": 4.322254180908203,
"learning_rate": 1.672051050494526e-05,
"loss": 0.2227,
"step": 501
},
{
"epoch": 0.33964817320703655,
"grad_norm": 3.6232991218566895,
"learning_rate": 1.67030003102464e-05,
"loss": 0.1609,
"step": 502
},
{
"epoch": 0.34032476319350474,
"grad_norm": 6.037874221801758,
"learning_rate": 1.6685452716072946e-05,
"loss": 0.144,
"step": 503
},
{
"epoch": 0.34100135317997293,
"grad_norm": 3.3179101943969727,
"learning_rate": 1.6667867820331927e-05,
"loss": 0.1325,
"step": 504
},
{
"epoch": 0.3416779431664411,
"grad_norm": 3.1885428428649902,
"learning_rate": 1.6650245721138483e-05,
"loss": 0.1493,
"step": 505
},
{
"epoch": 0.3423545331529093,
"grad_norm": 3.5949137210845947,
"learning_rate": 1.6632586516815346e-05,
"loss": 0.1273,
"step": 506
},
{
"epoch": 0.34303112313937756,
"grad_norm": 2.8679418563842773,
"learning_rate": 1.6614890305892266e-05,
"loss": 0.0887,
"step": 507
},
{
"epoch": 0.34370771312584575,
"grad_norm": 2.384528160095215,
"learning_rate": 1.6597157187105475e-05,
"loss": 0.0974,
"step": 508
},
{
"epoch": 0.34438430311231394,
"grad_norm": 3.2372498512268066,
"learning_rate": 1.657938725939713e-05,
"loss": 0.1175,
"step": 509
},
{
"epoch": 0.34506089309878213,
"grad_norm": 2.4635872840881348,
"learning_rate": 1.6561580621914764e-05,
"loss": 0.0602,
"step": 510
},
{
"epoch": 0.3457374830852503,
"grad_norm": 4.7463531494140625,
"learning_rate": 1.6543737374010742e-05,
"loss": 0.1404,
"step": 511
},
{
"epoch": 0.34641407307171856,
"grad_norm": 3.910125255584717,
"learning_rate": 1.6525857615241686e-05,
"loss": 0.1732,
"step": 512
},
{
"epoch": 0.34709066305818675,
"grad_norm": 3.2249362468719482,
"learning_rate": 1.6507941445367935e-05,
"loss": 0.1706,
"step": 513
},
{
"epoch": 0.34776725304465494,
"grad_norm": 3.5670406818389893,
"learning_rate": 1.648998896435299e-05,
"loss": 0.1288,
"step": 514
},
{
"epoch": 0.34844384303112313,
"grad_norm": 2.954425096511841,
"learning_rate": 1.6472000272362937e-05,
"loss": 0.1691,
"step": 515
},
{
"epoch": 0.3491204330175913,
"grad_norm": 3.0924575328826904,
"learning_rate": 1.6453975469765913e-05,
"loss": 0.1445,
"step": 516
},
{
"epoch": 0.3497970230040595,
"grad_norm": 3.242204427719116,
"learning_rate": 1.643591465713153e-05,
"loss": 0.113,
"step": 517
},
{
"epoch": 0.35047361299052776,
"grad_norm": 3.513796806335449,
"learning_rate": 1.6417817935230318e-05,
"loss": 0.1342,
"step": 518
},
{
"epoch": 0.35115020297699595,
"grad_norm": 3.459606409072876,
"learning_rate": 1.6399685405033168e-05,
"loss": 0.167,
"step": 519
},
{
"epoch": 0.35182679296346414,
"grad_norm": 3.4279625415802,
"learning_rate": 1.6381517167710757e-05,
"loss": 0.1466,
"step": 520
},
{
"epoch": 0.35182679296346414,
"eval_accuracy": 0.8027522935779816,
"eval_f1": 0.48687350835322196,
"eval_loss": 0.4396270513534546,
"eval_precision": 0.8429752066115702,
"eval_recall": 0.3422818791946309,
"eval_runtime": 53.1809,
"eval_samples_per_second": 5.604,
"eval_steps_per_second": 0.188,
"step": 520
},
{
"epoch": 0.35250338294993233,
"grad_norm": 3.181802988052368,
"learning_rate": 1.6363313324632995e-05,
"loss": 0.1381,
"step": 521
},
{
"epoch": 0.3531799729364005,
"grad_norm": 2.620626449584961,
"learning_rate": 1.6345073977368455e-05,
"loss": 0.1523,
"step": 522
},
{
"epoch": 0.35385656292286877,
"grad_norm": 4.116923809051514,
"learning_rate": 1.6326799227683806e-05,
"loss": 0.0602,
"step": 523
},
{
"epoch": 0.35453315290933696,
"grad_norm": 3.4836175441741943,
"learning_rate": 1.630848917754324e-05,
"loss": 0.0969,
"step": 524
},
{
"epoch": 0.35520974289580515,
"grad_norm": 3.9089815616607666,
"learning_rate": 1.629014392910791e-05,
"loss": 0.1509,
"step": 525
},
{
"epoch": 0.35588633288227334,
"grad_norm": 3.1335699558258057,
"learning_rate": 1.6271763584735373e-05,
"loss": 0.1366,
"step": 526
},
{
"epoch": 0.3565629228687415,
"grad_norm": 3.3636960983276367,
"learning_rate": 1.625334824697898e-05,
"loss": 0.1233,
"step": 527
},
{
"epoch": 0.3572395128552097,
"grad_norm": 3.7551486492156982,
"learning_rate": 1.6234898018587336e-05,
"loss": 0.1541,
"step": 528
},
{
"epoch": 0.35791610284167796,
"grad_norm": 6.929388046264648,
"learning_rate": 1.6216413002503736e-05,
"loss": 0.2057,
"step": 529
},
{
"epoch": 0.35859269282814615,
"grad_norm": 4.0752763748168945,
"learning_rate": 1.619789330186555e-05,
"loss": 0.1008,
"step": 530
},
{
"epoch": 0.35926928281461434,
"grad_norm": 3.1588234901428223,
"learning_rate": 1.6179339020003685e-05,
"loss": 0.1454,
"step": 531
},
{
"epoch": 0.35994587280108253,
"grad_norm": 6.536987781524658,
"learning_rate": 1.616075026044199e-05,
"loss": 0.1461,
"step": 532
},
{
"epoch": 0.3606224627875507,
"grad_norm": 3.1867458820343018,
"learning_rate": 1.6142127126896682e-05,
"loss": 0.182,
"step": 533
},
{
"epoch": 0.36129905277401897,
"grad_norm": 3.9853105545043945,
"learning_rate": 1.6123469723275766e-05,
"loss": 0.1525,
"step": 534
},
{
"epoch": 0.36197564276048716,
"grad_norm": 2.4770116806030273,
"learning_rate": 1.6104778153678467e-05,
"loss": 0.1789,
"step": 535
},
{
"epoch": 0.36265223274695535,
"grad_norm": 4.895524024963379,
"learning_rate": 1.6086052522394625e-05,
"loss": 0.1909,
"step": 536
},
{
"epoch": 0.36332882273342354,
"grad_norm": 7.819604873657227,
"learning_rate": 1.6067292933904144e-05,
"loss": 0.1793,
"step": 537
},
{
"epoch": 0.36400541271989173,
"grad_norm": 5.251774311065674,
"learning_rate": 1.6048499492876378e-05,
"loss": 0.1829,
"step": 538
},
{
"epoch": 0.3646820027063599,
"grad_norm": 5.605532646179199,
"learning_rate": 1.602967230416957e-05,
"loss": 0.273,
"step": 539
},
{
"epoch": 0.36535859269282817,
"grad_norm": 3.302903175354004,
"learning_rate": 1.6010811472830253e-05,
"loss": 0.1608,
"step": 540
},
{
"epoch": 0.36535859269282817,
"eval_accuracy": 0.8165137614678899,
"eval_f1": 0.5412844036697247,
"eval_loss": 0.4029388427734375,
"eval_precision": 0.855072463768116,
"eval_recall": 0.3959731543624161,
"eval_runtime": 54.3008,
"eval_samples_per_second": 5.488,
"eval_steps_per_second": 0.184,
"step": 540
},
{
"epoch": 0.36603518267929636,
"grad_norm": 3.057288408279419,
"learning_rate": 1.5991917104092677e-05,
"loss": 0.1671,
"step": 541
},
{
"epoch": 0.36671177266576455,
"grad_norm": 4.837218761444092,
"learning_rate": 1.5972989303378207e-05,
"loss": 0.1425,
"step": 542
},
{
"epoch": 0.36738836265223274,
"grad_norm": 2.922201633453369,
"learning_rate": 1.595402817629475e-05,
"loss": 0.2097,
"step": 543
},
{
"epoch": 0.3680649526387009,
"grad_norm": 8.20699691772461,
"learning_rate": 1.593503382863615e-05,
"loss": 0.1657,
"step": 544
},
{
"epoch": 0.36874154262516917,
"grad_norm": 3.043370246887207,
"learning_rate": 1.591600636638161e-05,
"loss": 0.1568,
"step": 545
},
{
"epoch": 0.36941813261163736,
"grad_norm": 6.523357391357422,
"learning_rate": 1.589694589569509e-05,
"loss": 0.1299,
"step": 546
},
{
"epoch": 0.37009472259810555,
"grad_norm": 3.4266302585601807,
"learning_rate": 1.5877852522924733e-05,
"loss": 0.1608,
"step": 547
},
{
"epoch": 0.37077131258457374,
"grad_norm": 4.111809253692627,
"learning_rate": 1.5858726354602248e-05,
"loss": 0.1975,
"step": 548
},
{
"epoch": 0.37144790257104193,
"grad_norm": 3.3651816844940186,
"learning_rate": 1.5839567497442338e-05,
"loss": 0.171,
"step": 549
},
{
"epoch": 0.3721244925575101,
"grad_norm": 3.0030105113983154,
"learning_rate": 1.5820376058342077e-05,
"loss": 0.1365,
"step": 550
},
{
"epoch": 0.37280108254397837,
"grad_norm": 3.5873923301696777,
"learning_rate": 1.5801152144380353e-05,
"loss": 0.1745,
"step": 551
},
{
"epoch": 0.37347767253044656,
"grad_norm": 3.0994861125946045,
"learning_rate": 1.578189586281723e-05,
"loss": 0.1407,
"step": 552
},
{
"epoch": 0.37415426251691475,
"grad_norm": 2.788184642791748,
"learning_rate": 1.5762607321093368e-05,
"loss": 0.134,
"step": 553
},
{
"epoch": 0.37483085250338294,
"grad_norm": 2.5775389671325684,
"learning_rate": 1.5743286626829437e-05,
"loss": 0.1075,
"step": 554
},
{
"epoch": 0.37550744248985113,
"grad_norm": 3.5334537029266357,
"learning_rate": 1.5723933887825492e-05,
"loss": 0.1165,
"step": 555
},
{
"epoch": 0.3761840324763194,
"grad_norm": 3.544222116470337,
"learning_rate": 1.5704549212060383e-05,
"loss": 0.1739,
"step": 556
},
{
"epoch": 0.37686062246278756,
"grad_norm": 3.89497709274292,
"learning_rate": 1.568513270769115e-05,
"loss": 0.1549,
"step": 557
},
{
"epoch": 0.37753721244925575,
"grad_norm": 2.996244192123413,
"learning_rate": 1.5665684483052425e-05,
"loss": 0.1742,
"step": 558
},
{
"epoch": 0.37821380243572394,
"grad_norm": 4.149686336517334,
"learning_rate": 1.564620464665582e-05,
"loss": 0.1655,
"step": 559
},
{
"epoch": 0.37889039242219213,
"grad_norm": 3.9218225479125977,
"learning_rate": 1.5626693307189334e-05,
"loss": 0.1472,
"step": 560
},
{
"epoch": 0.37889039242219213,
"eval_accuracy": 0.8064220183486238,
"eval_f1": 0.5104408352668214,
"eval_loss": 0.44546324014663696,
"eval_precision": 0.8270676691729323,
"eval_recall": 0.3691275167785235,
"eval_runtime": 52.4956,
"eval_samples_per_second": 5.677,
"eval_steps_per_second": 0.19,
"step": 560
},
{
"epoch": 0.3795669824086603,
"grad_norm": 7.877570629119873,
"learning_rate": 1.560715057351673e-05,
"loss": 0.0964,
"step": 561
},
{
"epoch": 0.38024357239512857,
"grad_norm": 3.73523211479187,
"learning_rate": 1.5587576554676927e-05,
"loss": 0.1444,
"step": 562
},
{
"epoch": 0.38092016238159676,
"grad_norm": 10.825215339660645,
"learning_rate": 1.556797135988342e-05,
"loss": 0.151,
"step": 563
},
{
"epoch": 0.38159675236806495,
"grad_norm": 6.863844871520996,
"learning_rate": 1.5548335098523634e-05,
"loss": 0.2423,
"step": 564
},
{
"epoch": 0.38227334235453314,
"grad_norm": 3.01707124710083,
"learning_rate": 1.5528667880158338e-05,
"loss": 0.1483,
"step": 565
},
{
"epoch": 0.38294993234100133,
"grad_norm": 3.50577712059021,
"learning_rate": 1.5508969814521026e-05,
"loss": 0.1359,
"step": 566
},
{
"epoch": 0.3836265223274696,
"grad_norm": 3.558225631713867,
"learning_rate": 1.5489241011517303e-05,
"loss": 0.0951,
"step": 567
},
{
"epoch": 0.38430311231393777,
"grad_norm": 7.069665431976318,
"learning_rate": 1.5469481581224274e-05,
"loss": 0.0979,
"step": 568
},
{
"epoch": 0.38497970230040596,
"grad_norm": 4.208998680114746,
"learning_rate": 1.5449691633889924e-05,
"loss": 0.1451,
"step": 569
},
{
"epoch": 0.38565629228687415,
"grad_norm": 3.042346477508545,
"learning_rate": 1.5429871279932514e-05,
"loss": 0.1555,
"step": 570
},
{
"epoch": 0.38633288227334234,
"grad_norm": 6.0393595695495605,
"learning_rate": 1.5410020629939966e-05,
"loss": 0.1965,
"step": 571
},
{
"epoch": 0.3870094722598105,
"grad_norm": 3.022724151611328,
"learning_rate": 1.5390139794669225e-05,
"loss": 0.1219,
"step": 572
},
{
"epoch": 0.3876860622462788,
"grad_norm": 2.8513102531433105,
"learning_rate": 1.5370228885045662e-05,
"loss": 0.1634,
"step": 573
},
{
"epoch": 0.38836265223274696,
"grad_norm": 3.42635440826416,
"learning_rate": 1.535028801216245e-05,
"loss": 0.1874,
"step": 574
},
{
"epoch": 0.38903924221921515,
"grad_norm": 6.154781341552734,
"learning_rate": 1.533031728727994e-05,
"loss": 0.1979,
"step": 575
},
{
"epoch": 0.38971583220568334,
"grad_norm": 4.227107524871826,
"learning_rate": 1.531031682182504e-05,
"loss": 0.2125,
"step": 576
},
{
"epoch": 0.39039242219215153,
"grad_norm": 3.389040946960449,
"learning_rate": 1.5290286727390604e-05,
"loss": 0.1554,
"step": 577
},
{
"epoch": 0.3910690121786198,
"grad_norm": 3.34859299659729,
"learning_rate": 1.527022711573479e-05,
"loss": 0.1859,
"step": 578
},
{
"epoch": 0.39174560216508797,
"grad_norm": 2.8738038539886475,
"learning_rate": 1.5250138098780456e-05,
"loss": 0.105,
"step": 579
},
{
"epoch": 0.39242219215155616,
"grad_norm": 3.0525081157684326,
"learning_rate": 1.5230019788614527e-05,
"loss": 0.1437,
"step": 580
},
{
"epoch": 0.39242219215155616,
"eval_accuracy": 0.8036697247706422,
"eval_f1": 0.4928909952606635,
"eval_loss": 0.4023875296115875,
"eval_precision": 0.8387096774193549,
"eval_recall": 0.348993288590604,
"eval_runtime": 53.2583,
"eval_samples_per_second": 5.595,
"eval_steps_per_second": 0.188,
"step": 580
},
{
"epoch": 0.39309878213802435,
"grad_norm": 2.1808993816375732,
"learning_rate": 1.5209872297487365e-05,
"loss": 0.158,
"step": 581
},
{
"epoch": 0.39377537212449254,
"grad_norm": 2.2843339443206787,
"learning_rate": 1.5189695737812153e-05,
"loss": 0.0944,
"step": 582
},
{
"epoch": 0.3944519621109608,
"grad_norm": 3.0277621746063232,
"learning_rate": 1.5169490222164255e-05,
"loss": 0.1253,
"step": 583
},
{
"epoch": 0.395128552097429,
"grad_norm": 3.6869609355926514,
"learning_rate": 1.5149255863280607e-05,
"loss": 0.1293,
"step": 584
},
{
"epoch": 0.39580514208389717,
"grad_norm": 2.9757912158966064,
"learning_rate": 1.5128992774059063e-05,
"loss": 0.1344,
"step": 585
},
{
"epoch": 0.39648173207036536,
"grad_norm": 3.4651436805725098,
"learning_rate": 1.5108701067557787e-05,
"loss": 0.1131,
"step": 586
},
{
"epoch": 0.39715832205683355,
"grad_norm": 3.6160385608673096,
"learning_rate": 1.5088380856994608e-05,
"loss": 0.1679,
"step": 587
},
{
"epoch": 0.39783491204330174,
"grad_norm": 2.459801197052002,
"learning_rate": 1.50680322557464e-05,
"loss": 0.0886,
"step": 588
},
{
"epoch": 0.39851150202977,
"grad_norm": 3.2416129112243652,
"learning_rate": 1.504765537734844e-05,
"loss": 0.1534,
"step": 589
},
{
"epoch": 0.39918809201623817,
"grad_norm": 2.9892609119415283,
"learning_rate": 1.5027250335493771e-05,
"loss": 0.0943,
"step": 590
},
{
"epoch": 0.39986468200270636,
"grad_norm": 5.113293647766113,
"learning_rate": 1.5006817244032589e-05,
"loss": 0.1717,
"step": 591
},
{
"epoch": 0.40054127198917455,
"grad_norm": 6.7918195724487305,
"learning_rate": 1.4986356216971583e-05,
"loss": 0.1747,
"step": 592
},
{
"epoch": 0.40121786197564274,
"grad_norm": 3.726599931716919,
"learning_rate": 1.4965867368473308e-05,
"loss": 0.1416,
"step": 593
},
{
"epoch": 0.401894451962111,
"grad_norm": 6.388960361480713,
"learning_rate": 1.4945350812855555e-05,
"loss": 0.1946,
"step": 594
},
{
"epoch": 0.4025710419485792,
"grad_norm": 3.8928306102752686,
"learning_rate": 1.4924806664590702e-05,
"loss": 0.1622,
"step": 595
},
{
"epoch": 0.40324763193504737,
"grad_norm": 3.4860315322875977,
"learning_rate": 1.4904235038305084e-05,
"loss": 0.1549,
"step": 596
},
{
"epoch": 0.40392422192151556,
"grad_norm": 4.641502857208252,
"learning_rate": 1.4883636048778347e-05,
"loss": 0.1789,
"step": 597
},
{
"epoch": 0.40460081190798375,
"grad_norm": 4.543725967407227,
"learning_rate": 1.4863009810942814e-05,
"loss": 0.1651,
"step": 598
},
{
"epoch": 0.40527740189445194,
"grad_norm": 10.844779014587402,
"learning_rate": 1.4842356439882841e-05,
"loss": 0.2244,
"step": 599
},
{
"epoch": 0.4059539918809202,
"grad_norm": 7.078522205352783,
"learning_rate": 1.4821676050834166e-05,
"loss": 0.2055,
"step": 600
},
{
"epoch": 0.4059539918809202,
"eval_accuracy": 0.8229357798165138,
"eval_f1": 0.5758241758241758,
"eval_loss": 0.40693244338035583,
"eval_precision": 0.8343949044585988,
"eval_recall": 0.4395973154362416,
"eval_runtime": 53.515,
"eval_samples_per_second": 5.569,
"eval_steps_per_second": 0.187,
"step": 600
},
{
"epoch": 0.4066305818673884,
"grad_norm": 5.193844795227051,
"learning_rate": 1.4800968759183288e-05,
"loss": 0.1437,
"step": 601
},
{
"epoch": 0.40730717185385656,
"grad_norm": 4.853598117828369,
"learning_rate": 1.4780234680466792e-05,
"loss": 0.2045,
"step": 602
},
{
"epoch": 0.40798376184032475,
"grad_norm": 2.771559476852417,
"learning_rate": 1.4759473930370738e-05,
"loss": 0.0885,
"step": 603
},
{
"epoch": 0.40866035182679294,
"grad_norm": 2.854980945587158,
"learning_rate": 1.4738686624729987e-05,
"loss": 0.0996,
"step": 604
},
{
"epoch": 0.4093369418132612,
"grad_norm": 3.643080949783325,
"learning_rate": 1.4717872879527578e-05,
"loss": 0.1512,
"step": 605
},
{
"epoch": 0.4100135317997294,
"grad_norm": 3.83292555809021,
"learning_rate": 1.4697032810894062e-05,
"loss": 0.1761,
"step": 606
},
{
"epoch": 0.41069012178619757,
"grad_norm": 2.26672101020813,
"learning_rate": 1.4676166535106866e-05,
"loss": 0.116,
"step": 607
},
{
"epoch": 0.41136671177266576,
"grad_norm": 3.7530345916748047,
"learning_rate": 1.4655274168589635e-05,
"loss": 0.1392,
"step": 608
},
{
"epoch": 0.41204330175913395,
"grad_norm": 3.018079996109009,
"learning_rate": 1.463435582791159e-05,
"loss": 0.1159,
"step": 609
},
{
"epoch": 0.41271989174560214,
"grad_norm": 3.2969181537628174,
"learning_rate": 1.461341162978688e-05,
"loss": 0.2222,
"step": 610
},
{
"epoch": 0.4133964817320704,
"grad_norm": 4.386163711547852,
"learning_rate": 1.459244169107392e-05,
"loss": 0.1679,
"step": 611
},
{
"epoch": 0.4140730717185386,
"grad_norm": 3.1107709407806396,
"learning_rate": 1.4571446128774746e-05,
"loss": 0.0994,
"step": 612
},
{
"epoch": 0.41474966170500677,
"grad_norm": 4.727970600128174,
"learning_rate": 1.4550425060034367e-05,
"loss": 0.2336,
"step": 613
},
{
"epoch": 0.41542625169147496,
"grad_norm": 2.739393949508667,
"learning_rate": 1.4529378602140096e-05,
"loss": 0.1315,
"step": 614
},
{
"epoch": 0.41610284167794315,
"grad_norm": 3.5588788986206055,
"learning_rate": 1.4508306872520912e-05,
"loss": 0.128,
"step": 615
},
{
"epoch": 0.4167794316644114,
"grad_norm": 3.703800916671753,
"learning_rate": 1.4487209988746791e-05,
"loss": 0.1546,
"step": 616
},
{
"epoch": 0.4174560216508796,
"grad_norm": 4.310748100280762,
"learning_rate": 1.4466088068528068e-05,
"loss": 0.1225,
"step": 617
},
{
"epoch": 0.4181326116373478,
"grad_norm": 4.539455413818359,
"learning_rate": 1.444494122971476e-05,
"loss": 0.0978,
"step": 618
},
{
"epoch": 0.41880920162381596,
"grad_norm": 4.7135210037231445,
"learning_rate": 1.4423769590295917e-05,
"loss": 0.1628,
"step": 619
},
{
"epoch": 0.41948579161028415,
"grad_norm": 2.408241033554077,
"learning_rate": 1.4402573268398969e-05,
"loss": 0.1135,
"step": 620
},
{
"epoch": 0.41948579161028415,
"eval_accuracy": 0.8146788990825689,
"eval_f1": 0.5388127853881278,
"eval_loss": 0.4136140048503876,
"eval_precision": 0.8428571428571429,
"eval_recall": 0.3959731543624161,
"eval_runtime": 52.6653,
"eval_samples_per_second": 5.658,
"eval_steps_per_second": 0.19,
"step": 620
},
{
"epoch": 0.42016238159675234,
"grad_norm": 7.886865139007568,
"learning_rate": 1.4381352382289064e-05,
"loss": 0.2124,
"step": 621
},
{
"epoch": 0.4208389715832206,
"grad_norm": 4.534457206726074,
"learning_rate": 1.4360107050368403e-05,
"loss": 0.2076,
"step": 622
},
{
"epoch": 0.4215155615696888,
"grad_norm": 4.329330921173096,
"learning_rate": 1.4338837391175582e-05,
"loss": 0.1768,
"step": 623
},
{
"epoch": 0.42219215155615697,
"grad_norm": 4.343597412109375,
"learning_rate": 1.4317543523384928e-05,
"loss": 0.1652,
"step": 624
},
{
"epoch": 0.42286874154262516,
"grad_norm": 3.2291901111602783,
"learning_rate": 1.4296225565805854e-05,
"loss": 0.1808,
"step": 625
},
{
"epoch": 0.42354533152909335,
"grad_norm": 3.0291831493377686,
"learning_rate": 1.4274883637382162e-05,
"loss": 0.1375,
"step": 626
},
{
"epoch": 0.4242219215155616,
"grad_norm": 4.428736686706543,
"learning_rate": 1.4253517857191415e-05,
"loss": 0.125,
"step": 627
},
{
"epoch": 0.4248985115020298,
"grad_norm": 4.2908616065979,
"learning_rate": 1.4232128344444251e-05,
"loss": 0.2029,
"step": 628
},
{
"epoch": 0.425575101488498,
"grad_norm": 3.273651123046875,
"learning_rate": 1.4210715218483726e-05,
"loss": 0.156,
"step": 629
},
{
"epoch": 0.42625169147496617,
"grad_norm": 4.760056495666504,
"learning_rate": 1.4189278598784648e-05,
"loss": 0.1802,
"step": 630
},
{
"epoch": 0.42692828146143436,
"grad_norm": 4.106754302978516,
"learning_rate": 1.4167818604952906e-05,
"loss": 0.1104,
"step": 631
},
{
"epoch": 0.42760487144790255,
"grad_norm": 3.9572083950042725,
"learning_rate": 1.4146335356724803e-05,
"loss": 0.1584,
"step": 632
},
{
"epoch": 0.4282814614343708,
"grad_norm": 4.0582194328308105,
"learning_rate": 1.4124828973966393e-05,
"loss": 0.1678,
"step": 633
},
{
"epoch": 0.428958051420839,
"grad_norm": 3.1031177043914795,
"learning_rate": 1.410329957667281e-05,
"loss": 0.119,
"step": 634
},
{
"epoch": 0.42963464140730717,
"grad_norm": 6.854620456695557,
"learning_rate": 1.4081747284967602e-05,
"loss": 0.2084,
"step": 635
},
{
"epoch": 0.43031123139377536,
"grad_norm": 3.733327627182007,
"learning_rate": 1.4060172219102046e-05,
"loss": 0.0581,
"step": 636
},
{
"epoch": 0.43098782138024355,
"grad_norm": 6.749164581298828,
"learning_rate": 1.4038574499454496e-05,
"loss": 0.1663,
"step": 637
},
{
"epoch": 0.4316644113667118,
"grad_norm": 4.190524101257324,
"learning_rate": 1.4016954246529697e-05,
"loss": 0.1851,
"step": 638
},
{
"epoch": 0.43234100135318,
"grad_norm": 5.539178371429443,
"learning_rate": 1.3995311580958124e-05,
"loss": 0.1292,
"step": 639
},
{
"epoch": 0.4330175913396482,
"grad_norm": 3.6414852142333984,
"learning_rate": 1.3973646623495305e-05,
"loss": 0.1331,
"step": 640
},
{
"epoch": 0.4330175913396482,
"eval_accuracy": 0.791743119266055,
"eval_f1": 0.44226044226044225,
"eval_loss": 0.46029743552207947,
"eval_precision": 0.8256880733944955,
"eval_recall": 0.30201342281879195,
"eval_runtime": 52.6165,
"eval_samples_per_second": 5.664,
"eval_steps_per_second": 0.19,
"step": 640
},
{
"epoch": 0.43369418132611637,
"grad_norm": 2.1139845848083496,
"learning_rate": 1.395195949502114e-05,
"loss": 0.1142,
"step": 641
},
{
"epoch": 0.43437077131258456,
"grad_norm": 4.319032669067383,
"learning_rate": 1.3930250316539237e-05,
"loss": 0.1261,
"step": 642
},
{
"epoch": 0.43504736129905275,
"grad_norm": 6.74521017074585,
"learning_rate": 1.3908519209176227e-05,
"loss": 0.1988,
"step": 643
},
{
"epoch": 0.435723951285521,
"grad_norm": 4.900043487548828,
"learning_rate": 1.3886766294181105e-05,
"loss": 0.1694,
"step": 644
},
{
"epoch": 0.4364005412719892,
"grad_norm": 3.822930097579956,
"learning_rate": 1.3864991692924524e-05,
"loss": 0.1607,
"step": 645
},
{
"epoch": 0.4370771312584574,
"grad_norm": 6.831581115722656,
"learning_rate": 1.384319552689815e-05,
"loss": 0.1969,
"step": 646
},
{
"epoch": 0.43775372124492556,
"grad_norm": 4.031900405883789,
"learning_rate": 1.3821377917713969e-05,
"loss": 0.1413,
"step": 647
},
{
"epoch": 0.43843031123139375,
"grad_norm": 4.6610565185546875,
"learning_rate": 1.37995389871036e-05,
"loss": 0.1502,
"step": 648
},
{
"epoch": 0.439106901217862,
"grad_norm": 5.273025035858154,
"learning_rate": 1.3777678856917637e-05,
"loss": 0.1584,
"step": 649
},
{
"epoch": 0.4397834912043302,
"grad_norm": 4.015101909637451,
"learning_rate": 1.3755797649124944e-05,
"loss": 0.1968,
"step": 650
},
{
"epoch": 0.4404600811907984,
"grad_norm": 3.669017791748047,
"learning_rate": 1.3733895485812005e-05,
"loss": 0.1168,
"step": 651
},
{
"epoch": 0.44113667117726657,
"grad_norm": 4.213320732116699,
"learning_rate": 1.3711972489182208e-05,
"loss": 0.1717,
"step": 652
},
{
"epoch": 0.44181326116373476,
"grad_norm": 4.028888702392578,
"learning_rate": 1.369002878155519e-05,
"loss": 0.1231,
"step": 653
},
{
"epoch": 0.44248985115020295,
"grad_norm": 4.561563968658447,
"learning_rate": 1.366806448536615e-05,
"loss": 0.1529,
"step": 654
},
{
"epoch": 0.4431664411366712,
"grad_norm": 5.331336975097656,
"learning_rate": 1.3646079723165148e-05,
"loss": 0.1654,
"step": 655
},
{
"epoch": 0.4438430311231394,
"grad_norm": 3.651341199874878,
"learning_rate": 1.3624074617616443e-05,
"loss": 0.1898,
"step": 656
},
{
"epoch": 0.4445196211096076,
"grad_norm": 3.460707187652588,
"learning_rate": 1.3602049291497798e-05,
"loss": 0.132,
"step": 657
},
{
"epoch": 0.44519621109607577,
"grad_norm": 4.810662269592285,
"learning_rate": 1.3580003867699801e-05,
"loss": 0.2468,
"step": 658
},
{
"epoch": 0.44587280108254396,
"grad_norm": 3.721719264984131,
"learning_rate": 1.3557938469225167e-05,
"loss": 0.1419,
"step": 659
},
{
"epoch": 0.4465493910690122,
"grad_norm": 2.96408748626709,
"learning_rate": 1.3535853219188064e-05,
"loss": 0.1433,
"step": 660
},
{
"epoch": 0.4465493910690122,
"eval_accuracy": 0.7779816513761468,
"eval_f1": 0.35978835978835977,
"eval_loss": 0.45713141560554504,
"eval_precision": 0.85,
"eval_recall": 0.22818791946308725,
"eval_runtime": 52.7449,
"eval_samples_per_second": 5.65,
"eval_steps_per_second": 0.19,
"step": 660
},
{
"epoch": 0.4472259810554804,
"grad_norm": 4.556414604187012,
"learning_rate": 1.3513748240813429e-05,
"loss": 0.2081,
"step": 661
},
{
"epoch": 0.4479025710419486,
"grad_norm": 3.337388515472412,
"learning_rate": 1.349162365743626e-05,
"loss": 0.1469,
"step": 662
},
{
"epoch": 0.4485791610284168,
"grad_norm": 2.080909252166748,
"learning_rate": 1.3469479592500954e-05,
"loss": 0.084,
"step": 663
},
{
"epoch": 0.44925575101488496,
"grad_norm": 2.1243245601654053,
"learning_rate": 1.3447316169560593e-05,
"loss": 0.1212,
"step": 664
},
{
"epoch": 0.44993234100135315,
"grad_norm": 6.11072301864624,
"learning_rate": 1.3425133512276284e-05,
"loss": 0.18,
"step": 665
},
{
"epoch": 0.4506089309878214,
"grad_norm": 3.8702616691589355,
"learning_rate": 1.3402931744416432e-05,
"loss": 0.1654,
"step": 666
},
{
"epoch": 0.4512855209742896,
"grad_norm": 2.126553535461426,
"learning_rate": 1.3380710989856086e-05,
"loss": 0.0749,
"step": 667
},
{
"epoch": 0.4519621109607578,
"grad_norm": 3.0334889888763428,
"learning_rate": 1.3358471372576229e-05,
"loss": 0.1168,
"step": 668
},
{
"epoch": 0.45263870094722597,
"grad_norm": 4.436944484710693,
"learning_rate": 1.3336213016663078e-05,
"loss": 0.1849,
"step": 669
},
{
"epoch": 0.45331529093369416,
"grad_norm": 2.813225030899048,
"learning_rate": 1.3313936046307411e-05,
"loss": 0.0991,
"step": 670
},
{
"epoch": 0.4539918809201624,
"grad_norm": 4.604645729064941,
"learning_rate": 1.3291640585803869e-05,
"loss": 0.181,
"step": 671
},
{
"epoch": 0.4546684709066306,
"grad_norm": 4.593608379364014,
"learning_rate": 1.3269326759550252e-05,
"loss": 0.1916,
"step": 672
},
{
"epoch": 0.4553450608930988,
"grad_norm": 4.45982551574707,
"learning_rate": 1.3246994692046837e-05,
"loss": 0.1576,
"step": 673
},
{
"epoch": 0.456021650879567,
"grad_norm": 3.094189167022705,
"learning_rate": 1.3224644507895672e-05,
"loss": 0.137,
"step": 674
},
{
"epoch": 0.45669824086603517,
"grad_norm": 4.386197090148926,
"learning_rate": 1.320227633179989e-05,
"loss": 0.1798,
"step": 675
},
{
"epoch": 0.45737483085250336,
"grad_norm": 6.814380168914795,
"learning_rate": 1.3179890288563015e-05,
"loss": 0.1869,
"step": 676
},
{
"epoch": 0.4580514208389716,
"grad_norm": 3.6425650119781494,
"learning_rate": 1.3157486503088255e-05,
"loss": 0.0971,
"step": 677
},
{
"epoch": 0.4587280108254398,
"grad_norm": 4.681551933288574,
"learning_rate": 1.3135065100377816e-05,
"loss": 0.1818,
"step": 678
},
{
"epoch": 0.459404600811908,
"grad_norm": 2.930051803588867,
"learning_rate": 1.3112626205532189e-05,
"loss": 0.1051,
"step": 679
},
{
"epoch": 0.46008119079837617,
"grad_norm": 4.322526454925537,
"learning_rate": 1.3090169943749475e-05,
"loss": 0.1372,
"step": 680
},
{
"epoch": 0.46008119079837617,
"eval_accuracy": 0.8018348623853211,
"eval_f1": 0.4881516587677725,
"eval_loss": 0.4219379127025604,
"eval_precision": 0.8306451612903226,
"eval_recall": 0.34563758389261745,
"eval_runtime": 53.1579,
"eval_samples_per_second": 5.606,
"eval_steps_per_second": 0.188,
"step": 680
},
{
"epoch": 0.46075778078484436,
"grad_norm": 6.767197132110596,
"learning_rate": 1.3067696440324671e-05,
"loss": 0.2199,
"step": 681
},
{
"epoch": 0.4614343707713126,
"grad_norm": 6.519789218902588,
"learning_rate": 1.3045205820648969e-05,
"loss": 0.1482,
"step": 682
},
{
"epoch": 0.4621109607577808,
"grad_norm": 5.150829792022705,
"learning_rate": 1.3022698210209069e-05,
"loss": 0.1252,
"step": 683
},
{
"epoch": 0.462787550744249,
"grad_norm": 3.6090028285980225,
"learning_rate": 1.3000173734586461e-05,
"loss": 0.1423,
"step": 684
},
{
"epoch": 0.4634641407307172,
"grad_norm": 3.7701938152313232,
"learning_rate": 1.2977632519456745e-05,
"loss": 0.1346,
"step": 685
},
{
"epoch": 0.46414073071718537,
"grad_norm": 2.650648355484009,
"learning_rate": 1.295507469058891e-05,
"loss": 0.1563,
"step": 686
},
{
"epoch": 0.4648173207036536,
"grad_norm": 3.0055038928985596,
"learning_rate": 1.293250037384465e-05,
"loss": 0.12,
"step": 687
},
{
"epoch": 0.4654939106901218,
"grad_norm": 3.10675311088562,
"learning_rate": 1.2909909695177647e-05,
"loss": 0.1294,
"step": 688
},
{
"epoch": 0.46617050067659,
"grad_norm": 3.340036630630493,
"learning_rate": 1.2887302780632876e-05,
"loss": 0.1208,
"step": 689
},
{
"epoch": 0.4668470906630582,
"grad_norm": 3.540969133377075,
"learning_rate": 1.2864679756345905e-05,
"loss": 0.1568,
"step": 690
},
{
"epoch": 0.4675236806495264,
"grad_norm": 2.290696620941162,
"learning_rate": 1.284204074854218e-05,
"loss": 0.0659,
"step": 691
},
{
"epoch": 0.46820027063599456,
"grad_norm": 2.736776351928711,
"learning_rate": 1.2819385883536332e-05,
"loss": 0.1017,
"step": 692
},
{
"epoch": 0.4688768606224628,
"grad_norm": 4.793468952178955,
"learning_rate": 1.2796715287731463e-05,
"loss": 0.1693,
"step": 693
},
{
"epoch": 0.469553450608931,
"grad_norm": 5.424515724182129,
"learning_rate": 1.2774029087618448e-05,
"loss": 0.2241,
"step": 694
},
{
"epoch": 0.4702300405953992,
"grad_norm": 3.8648910522460938,
"learning_rate": 1.2751327409775227e-05,
"loss": 0.1706,
"step": 695
},
{
"epoch": 0.4709066305818674,
"grad_norm": 5.154799461364746,
"learning_rate": 1.2728610380866097e-05,
"loss": 0.1878,
"step": 696
},
{
"epoch": 0.47158322056833557,
"grad_norm": 4.5547261238098145,
"learning_rate": 1.2705878127641007e-05,
"loss": 0.1794,
"step": 697
},
{
"epoch": 0.4722598105548038,
"grad_norm": 4.59852409362793,
"learning_rate": 1.268313077693485e-05,
"loss": 0.1402,
"step": 698
},
{
"epoch": 0.472936400541272,
"grad_norm": 3.0397305488586426,
"learning_rate": 1.2660368455666752e-05,
"loss": 0.1126,
"step": 699
},
{
"epoch": 0.4736129905277402,
"grad_norm": 2.698749303817749,
"learning_rate": 1.2637591290839377e-05,
"loss": 0.0677,
"step": 700
},
{
"epoch": 0.4736129905277402,
"eval_accuracy": 0.7990825688073394,
"eval_f1": 0.46973365617433416,
"eval_loss": 0.442281574010849,
"eval_precision": 0.8434782608695652,
"eval_recall": 0.32550335570469796,
"eval_runtime": 52.9641,
"eval_samples_per_second": 5.626,
"eval_steps_per_second": 0.189,
"step": 700
},
{
"epoch": 0.4742895805142084,
"grad_norm": 3.1701228618621826,
"learning_rate": 1.26147994095382e-05,
"loss": 0.1402,
"step": 701
},
{
"epoch": 0.4749661705006766,
"grad_norm": 3.6727280616760254,
"learning_rate": 1.2591992938930808e-05,
"loss": 0.1482,
"step": 702
},
{
"epoch": 0.47564276048714477,
"grad_norm": 2.606227159500122,
"learning_rate": 1.2569172006266192e-05,
"loss": 0.141,
"step": 703
},
{
"epoch": 0.476319350473613,
"grad_norm": 3.620513677597046,
"learning_rate": 1.2546336738874037e-05,
"loss": 0.1777,
"step": 704
},
{
"epoch": 0.4769959404600812,
"grad_norm": 4.079351902008057,
"learning_rate": 1.2523487264163997e-05,
"loss": 0.1614,
"step": 705
},
{
"epoch": 0.4776725304465494,
"grad_norm": 3.8010270595550537,
"learning_rate": 1.2500623709625008e-05,
"loss": 0.1359,
"step": 706
},
{
"epoch": 0.4783491204330176,
"grad_norm": 3.370544672012329,
"learning_rate": 1.2477746202824563e-05,
"loss": 0.1554,
"step": 707
},
{
"epoch": 0.4790257104194858,
"grad_norm": 5.698882579803467,
"learning_rate": 1.2454854871407993e-05,
"loss": 0.1387,
"step": 708
},
{
"epoch": 0.479702300405954,
"grad_norm": 3.655916213989258,
"learning_rate": 1.2431949843097776e-05,
"loss": 0.0942,
"step": 709
},
{
"epoch": 0.4803788903924222,
"grad_norm": 4.965806484222412,
"learning_rate": 1.2409031245692798e-05,
"loss": 0.1671,
"step": 710
},
{
"epoch": 0.4810554803788904,
"grad_norm": 4.127477645874023,
"learning_rate": 1.238609920706767e-05,
"loss": 0.1012,
"step": 711
},
{
"epoch": 0.4817320703653586,
"grad_norm": 4.219476222991943,
"learning_rate": 1.2363153855171985e-05,
"loss": 0.1406,
"step": 712
},
{
"epoch": 0.4824086603518268,
"grad_norm": 4.781118392944336,
"learning_rate": 1.2340195318029623e-05,
"loss": 0.1519,
"step": 713
},
{
"epoch": 0.48308525033829497,
"grad_norm": 2.8674471378326416,
"learning_rate": 1.2317223723738036e-05,
"loss": 0.0691,
"step": 714
},
{
"epoch": 0.4837618403247632,
"grad_norm": 3.4862661361694336,
"learning_rate": 1.2294239200467516e-05,
"loss": 0.1282,
"step": 715
},
{
"epoch": 0.4844384303112314,
"grad_norm": 2.642241954803467,
"learning_rate": 1.2271241876460507e-05,
"loss": 0.1144,
"step": 716
},
{
"epoch": 0.4851150202976996,
"grad_norm": 5.012102127075195,
"learning_rate": 1.2248231880030861e-05,
"loss": 0.1781,
"step": 717
},
{
"epoch": 0.4857916102841678,
"grad_norm": 3.223320484161377,
"learning_rate": 1.2225209339563144e-05,
"loss": 0.103,
"step": 718
},
{
"epoch": 0.486468200270636,
"grad_norm": 3.7974560260772705,
"learning_rate": 1.2202174383511916e-05,
"loss": 0.1277,
"step": 719
},
{
"epoch": 0.4871447902571042,
"grad_norm": 8.01950454711914,
"learning_rate": 1.2179127140400997e-05,
"loss": 0.2256,
"step": 720
},
{
"epoch": 0.4871447902571042,
"eval_accuracy": 0.7798165137614679,
"eval_f1": 0.375,
"eval_loss": 0.49802127480506897,
"eval_precision": 0.8372093023255814,
"eval_recall": 0.24161073825503357,
"eval_runtime": 52.7234,
"eval_samples_per_second": 5.652,
"eval_steps_per_second": 0.19,
"step": 720
},
{
"epoch": 0.4878213802435724,
"grad_norm": 5.34826135635376,
"learning_rate": 1.2156067738822777e-05,
"loss": 0.173,
"step": 721
},
{
"epoch": 0.4884979702300406,
"grad_norm": 2.6310982704162598,
"learning_rate": 1.213299630743747e-05,
"loss": 0.0958,
"step": 722
},
{
"epoch": 0.4891745602165088,
"grad_norm": 4.56181526184082,
"learning_rate": 1.2109912974972424e-05,
"loss": 0.1279,
"step": 723
},
{
"epoch": 0.489851150202977,
"grad_norm": 5.082941055297852,
"learning_rate": 1.2086817870221376e-05,
"loss": 0.158,
"step": 724
},
{
"epoch": 0.49052774018944517,
"grad_norm": 6.134848117828369,
"learning_rate": 1.2063711122043759e-05,
"loss": 0.1063,
"step": 725
},
{
"epoch": 0.4912043301759134,
"grad_norm": 3.6247506141662598,
"learning_rate": 1.204059285936397e-05,
"loss": 0.1078,
"step": 726
},
{
"epoch": 0.4918809201623816,
"grad_norm": 4.745319366455078,
"learning_rate": 1.2017463211170635e-05,
"loss": 0.1094,
"step": 727
},
{
"epoch": 0.4925575101488498,
"grad_norm": 5.845713138580322,
"learning_rate": 1.1994322306515926e-05,
"loss": 0.1373,
"step": 728
},
{
"epoch": 0.493234100135318,
"grad_norm": 3.4521119594573975,
"learning_rate": 1.1971170274514802e-05,
"loss": 0.0997,
"step": 729
},
{
"epoch": 0.4939106901217862,
"grad_norm": 4.090993404388428,
"learning_rate": 1.1948007244344334e-05,
"loss": 0.1578,
"step": 730
},
{
"epoch": 0.4945872801082544,
"grad_norm": 5.571890830993652,
"learning_rate": 1.1924833345242921e-05,
"loss": 0.1614,
"step": 731
},
{
"epoch": 0.4952638700947226,
"grad_norm": 4.286579132080078,
"learning_rate": 1.1901648706509637e-05,
"loss": 0.1557,
"step": 732
},
{
"epoch": 0.4959404600811908,
"grad_norm": 6.809506893157959,
"learning_rate": 1.1878453457503465e-05,
"loss": 0.1479,
"step": 733
},
{
"epoch": 0.496617050067659,
"grad_norm": 3.8739116191864014,
"learning_rate": 1.185524772764258e-05,
"loss": 0.1504,
"step": 734
},
{
"epoch": 0.4972936400541272,
"grad_norm": 3.7848074436187744,
"learning_rate": 1.1832031646403654e-05,
"loss": 0.1236,
"step": 735
},
{
"epoch": 0.4979702300405954,
"grad_norm": 4.305004596710205,
"learning_rate": 1.1808805343321102e-05,
"loss": 0.1505,
"step": 736
},
{
"epoch": 0.4986468200270636,
"grad_norm": 3.052428722381592,
"learning_rate": 1.1785568947986368e-05,
"loss": 0.1454,
"step": 737
},
{
"epoch": 0.4993234100135318,
"grad_norm": 7.71512508392334,
"learning_rate": 1.176232259004722e-05,
"loss": 0.2538,
"step": 738
},
{
"epoch": 0.5,
"grad_norm": 5.634016990661621,
"learning_rate": 1.1739066399206997e-05,
"loss": 0.1768,
"step": 739
},
{
"epoch": 0.5006765899864682,
"grad_norm": 4.39807653427124,
"learning_rate": 1.1715800505223918e-05,
"loss": 0.2035,
"step": 740
},
{
"epoch": 0.5006765899864682,
"eval_accuracy": 0.7871559633027523,
"eval_f1": 0.40816326530612246,
"eval_loss": 0.4451703131198883,
"eval_precision": 0.851063829787234,
"eval_recall": 0.2684563758389262,
"eval_runtime": 53.0418,
"eval_samples_per_second": 5.618,
"eval_steps_per_second": 0.189,
"step": 740
},
{
"epoch": 0.5013531799729364,
"grad_norm": 4.298307418823242,
"learning_rate": 1.1692525037910325e-05,
"loss": 0.155,
"step": 741
},
{
"epoch": 0.5020297699594046,
"grad_norm": 3.8376667499542236,
"learning_rate": 1.166924012713198e-05,
"loss": 0.1715,
"step": 742
},
{
"epoch": 0.5027063599458728,
"grad_norm": 4.111176013946533,
"learning_rate": 1.164594590280734e-05,
"loss": 0.139,
"step": 743
},
{
"epoch": 0.503382949932341,
"grad_norm": 3.5634899139404297,
"learning_rate": 1.1622642494906819e-05,
"loss": 0.1315,
"step": 744
},
{
"epoch": 0.5040595399188093,
"grad_norm": 3.7423675060272217,
"learning_rate": 1.1599330033452078e-05,
"loss": 0.1789,
"step": 745
},
{
"epoch": 0.5047361299052774,
"grad_norm": 7.1124958992004395,
"learning_rate": 1.1576008648515286e-05,
"loss": 0.1674,
"step": 746
},
{
"epoch": 0.5054127198917456,
"grad_norm": 3.564636468887329,
"learning_rate": 1.1552678470218406e-05,
"loss": 0.1415,
"step": 747
},
{
"epoch": 0.5060893098782138,
"grad_norm": 7.098050117492676,
"learning_rate": 1.1529339628732462e-05,
"loss": 0.1789,
"step": 748
},
{
"epoch": 0.506765899864682,
"grad_norm": 7.915879726409912,
"learning_rate": 1.1505992254276808e-05,
"loss": 0.184,
"step": 749
},
{
"epoch": 0.5074424898511503,
"grad_norm": 3.962040424346924,
"learning_rate": 1.148263647711842e-05,
"loss": 0.1447,
"step": 750
},
{
"epoch": 0.5081190798376184,
"grad_norm": 2.833312749862671,
"learning_rate": 1.1459272427571148e-05,
"loss": 0.1153,
"step": 751
},
{
"epoch": 0.5087956698240866,
"grad_norm": 3.094414234161377,
"learning_rate": 1.1435900235995004e-05,
"loss": 0.1555,
"step": 752
},
{
"epoch": 0.5094722598105548,
"grad_norm": 3.7343735694885254,
"learning_rate": 1.141252003279542e-05,
"loss": 0.1781,
"step": 753
},
{
"epoch": 0.510148849797023,
"grad_norm": 2.3985371589660645,
"learning_rate": 1.1389131948422534e-05,
"loss": 0.1266,
"step": 754
},
{
"epoch": 0.5108254397834912,
"grad_norm": 2.8854312896728516,
"learning_rate": 1.1365736113370463e-05,
"loss": 0.1198,
"step": 755
},
{
"epoch": 0.5115020297699594,
"grad_norm": 3.6787984371185303,
"learning_rate": 1.1342332658176556e-05,
"loss": 0.1701,
"step": 756
},
{
"epoch": 0.5121786197564276,
"grad_norm": 3.9833555221557617,
"learning_rate": 1.1318921713420691e-05,
"loss": 0.1883,
"step": 757
},
{
"epoch": 0.5128552097428958,
"grad_norm": 5.8127031326293945,
"learning_rate": 1.1295503409724526e-05,
"loss": 0.19,
"step": 758
},
{
"epoch": 0.513531799729364,
"grad_norm": 3.7036736011505127,
"learning_rate": 1.1272077877750782e-05,
"loss": 0.1532,
"step": 759
},
{
"epoch": 0.5142083897158322,
"grad_norm": 3.996150255203247,
"learning_rate": 1.124864524820251e-05,
"loss": 0.1412,
"step": 760
},
{
"epoch": 0.5142083897158322,
"eval_accuracy": 0.7761467889908257,
"eval_f1": 0.34408602150537637,
"eval_loss": 0.4691689610481262,
"eval_precision": 0.8648648648648649,
"eval_recall": 0.21476510067114093,
"eval_runtime": 52.6721,
"eval_samples_per_second": 5.658,
"eval_steps_per_second": 0.19,
"step": 760
},
{
"epoch": 0.5148849797023004,
"grad_norm": 6.409296989440918,
"learning_rate": 1.1225205651822359e-05,
"loss": 0.264,
"step": 761
},
{
"epoch": 0.5155615696887687,
"grad_norm": 3.7257473468780518,
"learning_rate": 1.1201759219391858e-05,
"loss": 0.1302,
"step": 762
},
{
"epoch": 0.5162381596752368,
"grad_norm": 2.880531072616577,
"learning_rate": 1.1178306081730666e-05,
"loss": 0.1321,
"step": 763
},
{
"epoch": 0.516914749661705,
"grad_norm": 3.284871816635132,
"learning_rate": 1.1154846369695864e-05,
"loss": 0.1171,
"step": 764
},
{
"epoch": 0.5175913396481732,
"grad_norm": 3.0467426776885986,
"learning_rate": 1.1131380214181205e-05,
"loss": 0.1664,
"step": 765
},
{
"epoch": 0.5182679296346414,
"grad_norm": 4.38018274307251,
"learning_rate": 1.1107907746116402e-05,
"loss": 0.167,
"step": 766
},
{
"epoch": 0.5189445196211097,
"grad_norm": 2.9475748538970947,
"learning_rate": 1.108442909646639e-05,
"loss": 0.1161,
"step": 767
},
{
"epoch": 0.5196211096075778,
"grad_norm": 5.2659687995910645,
"learning_rate": 1.1060944396230583e-05,
"loss": 0.1163,
"step": 768
},
{
"epoch": 0.520297699594046,
"grad_norm": 3.119966983795166,
"learning_rate": 1.1037453776442164e-05,
"loss": 0.1693,
"step": 769
},
{
"epoch": 0.5209742895805142,
"grad_norm": 3.546898603439331,
"learning_rate": 1.1013957368167343e-05,
"loss": 0.1362,
"step": 770
},
{
"epoch": 0.5216508795669824,
"grad_norm": 3.2547402381896973,
"learning_rate": 1.099045530250463e-05,
"loss": 0.1175,
"step": 771
},
{
"epoch": 0.5223274695534507,
"grad_norm": 2.995699882507324,
"learning_rate": 1.0966947710584086e-05,
"loss": 0.1131,
"step": 772
},
{
"epoch": 0.5230040595399188,
"grad_norm": 2.557124614715576,
"learning_rate": 1.0943434723566624e-05,
"loss": 0.139,
"step": 773
},
{
"epoch": 0.523680649526387,
"grad_norm": 3.192425489425659,
"learning_rate": 1.091991647264325e-05,
"loss": 0.1644,
"step": 774
},
{
"epoch": 0.5243572395128552,
"grad_norm": 5.2245025634765625,
"learning_rate": 1.0896393089034336e-05,
"loss": 0.1449,
"step": 775
},
{
"epoch": 0.5250338294993234,
"grad_norm": 3.094622850418091,
"learning_rate": 1.0872864703988903e-05,
"loss": 0.1066,
"step": 776
},
{
"epoch": 0.5257104194857916,
"grad_norm": 2.3677878379821777,
"learning_rate": 1.0849331448783869e-05,
"loss": 0.1244,
"step": 777
},
{
"epoch": 0.5263870094722598,
"grad_norm": 5.578123569488525,
"learning_rate": 1.0825793454723325e-05,
"loss": 0.1779,
"step": 778
},
{
"epoch": 0.527063599458728,
"grad_norm": 5.073922157287598,
"learning_rate": 1.0802250853137808e-05,
"loss": 0.2355,
"step": 779
},
{
"epoch": 0.5277401894451962,
"grad_norm": 2.925035238265991,
"learning_rate": 1.0778703775383559e-05,
"loss": 0.1346,
"step": 780
},
{
"epoch": 0.5277401894451962,
"eval_accuracy": 0.8119266055045872,
"eval_f1": 0.5287356321839081,
"eval_loss": 0.40138694643974304,
"eval_precision": 0.8394160583941606,
"eval_recall": 0.3859060402684564,
"eval_runtime": 52.7212,
"eval_samples_per_second": 5.652,
"eval_steps_per_second": 0.19,
"step": 780
},
{
"epoch": 0.5284167794316644,
"grad_norm": 4.117710113525391,
"learning_rate": 1.0755152352841798e-05,
"loss": 0.1435,
"step": 781
},
{
"epoch": 0.5290933694181326,
"grad_norm": 4.172342777252197,
"learning_rate": 1.0731596716917978e-05,
"loss": 0.1428,
"step": 782
},
{
"epoch": 0.5297699594046008,
"grad_norm": 2.936666250228882,
"learning_rate": 1.0708036999041072e-05,
"loss": 0.1051,
"step": 783
},
{
"epoch": 0.530446549391069,
"grad_norm": 2.487924098968506,
"learning_rate": 1.0684473330662815e-05,
"loss": 0.0967,
"step": 784
},
{
"epoch": 0.5311231393775372,
"grad_norm": 3.894505739212036,
"learning_rate": 1.0660905843256995e-05,
"loss": 0.1088,
"step": 785
},
{
"epoch": 0.5317997293640054,
"grad_norm": 5.016809463500977,
"learning_rate": 1.0637334668318708e-05,
"loss": 0.1301,
"step": 786
},
{
"epoch": 0.5324763193504736,
"grad_norm": 3.1163365840911865,
"learning_rate": 1.0613759937363617e-05,
"loss": 0.125,
"step": 787
},
{
"epoch": 0.5331529093369418,
"grad_norm": 3.6140894889831543,
"learning_rate": 1.0590181781927229e-05,
"loss": 0.1339,
"step": 788
},
{
"epoch": 0.5338294993234101,
"grad_norm": 4.263391971588135,
"learning_rate": 1.0566600333564163e-05,
"loss": 0.2378,
"step": 789
},
{
"epoch": 0.5345060893098782,
"grad_norm": 5.283838748931885,
"learning_rate": 1.0543015723847402e-05,
"loss": 0.1762,
"step": 790
},
{
"epoch": 0.5351826792963464,
"grad_norm": 4.460554599761963,
"learning_rate": 1.0519428084367583e-05,
"loss": 0.1446,
"step": 791
},
{
"epoch": 0.5358592692828146,
"grad_norm": 3.4405527114868164,
"learning_rate": 1.0495837546732224e-05,
"loss": 0.0647,
"step": 792
},
{
"epoch": 0.5365358592692828,
"grad_norm": 2.5219104290008545,
"learning_rate": 1.0472244242565035e-05,
"loss": 0.0866,
"step": 793
},
{
"epoch": 0.5372124492557511,
"grad_norm": 4.9833221435546875,
"learning_rate": 1.044864830350515e-05,
"loss": 0.1009,
"step": 794
},
{
"epoch": 0.5378890392422192,
"grad_norm": 3.444195032119751,
"learning_rate": 1.042504986120641e-05,
"loss": 0.0941,
"step": 795
},
{
"epoch": 0.5385656292286874,
"grad_norm": 5.0867509841918945,
"learning_rate": 1.0401449047336622e-05,
"loss": 0.1377,
"step": 796
},
{
"epoch": 0.5392422192151556,
"grad_norm": 4.2269606590271,
"learning_rate": 1.0377845993576819e-05,
"loss": 0.1651,
"step": 797
},
{
"epoch": 0.5399188092016238,
"grad_norm": 5.579936504364014,
"learning_rate": 1.0354240831620542e-05,
"loss": 0.2036,
"step": 798
},
{
"epoch": 0.540595399188092,
"grad_norm": 3.619593858718872,
"learning_rate": 1.0330633693173083e-05,
"loss": 0.0519,
"step": 799
},
{
"epoch": 0.5412719891745602,
"grad_norm": 5.691805362701416,
"learning_rate": 1.0307024709950775e-05,
"loss": 0.1691,
"step": 800
},
{
"epoch": 0.5412719891745602,
"eval_accuracy": 0.7926605504587156,
"eval_f1": 0.4321608040201005,
"eval_loss": 0.48544931411743164,
"eval_precision": 0.86,
"eval_recall": 0.28859060402684567,
"eval_runtime": 53.0047,
"eval_samples_per_second": 5.622,
"eval_steps_per_second": 0.189,
"step": 800
},
{
"epoch": 0.5419485791610285,
"grad_norm": 4.048157691955566,
"learning_rate": 1.0283414013680233e-05,
"loss": 0.1629,
"step": 801
},
{
"epoch": 0.5426251691474966,
"grad_norm": 8.180608749389648,
"learning_rate": 1.0259801736097634e-05,
"loss": 0.2779,
"step": 802
},
{
"epoch": 0.5433017591339648,
"grad_norm": 2.2637126445770264,
"learning_rate": 1.023618800894798e-05,
"loss": 0.0888,
"step": 803
},
{
"epoch": 0.543978349120433,
"grad_norm": 3.4222660064697266,
"learning_rate": 1.0212572963984358e-05,
"loss": 0.1072,
"step": 804
},
{
"epoch": 0.5446549391069012,
"grad_norm": 5.149094104766846,
"learning_rate": 1.0188956732967208e-05,
"loss": 0.1834,
"step": 805
},
{
"epoch": 0.5453315290933695,
"grad_norm": 4.928592681884766,
"learning_rate": 1.0165339447663586e-05,
"loss": 0.1065,
"step": 806
},
{
"epoch": 0.5460081190798376,
"grad_norm": 3.3745458126068115,
"learning_rate": 1.0141721239846436e-05,
"loss": 0.1105,
"step": 807
},
{
"epoch": 0.5466847090663058,
"grad_norm": 3.140056848526001,
"learning_rate": 1.0118102241293848e-05,
"loss": 0.1057,
"step": 808
},
{
"epoch": 0.547361299052774,
"grad_norm": 2.9225945472717285,
"learning_rate": 1.0094482583788311e-05,
"loss": 0.1409,
"step": 809
},
{
"epoch": 0.5480378890392422,
"grad_norm": 5.8072333335876465,
"learning_rate": 1.0070862399116016e-05,
"loss": 0.1697,
"step": 810
},
{
"epoch": 0.5487144790257105,
"grad_norm": 2.9896950721740723,
"learning_rate": 1.0047241819066069e-05,
"loss": 0.0893,
"step": 811
},
{
"epoch": 0.5493910690121786,
"grad_norm": 9.982451438903809,
"learning_rate": 1.0023620975429803e-05,
"loss": 0.2119,
"step": 812
},
{
"epoch": 0.5500676589986468,
"grad_norm": 5.057304382324219,
"learning_rate": 1e-05,
"loss": 0.1417,
"step": 813
},
{
"epoch": 0.550744248985115,
"grad_norm": 8.598350524902344,
"learning_rate": 9.976379024570202e-06,
"loss": 0.2122,
"step": 814
},
{
"epoch": 0.5514208389715832,
"grad_norm": 2.8650100231170654,
"learning_rate": 9.952758180933933e-06,
"loss": 0.0901,
"step": 815
},
{
"epoch": 0.5520974289580515,
"grad_norm": 5.409826278686523,
"learning_rate": 9.929137600883986e-06,
"loss": 0.1613,
"step": 816
},
{
"epoch": 0.5527740189445196,
"grad_norm": 2.837448835372925,
"learning_rate": 9.90551741621169e-06,
"loss": 0.0946,
"step": 817
},
{
"epoch": 0.5534506089309879,
"grad_norm": 3.9413063526153564,
"learning_rate": 9.881897758706155e-06,
"loss": 0.1099,
"step": 818
},
{
"epoch": 0.554127198917456,
"grad_norm": 2.284583330154419,
"learning_rate": 9.858278760153567e-06,
"loss": 0.0653,
"step": 819
},
{
"epoch": 0.5548037889039242,
"grad_norm": 6.636195182800293,
"learning_rate": 9.834660552336415e-06,
"loss": 0.1583,
"step": 820
},
{
"epoch": 0.5548037889039242,
"eval_accuracy": 0.7788990825688074,
"eval_f1": 0.3674540682414698,
"eval_loss": 0.47594916820526123,
"eval_precision": 0.8433734939759037,
"eval_recall": 0.2348993288590604,
"eval_runtime": 53.6382,
"eval_samples_per_second": 5.556,
"eval_steps_per_second": 0.186,
"step": 820
},
{
"epoch": 0.5554803788903924,
"grad_norm": 3.6005120277404785,
"learning_rate": 9.811043267032797e-06,
"loss": 0.0887,
"step": 821
},
{
"epoch": 0.5561569688768606,
"grad_norm": 3.6898558139801025,
"learning_rate": 9.787427036015647e-06,
"loss": 0.147,
"step": 822
},
{
"epoch": 0.5568335588633289,
"grad_norm": 6.481770992279053,
"learning_rate": 9.763811991052021e-06,
"loss": 0.1877,
"step": 823
},
{
"epoch": 0.557510148849797,
"grad_norm": 2.4457807540893555,
"learning_rate": 9.74019826390237e-06,
"loss": 0.1004,
"step": 824
},
{
"epoch": 0.5581867388362652,
"grad_norm": 2.3346476554870605,
"learning_rate": 9.716585986319769e-06,
"loss": 0.1199,
"step": 825
},
{
"epoch": 0.5588633288227334,
"grad_norm": 5.753274917602539,
"learning_rate": 9.692975290049228e-06,
"loss": 0.1646,
"step": 826
},
{
"epoch": 0.5595399188092016,
"grad_norm": 4.540411949157715,
"learning_rate": 9.669366306826919e-06,
"loss": 0.1275,
"step": 827
},
{
"epoch": 0.5602165087956699,
"grad_norm": 4.377498149871826,
"learning_rate": 9.645759168379463e-06,
"loss": 0.1508,
"step": 828
},
{
"epoch": 0.560893098782138,
"grad_norm": 4.894872188568115,
"learning_rate": 9.622154006423185e-06,
"loss": 0.1608,
"step": 829
},
{
"epoch": 0.5615696887686062,
"grad_norm": 3.9579596519470215,
"learning_rate": 9.598550952663383e-06,
"loss": 0.0748,
"step": 830
},
{
"epoch": 0.5622462787550744,
"grad_norm": 3.1920807361602783,
"learning_rate": 9.574950138793593e-06,
"loss": 0.0958,
"step": 831
},
{
"epoch": 0.5629228687415426,
"grad_norm": 2.8056745529174805,
"learning_rate": 9.551351696494854e-06,
"loss": 0.1528,
"step": 832
},
{
"epoch": 0.5635994587280109,
"grad_norm": 2.493863105773926,
"learning_rate": 9.527755757434968e-06,
"loss": 0.0693,
"step": 833
},
{
"epoch": 0.564276048714479,
"grad_norm": 3.3314931392669678,
"learning_rate": 9.504162453267776e-06,
"loss": 0.1227,
"step": 834
},
{
"epoch": 0.5649526387009473,
"grad_norm": 3.9033989906311035,
"learning_rate": 9.480571915632422e-06,
"loss": 0.1199,
"step": 835
},
{
"epoch": 0.5656292286874154,
"grad_norm": 3.6395678520202637,
"learning_rate": 9.456984276152598e-06,
"loss": 0.1057,
"step": 836
},
{
"epoch": 0.5663058186738836,
"grad_norm": 6.916732311248779,
"learning_rate": 9.43339966643584e-06,
"loss": 0.1741,
"step": 837
},
{
"epoch": 0.5669824086603519,
"grad_norm": 3.8561432361602783,
"learning_rate": 9.409818218072774e-06,
"loss": 0.1654,
"step": 838
},
{
"epoch": 0.56765899864682,
"grad_norm": 3.962113380432129,
"learning_rate": 9.386240062636388e-06,
"loss": 0.1459,
"step": 839
},
{
"epoch": 0.5683355886332883,
"grad_norm": 2.5661449432373047,
"learning_rate": 9.362665331681294e-06,
"loss": 0.1363,
"step": 840
},
{
"epoch": 0.5683355886332883,
"eval_accuracy": 0.8009174311926606,
"eval_f1": 0.4668304668304668,
"eval_loss": 0.43011632561683655,
"eval_precision": 0.8715596330275229,
"eval_recall": 0.3187919463087248,
"eval_runtime": 54.0976,
"eval_samples_per_second": 5.509,
"eval_steps_per_second": 0.185,
"step": 840
},
{
"epoch": 0.5690121786197564,
"grad_norm": 3.792685031890869,
"learning_rate": 9.339094156743007e-06,
"loss": 0.1408,
"step": 841
},
{
"epoch": 0.5696887686062246,
"grad_norm": 4.538841247558594,
"learning_rate": 9.315526669337189e-06,
"loss": 0.1399,
"step": 842
},
{
"epoch": 0.5703653585926928,
"grad_norm": 5.182969570159912,
"learning_rate": 9.291963000958932e-06,
"loss": 0.1753,
"step": 843
},
{
"epoch": 0.571041948579161,
"grad_norm": 3.905219554901123,
"learning_rate": 9.268403283082025e-06,
"loss": 0.143,
"step": 844
},
{
"epoch": 0.5717185385656293,
"grad_norm": 3.7634634971618652,
"learning_rate": 9.244847647158203e-06,
"loss": 0.1469,
"step": 845
},
{
"epoch": 0.5723951285520974,
"grad_norm": 3.5530450344085693,
"learning_rate": 9.221296224616443e-06,
"loss": 0.1334,
"step": 846
},
{
"epoch": 0.5730717185385656,
"grad_norm": 6.1246161460876465,
"learning_rate": 9.197749146862193e-06,
"loss": 0.1216,
"step": 847
},
{
"epoch": 0.5737483085250338,
"grad_norm": 7.231658458709717,
"learning_rate": 9.174206545276678e-06,
"loss": 0.2128,
"step": 848
},
{
"epoch": 0.574424898511502,
"grad_norm": 6.129051685333252,
"learning_rate": 9.150668551216134e-06,
"loss": 0.2178,
"step": 849
},
{
"epoch": 0.5751014884979703,
"grad_norm": 4.892454624176025,
"learning_rate": 9.127135296011102e-06,
"loss": 0.1496,
"step": 850
},
{
"epoch": 0.5757780784844384,
"grad_norm": 6.8777337074279785,
"learning_rate": 9.103606910965666e-06,
"loss": 0.2008,
"step": 851
},
{
"epoch": 0.5764546684709067,
"grad_norm": 3.538118600845337,
"learning_rate": 9.080083527356755e-06,
"loss": 0.1232,
"step": 852
},
{
"epoch": 0.5771312584573748,
"grad_norm": 5.2440080642700195,
"learning_rate": 9.056565276433378e-06,
"loss": 0.1973,
"step": 853
},
{
"epoch": 0.577807848443843,
"grad_norm": 2.282479763031006,
"learning_rate": 9.033052289415914e-06,
"loss": 0.0696,
"step": 854
},
{
"epoch": 0.5784844384303113,
"grad_norm": 3.643191337585449,
"learning_rate": 9.009544697495373e-06,
"loss": 0.1378,
"step": 855
},
{
"epoch": 0.5791610284167794,
"grad_norm": 3.0240986347198486,
"learning_rate": 8.986042631832656e-06,
"loss": 0.1579,
"step": 856
},
{
"epoch": 0.5798376184032477,
"grad_norm": 3.921018362045288,
"learning_rate": 8.962546223557838e-06,
"loss": 0.1194,
"step": 857
},
{
"epoch": 0.5805142083897158,
"grad_norm": 3.4192543029785156,
"learning_rate": 8.93905560376942e-06,
"loss": 0.1817,
"step": 858
},
{
"epoch": 0.581190798376184,
"grad_norm": 4.1514363288879395,
"learning_rate": 8.915570903533615e-06,
"loss": 0.1489,
"step": 859
},
{
"epoch": 0.5818673883626523,
"grad_norm": 3.885377883911133,
"learning_rate": 8.892092253883602e-06,
"loss": 0.1456,
"step": 860
},
{
"epoch": 0.5818673883626523,
"eval_accuracy": 0.7926605504587156,
"eval_f1": 0.42346938775510207,
"eval_loss": 0.4394099712371826,
"eval_precision": 0.8829787234042553,
"eval_recall": 0.2785234899328859,
"eval_runtime": 53.3745,
"eval_samples_per_second": 5.583,
"eval_steps_per_second": 0.187,
"step": 860
},
{
"epoch": 0.5825439783491204,
"grad_norm": 3.6462066173553467,
"learning_rate": 8.8686197858188e-06,
"loss": 0.1782,
"step": 861
},
{
"epoch": 0.5832205683355887,
"grad_norm": 3.2633800506591797,
"learning_rate": 8.84515363030414e-06,
"loss": 0.1656,
"step": 862
},
{
"epoch": 0.5838971583220568,
"grad_norm": 5.255461692810059,
"learning_rate": 8.821693918269334e-06,
"loss": 0.1306,
"step": 863
},
{
"epoch": 0.584573748308525,
"grad_norm": 4.525811672210693,
"learning_rate": 8.798240780608143e-06,
"loss": 0.1684,
"step": 864
},
{
"epoch": 0.5852503382949933,
"grad_norm": 2.788585901260376,
"learning_rate": 8.774794348177641e-06,
"loss": 0.1456,
"step": 865
},
{
"epoch": 0.5859269282814614,
"grad_norm": 3.1500301361083984,
"learning_rate": 8.751354751797492e-06,
"loss": 0.1347,
"step": 866
},
{
"epoch": 0.5866035182679297,
"grad_norm": 3.487180471420288,
"learning_rate": 8.727922122249221e-06,
"loss": 0.1393,
"step": 867
},
{
"epoch": 0.5872801082543978,
"grad_norm": 2.1133573055267334,
"learning_rate": 8.704496590275479e-06,
"loss": 0.0814,
"step": 868
},
{
"epoch": 0.587956698240866,
"grad_norm": 3.227505922317505,
"learning_rate": 8.68107828657931e-06,
"loss": 0.1104,
"step": 869
},
{
"epoch": 0.5886332882273342,
"grad_norm": 2.8195204734802246,
"learning_rate": 8.657667341823449e-06,
"loss": 0.1073,
"step": 870
},
{
"epoch": 0.5893098782138024,
"grad_norm": 6.85077428817749,
"learning_rate": 8.63426388662954e-06,
"loss": 0.2117,
"step": 871
},
{
"epoch": 0.5899864682002707,
"grad_norm": 3.027806043624878,
"learning_rate": 8.61086805157747e-06,
"loss": 0.13,
"step": 872
},
{
"epoch": 0.5906630581867388,
"grad_norm": 3.608955144882202,
"learning_rate": 8.587479967204584e-06,
"loss": 0.1323,
"step": 873
},
{
"epoch": 0.591339648173207,
"grad_norm": 3.6784167289733887,
"learning_rate": 8.564099764004998e-06,
"loss": 0.1205,
"step": 874
},
{
"epoch": 0.5920162381596752,
"grad_norm": 3.6753430366516113,
"learning_rate": 8.540727572428854e-06,
"loss": 0.1728,
"step": 875
},
{
"epoch": 0.5926928281461434,
"grad_norm": 3.4869165420532227,
"learning_rate": 8.51736352288158e-06,
"loss": 0.1363,
"step": 876
},
{
"epoch": 0.5933694181326117,
"grad_norm": 6.327773571014404,
"learning_rate": 8.494007745723197e-06,
"loss": 0.1723,
"step": 877
},
{
"epoch": 0.5940460081190798,
"grad_norm": 4.366674423217773,
"learning_rate": 8.47066037126754e-06,
"loss": 0.1557,
"step": 878
},
{
"epoch": 0.5947225981055481,
"grad_norm": 3.28305721282959,
"learning_rate": 8.447321529781597e-06,
"loss": 0.1253,
"step": 879
},
{
"epoch": 0.5953991880920162,
"grad_norm": 3.0015041828155518,
"learning_rate": 8.423991351484715e-06,
"loss": 0.1318,
"step": 880
},
{
"epoch": 0.5953991880920162,
"eval_accuracy": 0.8192660550458716,
"eval_f1": 0.5553047404063205,
"eval_loss": 0.3900049328804016,
"eval_precision": 0.8482758620689655,
"eval_recall": 0.412751677852349,
"eval_runtime": 53.5058,
"eval_samples_per_second": 5.569,
"eval_steps_per_second": 0.187,
"step": 880
},
{
"epoch": 0.5960757780784844,
"grad_norm": 2.9270071983337402,
"learning_rate": 8.400669966547925e-06,
"loss": 0.1256,
"step": 881
},
{
"epoch": 0.5967523680649527,
"grad_norm": 2.6574175357818604,
"learning_rate": 8.377357505093183e-06,
"loss": 0.0761,
"step": 882
},
{
"epoch": 0.5974289580514208,
"grad_norm": 3.648263692855835,
"learning_rate": 8.35405409719266e-06,
"loss": 0.124,
"step": 883
},
{
"epoch": 0.5981055480378891,
"grad_norm": 4.690035820007324,
"learning_rate": 8.330759872868022e-06,
"loss": 0.182,
"step": 884
},
{
"epoch": 0.5987821380243572,
"grad_norm": 3.0360960960388184,
"learning_rate": 8.307474962089676e-06,
"loss": 0.1437,
"step": 885
},
{
"epoch": 0.5994587280108254,
"grad_norm": 3.8773977756500244,
"learning_rate": 8.284199494776083e-06,
"loss": 0.0975,
"step": 886
},
{
"epoch": 0.6001353179972937,
"grad_norm": 3.7407238483428955,
"learning_rate": 8.260933600793003e-06,
"loss": 0.1422,
"step": 887
},
{
"epoch": 0.6008119079837618,
"grad_norm": 4.789558410644531,
"learning_rate": 8.237677409952784e-06,
"loss": 0.1737,
"step": 888
},
{
"epoch": 0.6014884979702301,
"grad_norm": 5.8444929122924805,
"learning_rate": 8.214431052013636e-06,
"loss": 0.1658,
"step": 889
},
{
"epoch": 0.6021650879566982,
"grad_norm": 2.8387224674224854,
"learning_rate": 8.191194656678905e-06,
"loss": 0.1317,
"step": 890
},
{
"epoch": 0.6028416779431665,
"grad_norm": 3.5490684509277344,
"learning_rate": 8.16796835359635e-06,
"loss": 0.0796,
"step": 891
},
{
"epoch": 0.6035182679296346,
"grad_norm": 3.600038766860962,
"learning_rate": 8.144752272357424e-06,
"loss": 0.1059,
"step": 892
},
{
"epoch": 0.6041948579161028,
"grad_norm": 6.19486665725708,
"learning_rate": 8.12154654249654e-06,
"loss": 0.2211,
"step": 893
},
{
"epoch": 0.6048714479025711,
"grad_norm": 3.217571973800659,
"learning_rate": 8.098351293490365e-06,
"loss": 0.0893,
"step": 894
},
{
"epoch": 0.6055480378890392,
"grad_norm": 3.447753667831421,
"learning_rate": 8.07516665475708e-06,
"loss": 0.1373,
"step": 895
},
{
"epoch": 0.6062246278755075,
"grad_norm": 4.001631259918213,
"learning_rate": 8.051992755655672e-06,
"loss": 0.1635,
"step": 896
},
{
"epoch": 0.6069012178619756,
"grad_norm": 3.870436191558838,
"learning_rate": 8.0288297254852e-06,
"loss": 0.1659,
"step": 897
},
{
"epoch": 0.6075778078484438,
"grad_norm": 7.8299479484558105,
"learning_rate": 8.005677693484077e-06,
"loss": 0.2432,
"step": 898
},
{
"epoch": 0.6082543978349121,
"grad_norm": 2.8993029594421387,
"learning_rate": 7.98253678882937e-06,
"loss": 0.0963,
"step": 899
},
{
"epoch": 0.6089309878213802,
"grad_norm": 3.3452529907226562,
"learning_rate": 7.959407140636034e-06,
"loss": 0.1336,
"step": 900
},
{
"epoch": 0.6089309878213802,
"eval_accuracy": 0.8,
"eval_f1": 0.47342995169082125,
"eval_loss": 0.42485949397087097,
"eval_precision": 0.8448275862068966,
"eval_recall": 0.3288590604026846,
"eval_runtime": 53.6449,
"eval_samples_per_second": 5.555,
"eval_steps_per_second": 0.186,
"step": 900
}
],
"logging_steps": 1,
"max_steps": 1478,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.732359159822418e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}