{
"best_global_step": 1220,
"best_metric": 0.9425042335887034,
"best_model_checkpoint": "step_model_bert_large/checkpoint-1220",
"epoch": 9.922448979591836,
"eval_steps": 500,
"global_step": 1220,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00816326530612245,
"grad_norm": 20.771753311157227,
"learning_rate": 1.6000000000000001e-06,
"loss": 1.1365,
"step": 1
},
{
"epoch": 0.0163265306122449,
"grad_norm": 70.12383270263672,
"learning_rate": 3.2000000000000003e-06,
"loss": 1.051,
"step": 2
},
{
"epoch": 0.024489795918367346,
"grad_norm": 31.964258193969727,
"learning_rate": 4.800000000000001e-06,
"loss": 1.1296,
"step": 3
},
{
"epoch": 0.0326530612244898,
"grad_norm": 331.50640869140625,
"learning_rate": 6.4000000000000006e-06,
"loss": 0.8467,
"step": 4
},
{
"epoch": 0.04081632653061224,
"grad_norm": 42.2370491027832,
"learning_rate": 8.000000000000001e-06,
"loss": 0.7934,
"step": 5
},
{
"epoch": 0.04897959183673469,
"grad_norm": 47.351070404052734,
"learning_rate": 9.600000000000001e-06,
"loss": 0.577,
"step": 6
},
{
"epoch": 0.05714285714285714,
"grad_norm": 18.631610870361328,
"learning_rate": 1.1200000000000001e-05,
"loss": 0.5044,
"step": 7
},
{
"epoch": 0.0653061224489796,
"grad_norm": 20.154150009155273,
"learning_rate": 1.2800000000000001e-05,
"loss": 0.4216,
"step": 8
},
{
"epoch": 0.07346938775510205,
"grad_norm": 12.097977638244629,
"learning_rate": 1.4400000000000001e-05,
"loss": 0.4446,
"step": 9
},
{
"epoch": 0.08163265306122448,
"grad_norm": 9.474892616271973,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.2463,
"step": 10
},
{
"epoch": 0.08979591836734693,
"grad_norm": 7.300495624542236,
"learning_rate": 1.76e-05,
"loss": 0.289,
"step": 11
},
{
"epoch": 0.09795918367346938,
"grad_norm": 7.943906307220459,
"learning_rate": 1.9200000000000003e-05,
"loss": 0.2522,
"step": 12
},
{
"epoch": 0.10612244897959183,
"grad_norm": 2.966740846633911,
"learning_rate": 2.08e-05,
"loss": 0.1157,
"step": 13
},
{
"epoch": 0.11428571428571428,
"grad_norm": 6.358625888824463,
"learning_rate": 2.2400000000000002e-05,
"loss": 0.2464,
"step": 14
},
{
"epoch": 0.12244897959183673,
"grad_norm": 2.499382972717285,
"learning_rate": 2.4e-05,
"loss": 0.1134,
"step": 15
},
{
"epoch": 0.1306122448979592,
"grad_norm": 3.328634023666382,
"learning_rate": 2.5600000000000002e-05,
"loss": 0.1879,
"step": 16
},
{
"epoch": 0.13877551020408163,
"grad_norm": 13.66771125793457,
"learning_rate": 2.7200000000000004e-05,
"loss": 0.2889,
"step": 17
},
{
"epoch": 0.1469387755102041,
"grad_norm": 11.846375465393066,
"learning_rate": 2.8800000000000002e-05,
"loss": 0.3625,
"step": 18
},
{
"epoch": 0.15510204081632653,
"grad_norm": 9.650957107543945,
"learning_rate": 3.0400000000000004e-05,
"loss": 0.1811,
"step": 19
},
{
"epoch": 0.16326530612244897,
"grad_norm": 5.533182621002197,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.1549,
"step": 20
},
{
"epoch": 0.17142857142857143,
"grad_norm": 6.746736526489258,
"learning_rate": 3.3600000000000004e-05,
"loss": 0.2285,
"step": 21
},
{
"epoch": 0.17959183673469387,
"grad_norm": 3.802558660507202,
"learning_rate": 3.52e-05,
"loss": 0.1377,
"step": 22
},
{
"epoch": 0.18775510204081633,
"grad_norm": 6.560689926147461,
"learning_rate": 3.680000000000001e-05,
"loss": 0.1661,
"step": 23
},
{
"epoch": 0.19591836734693877,
"grad_norm": 4.309365272521973,
"learning_rate": 3.8400000000000005e-05,
"loss": 0.1222,
"step": 24
},
{
"epoch": 0.20408163265306123,
"grad_norm": 2.247054100036621,
"learning_rate": 4e-05,
"loss": 0.1269,
"step": 25
},
{
"epoch": 0.21224489795918366,
"grad_norm": 8.494240760803223,
"learning_rate": 4.16e-05,
"loss": 0.2676,
"step": 26
},
{
"epoch": 0.22040816326530613,
"grad_norm": 2.9743239879608154,
"learning_rate": 4.3200000000000007e-05,
"loss": 0.1479,
"step": 27
},
{
"epoch": 0.22857142857142856,
"grad_norm": 2.945835590362549,
"learning_rate": 4.4800000000000005e-05,
"loss": 0.1216,
"step": 28
},
{
"epoch": 0.23673469387755103,
"grad_norm": 10.777807235717773,
"learning_rate": 4.64e-05,
"loss": 0.3272,
"step": 29
},
{
"epoch": 0.24489795918367346,
"grad_norm": 9.199320793151855,
"learning_rate": 4.8e-05,
"loss": 0.2623,
"step": 30
},
{
"epoch": 0.2530612244897959,
"grad_norm": 3.7975552082061768,
"learning_rate": 4.9600000000000006e-05,
"loss": 0.1542,
"step": 31
},
{
"epoch": 0.2612244897959184,
"grad_norm": 4.61702823638916,
"learning_rate": 5.1200000000000004e-05,
"loss": 0.1828,
"step": 32
},
{
"epoch": 0.2693877551020408,
"grad_norm": 2.2006444931030273,
"learning_rate": 5.280000000000001e-05,
"loss": 0.1663,
"step": 33
},
{
"epoch": 0.27755102040816326,
"grad_norm": 3.2792046070098877,
"learning_rate": 5.440000000000001e-05,
"loss": 0.1296,
"step": 34
},
{
"epoch": 0.2857142857142857,
"grad_norm": 2.931986093521118,
"learning_rate": 5.6e-05,
"loss": 0.1744,
"step": 35
},
{
"epoch": 0.2938775510204082,
"grad_norm": 1.6390964984893799,
"learning_rate": 5.7600000000000004e-05,
"loss": 0.129,
"step": 36
},
{
"epoch": 0.3020408163265306,
"grad_norm": 1.0177584886550903,
"learning_rate": 5.92e-05,
"loss": 0.0843,
"step": 37
},
{
"epoch": 0.31020408163265306,
"grad_norm": 1.9508525133132935,
"learning_rate": 6.080000000000001e-05,
"loss": 0.0753,
"step": 38
},
{
"epoch": 0.3183673469387755,
"grad_norm": 2.6601903438568115,
"learning_rate": 6.240000000000001e-05,
"loss": 0.1817,
"step": 39
},
{
"epoch": 0.32653061224489793,
"grad_norm": 3.9204416275024414,
"learning_rate": 6.400000000000001e-05,
"loss": 0.1773,
"step": 40
},
{
"epoch": 0.3346938775510204,
"grad_norm": 2.708138942718506,
"learning_rate": 6.56e-05,
"loss": 0.1899,
"step": 41
},
{
"epoch": 0.34285714285714286,
"grad_norm": 2.626298666000366,
"learning_rate": 6.720000000000001e-05,
"loss": 0.1134,
"step": 42
},
{
"epoch": 0.3510204081632653,
"grad_norm": 0.8519235253334045,
"learning_rate": 6.88e-05,
"loss": 0.1421,
"step": 43
},
{
"epoch": 0.35918367346938773,
"grad_norm": 1.024389624595642,
"learning_rate": 7.04e-05,
"loss": 0.1215,
"step": 44
},
{
"epoch": 0.3673469387755102,
"grad_norm": 4.064274311065674,
"learning_rate": 7.2e-05,
"loss": 0.1537,
"step": 45
},
{
"epoch": 0.37551020408163266,
"grad_norm": 6.495007514953613,
"learning_rate": 7.360000000000001e-05,
"loss": 0.2303,
"step": 46
},
{
"epoch": 0.3836734693877551,
"grad_norm": 1.5005526542663574,
"learning_rate": 7.52e-05,
"loss": 0.1672,
"step": 47
},
{
"epoch": 0.39183673469387753,
"grad_norm": 2.671656370162964,
"learning_rate": 7.680000000000001e-05,
"loss": 0.1365,
"step": 48
},
{
"epoch": 0.4,
"grad_norm": 1.1992692947387695,
"learning_rate": 7.840000000000001e-05,
"loss": 0.1168,
"step": 49
},
{
"epoch": 0.40816326530612246,
"grad_norm": 0.7369860410690308,
"learning_rate": 8e-05,
"loss": 0.1121,
"step": 50
},
{
"epoch": 0.4163265306122449,
"grad_norm": 3.4734129905700684,
"learning_rate": 7.993162393162394e-05,
"loss": 0.1931,
"step": 51
},
{
"epoch": 0.42448979591836733,
"grad_norm": 1.9376534223556519,
"learning_rate": 7.986324786324788e-05,
"loss": 0.2017,
"step": 52
},
{
"epoch": 0.4326530612244898,
"grad_norm": 0.9877994656562805,
"learning_rate": 7.97948717948718e-05,
"loss": 0.088,
"step": 53
},
{
"epoch": 0.44081632653061226,
"grad_norm": 2.119616746902466,
"learning_rate": 7.972649572649573e-05,
"loss": 0.1099,
"step": 54
},
{
"epoch": 0.4489795918367347,
"grad_norm": 1.2248163223266602,
"learning_rate": 7.965811965811966e-05,
"loss": 0.1335,
"step": 55
},
{
"epoch": 0.45714285714285713,
"grad_norm": 0.7621262669563293,
"learning_rate": 7.95897435897436e-05,
"loss": 0.071,
"step": 56
},
{
"epoch": 0.46530612244897956,
"grad_norm": 3.021927833557129,
"learning_rate": 7.952136752136753e-05,
"loss": 0.1307,
"step": 57
},
{
"epoch": 0.47346938775510206,
"grad_norm": 0.6796000599861145,
"learning_rate": 7.945299145299147e-05,
"loss": 0.0827,
"step": 58
},
{
"epoch": 0.4816326530612245,
"grad_norm": 1.4021790027618408,
"learning_rate": 7.938461538461539e-05,
"loss": 0.1144,
"step": 59
},
{
"epoch": 0.4897959183673469,
"grad_norm": 1.6122393608093262,
"learning_rate": 7.931623931623932e-05,
"loss": 0.0864,
"step": 60
},
{
"epoch": 0.49795918367346936,
"grad_norm": 1.5375205278396606,
"learning_rate": 7.924786324786326e-05,
"loss": 0.0939,
"step": 61
},
{
"epoch": 0.5061224489795918,
"grad_norm": 0.8782357573509216,
"learning_rate": 7.917948717948719e-05,
"loss": 0.0918,
"step": 62
},
{
"epoch": 0.5142857142857142,
"grad_norm": 2.1417829990386963,
"learning_rate": 7.911111111111112e-05,
"loss": 0.0922,
"step": 63
},
{
"epoch": 0.5224489795918368,
"grad_norm": 0.5513861179351807,
"learning_rate": 7.904273504273506e-05,
"loss": 0.081,
"step": 64
},
{
"epoch": 0.5306122448979592,
"grad_norm": 0.6667738556861877,
"learning_rate": 7.897435897435898e-05,
"loss": 0.0451,
"step": 65
},
{
"epoch": 0.5387755102040817,
"grad_norm": 1.489629864692688,
"learning_rate": 7.890598290598291e-05,
"loss": 0.0761,
"step": 66
},
{
"epoch": 0.5469387755102041,
"grad_norm": 2.571826457977295,
"learning_rate": 7.883760683760685e-05,
"loss": 0.115,
"step": 67
},
{
"epoch": 0.5551020408163265,
"grad_norm": 0.7235465049743652,
"learning_rate": 7.876923076923078e-05,
"loss": 0.0794,
"step": 68
},
{
"epoch": 0.563265306122449,
"grad_norm": 0.9554158449172974,
"learning_rate": 7.87008547008547e-05,
"loss": 0.0733,
"step": 69
},
{
"epoch": 0.5714285714285714,
"grad_norm": 5.222618103027344,
"learning_rate": 7.863247863247864e-05,
"loss": 0.171,
"step": 70
},
{
"epoch": 0.5795918367346938,
"grad_norm": 0.9777473211288452,
"learning_rate": 7.856410256410257e-05,
"loss": 0.0922,
"step": 71
},
{
"epoch": 0.5877551020408164,
"grad_norm": 0.7969712615013123,
"learning_rate": 7.849572649572649e-05,
"loss": 0.0741,
"step": 72
},
{
"epoch": 0.5959183673469388,
"grad_norm": 1.5794110298156738,
"learning_rate": 7.842735042735043e-05,
"loss": 0.1096,
"step": 73
},
{
"epoch": 0.6040816326530613,
"grad_norm": 0.6841784119606018,
"learning_rate": 7.835897435897436e-05,
"loss": 0.0809,
"step": 74
},
{
"epoch": 0.6122448979591837,
"grad_norm": 0.4617042541503906,
"learning_rate": 7.82905982905983e-05,
"loss": 0.0678,
"step": 75
},
{
"epoch": 0.6204081632653061,
"grad_norm": 5.837403297424316,
"learning_rate": 7.822222222222223e-05,
"loss": 0.1176,
"step": 76
},
{
"epoch": 0.6285714285714286,
"grad_norm": 4.160293102264404,
"learning_rate": 7.815384615384616e-05,
"loss": 0.2872,
"step": 77
},
{
"epoch": 0.636734693877551,
"grad_norm": 2.905837059020996,
"learning_rate": 7.808547008547008e-05,
"loss": 0.1861,
"step": 78
},
{
"epoch": 0.6448979591836734,
"grad_norm": 1.3452632427215576,
"learning_rate": 7.801709401709402e-05,
"loss": 0.1925,
"step": 79
},
{
"epoch": 0.6530612244897959,
"grad_norm": 0.8246756792068481,
"learning_rate": 7.794871794871795e-05,
"loss": 0.0676,
"step": 80
},
{
"epoch": 0.6612244897959184,
"grad_norm": 2.3986732959747314,
"learning_rate": 7.788034188034189e-05,
"loss": 0.1415,
"step": 81
},
{
"epoch": 0.6693877551020408,
"grad_norm": 2.3531203269958496,
"learning_rate": 7.781196581196582e-05,
"loss": 0.1225,
"step": 82
},
{
"epoch": 0.6775510204081633,
"grad_norm": 1.0808194875717163,
"learning_rate": 7.774358974358976e-05,
"loss": 0.1161,
"step": 83
},
{
"epoch": 0.6857142857142857,
"grad_norm": 2.469728708267212,
"learning_rate": 7.767521367521368e-05,
"loss": 0.2704,
"step": 84
},
{
"epoch": 0.6938775510204082,
"grad_norm": 1.8167622089385986,
"learning_rate": 7.760683760683761e-05,
"loss": 0.1206,
"step": 85
},
{
"epoch": 0.7020408163265306,
"grad_norm": 0.6328538656234741,
"learning_rate": 7.753846153846154e-05,
"loss": 0.1357,
"step": 86
},
{
"epoch": 0.710204081632653,
"grad_norm": 1.4388840198516846,
"learning_rate": 7.747008547008548e-05,
"loss": 0.1348,
"step": 87
},
{
"epoch": 0.7183673469387755,
"grad_norm": 0.9719085097312927,
"learning_rate": 7.740170940170941e-05,
"loss": 0.1777,
"step": 88
},
{
"epoch": 0.726530612244898,
"grad_norm": 0.39901578426361084,
"learning_rate": 7.733333333333333e-05,
"loss": 0.0822,
"step": 89
},
{
"epoch": 0.7346938775510204,
"grad_norm": 1.6550663709640503,
"learning_rate": 7.726495726495727e-05,
"loss": 0.1101,
"step": 90
},
{
"epoch": 0.7428571428571429,
"grad_norm": 0.5123804211616516,
"learning_rate": 7.71965811965812e-05,
"loss": 0.0794,
"step": 91
},
{
"epoch": 0.7510204081632653,
"grad_norm": 0.5998639464378357,
"learning_rate": 7.712820512820514e-05,
"loss": 0.1104,
"step": 92
},
{
"epoch": 0.7591836734693878,
"grad_norm": 0.5731858611106873,
"learning_rate": 7.705982905982907e-05,
"loss": 0.148,
"step": 93
},
{
"epoch": 0.7673469387755102,
"grad_norm": 1.3274452686309814,
"learning_rate": 7.6991452991453e-05,
"loss": 0.1201,
"step": 94
},
{
"epoch": 0.7755102040816326,
"grad_norm": 0.5123000741004944,
"learning_rate": 7.692307692307693e-05,
"loss": 0.0675,
"step": 95
},
{
"epoch": 0.7836734693877551,
"grad_norm": 0.558822751045227,
"learning_rate": 7.685470085470086e-05,
"loss": 0.0394,
"step": 96
},
{
"epoch": 0.7918367346938775,
"grad_norm": 0.893507719039917,
"learning_rate": 7.67863247863248e-05,
"loss": 0.1375,
"step": 97
},
{
"epoch": 0.8,
"grad_norm": 0.26979655027389526,
"learning_rate": 7.671794871794873e-05,
"loss": 0.0371,
"step": 98
},
{
"epoch": 0.8081632653061225,
"grad_norm": 1.924552321434021,
"learning_rate": 7.664957264957266e-05,
"loss": 0.0691,
"step": 99
},
{
"epoch": 0.8163265306122449,
"grad_norm": 1.3021774291992188,
"learning_rate": 7.65811965811966e-05,
"loss": 0.1223,
"step": 100
},
{
"epoch": 0.8244897959183674,
"grad_norm": 1.297431230545044,
"learning_rate": 7.651282051282052e-05,
"loss": 0.0751,
"step": 101
},
{
"epoch": 0.8326530612244898,
"grad_norm": 0.5344750881195068,
"learning_rate": 7.644444444444445e-05,
"loss": 0.0973,
"step": 102
},
{
"epoch": 0.8408163265306122,
"grad_norm": 2.1697866916656494,
"learning_rate": 7.637606837606839e-05,
"loss": 0.1845,
"step": 103
},
{
"epoch": 0.8489795918367347,
"grad_norm": 1.7886005640029907,
"learning_rate": 7.630769230769232e-05,
"loss": 0.1352,
"step": 104
},
{
"epoch": 0.8571428571428571,
"grad_norm": 0.9444314241409302,
"learning_rate": 7.623931623931624e-05,
"loss": 0.1281,
"step": 105
},
{
"epoch": 0.8653061224489796,
"grad_norm": 1.9579631090164185,
"learning_rate": 7.617094017094018e-05,
"loss": 0.1374,
"step": 106
},
{
"epoch": 0.8734693877551021,
"grad_norm": 0.9300099015235901,
"learning_rate": 7.610256410256411e-05,
"loss": 0.0889,
"step": 107
},
{
"epoch": 0.8816326530612245,
"grad_norm": 0.27003607153892517,
"learning_rate": 7.603418803418803e-05,
"loss": 0.0428,
"step": 108
},
{
"epoch": 0.889795918367347,
"grad_norm": 0.3849831819534302,
"learning_rate": 7.596581196581196e-05,
"loss": 0.063,
"step": 109
},
{
"epoch": 0.8979591836734694,
"grad_norm": 0.7666690945625305,
"learning_rate": 7.58974358974359e-05,
"loss": 0.0481,
"step": 110
},
{
"epoch": 0.9061224489795918,
"grad_norm": 0.4851779639720917,
"learning_rate": 7.582905982905983e-05,
"loss": 0.0303,
"step": 111
},
{
"epoch": 0.9142857142857143,
"grad_norm": 0.21981802582740784,
"learning_rate": 7.576068376068377e-05,
"loss": 0.0263,
"step": 112
},
{
"epoch": 0.9224489795918367,
"grad_norm": 0.8145710229873657,
"learning_rate": 7.56923076923077e-05,
"loss": 0.0995,
"step": 113
},
{
"epoch": 0.9306122448979591,
"grad_norm": 0.6933770179748535,
"learning_rate": 7.562393162393162e-05,
"loss": 0.1012,
"step": 114
},
{
"epoch": 0.9387755102040817,
"grad_norm": 1.0062135457992554,
"learning_rate": 7.555555555555556e-05,
"loss": 0.1136,
"step": 115
},
{
"epoch": 0.9469387755102041,
"grad_norm": 0.7434647083282471,
"learning_rate": 7.548717948717949e-05,
"loss": 0.0967,
"step": 116
},
{
"epoch": 0.9551020408163265,
"grad_norm": 0.5385677218437195,
"learning_rate": 7.541880341880342e-05,
"loss": 0.0536,
"step": 117
},
{
"epoch": 0.963265306122449,
"grad_norm": 1.739890694618225,
"learning_rate": 7.535042735042736e-05,
"loss": 0.2595,
"step": 118
},
{
"epoch": 0.9714285714285714,
"grad_norm": 1.1082196235656738,
"learning_rate": 7.52820512820513e-05,
"loss": 0.2055,
"step": 119
},
{
"epoch": 0.9795918367346939,
"grad_norm": 0.943108320236206,
"learning_rate": 7.521367521367521e-05,
"loss": 0.0494,
"step": 120
},
{
"epoch": 0.9877551020408163,
"grad_norm": 1.5185785293579102,
"learning_rate": 7.514529914529915e-05,
"loss": 0.0991,
"step": 121
},
{
"epoch": 0.9959183673469387,
"grad_norm": 0.9091699123382568,
"learning_rate": 7.507692307692308e-05,
"loss": 0.1214,
"step": 122
},
{
"epoch": 1.0,
"grad_norm": 1.1665633916854858,
"learning_rate": 7.500854700854702e-05,
"loss": 0.0315,
"step": 123
},
{
"epoch": 1.0,
"eval_accuracy": 0.926479222388936,
"eval_f1": 0.926986696070817,
"eval_loss": 0.1320565938949585,
"eval_precision": 0.9281119315465131,
"eval_recall": 0.926479222388936,
"eval_runtime": 27.6732,
"eval_samples_per_second": 15.755,
"eval_steps_per_second": 15.755,
"step": 123
},
{
"epoch": 1.0081632653061225,
"grad_norm": 0.6033539175987244,
"learning_rate": 7.494017094017095e-05,
"loss": 0.0929,
"step": 124
},
{
"epoch": 1.0163265306122449,
"grad_norm": 6.349571704864502,
"learning_rate": 7.487179487179487e-05,
"loss": 0.2896,
"step": 125
},
{
"epoch": 1.0244897959183674,
"grad_norm": 0.9741121530532837,
"learning_rate": 7.48034188034188e-05,
"loss": 0.0812,
"step": 126
},
{
"epoch": 1.0326530612244897,
"grad_norm": 0.3600277304649353,
"learning_rate": 7.473504273504274e-05,
"loss": 0.072,
"step": 127
},
{
"epoch": 1.0408163265306123,
"grad_norm": 0.32495835423469543,
"learning_rate": 7.466666666666667e-05,
"loss": 0.0385,
"step": 128
},
{
"epoch": 1.0489795918367346,
"grad_norm": 0.8278759121894836,
"learning_rate": 7.459829059829061e-05,
"loss": 0.143,
"step": 129
},
{
"epoch": 1.0571428571428572,
"grad_norm": 0.7462875247001648,
"learning_rate": 7.452991452991454e-05,
"loss": 0.1005,
"step": 130
},
{
"epoch": 1.0653061224489795,
"grad_norm": 1.0437004566192627,
"learning_rate": 7.446153846153846e-05,
"loss": 0.0937,
"step": 131
},
{
"epoch": 1.073469387755102,
"grad_norm": 0.7965916991233826,
"learning_rate": 7.43931623931624e-05,
"loss": 0.1018,
"step": 132
},
{
"epoch": 1.0816326530612246,
"grad_norm": 0.33361029624938965,
"learning_rate": 7.432478632478633e-05,
"loss": 0.0326,
"step": 133
},
{
"epoch": 1.089795918367347,
"grad_norm": 1.7304506301879883,
"learning_rate": 7.425641025641027e-05,
"loss": 0.195,
"step": 134
},
{
"epoch": 1.0979591836734695,
"grad_norm": 0.43233224749565125,
"learning_rate": 7.41880341880342e-05,
"loss": 0.0492,
"step": 135
},
{
"epoch": 1.1061224489795918,
"grad_norm": 0.6107978820800781,
"learning_rate": 7.411965811965814e-05,
"loss": 0.0877,
"step": 136
},
{
"epoch": 1.1142857142857143,
"grad_norm": 0.41433191299438477,
"learning_rate": 7.405128205128206e-05,
"loss": 0.0604,
"step": 137
},
{
"epoch": 1.1224489795918366,
"grad_norm": 0.44921883940696716,
"learning_rate": 7.398290598290599e-05,
"loss": 0.0549,
"step": 138
},
{
"epoch": 1.1306122448979592,
"grad_norm": 0.5607606172561646,
"learning_rate": 7.391452991452992e-05,
"loss": 0.0865,
"step": 139
},
{
"epoch": 1.1387755102040815,
"grad_norm": 0.46840882301330566,
"learning_rate": 7.384615384615386e-05,
"loss": 0.0884,
"step": 140
},
{
"epoch": 1.146938775510204,
"grad_norm": 0.4898800253868103,
"learning_rate": 7.377777777777779e-05,
"loss": 0.0657,
"step": 141
},
{
"epoch": 1.1551020408163266,
"grad_norm": 0.5272047519683838,
"learning_rate": 7.370940170940171e-05,
"loss": 0.11,
"step": 142
},
{
"epoch": 1.163265306122449,
"grad_norm": 0.563673198223114,
"learning_rate": 7.364102564102565e-05,
"loss": 0.0906,
"step": 143
},
{
"epoch": 1.1714285714285715,
"grad_norm": 0.3086874186992645,
"learning_rate": 7.357264957264957e-05,
"loss": 0.0327,
"step": 144
},
{
"epoch": 1.1795918367346938,
"grad_norm": 0.8381314873695374,
"learning_rate": 7.35042735042735e-05,
"loss": 0.0474,
"step": 145
},
{
"epoch": 1.1877551020408164,
"grad_norm": 0.47310200333595276,
"learning_rate": 7.343589743589744e-05,
"loss": 0.0459,
"step": 146
},
{
"epoch": 1.1959183673469387,
"grad_norm": 1.3210771083831787,
"learning_rate": 7.336752136752137e-05,
"loss": 0.2073,
"step": 147
},
{
"epoch": 1.2040816326530612,
"grad_norm": 2.012478828430176,
"learning_rate": 7.32991452991453e-05,
"loss": 0.1268,
"step": 148
},
{
"epoch": 1.2122448979591836,
"grad_norm": 0.31665048003196716,
"learning_rate": 7.323076923076924e-05,
"loss": 0.0362,
"step": 149
},
{
"epoch": 1.220408163265306,
"grad_norm": 0.4273552894592285,
"learning_rate": 7.316239316239316e-05,
"loss": 0.0429,
"step": 150
},
{
"epoch": 1.2285714285714286,
"grad_norm": 0.6782147884368896,
"learning_rate": 7.30940170940171e-05,
"loss": 0.0825,
"step": 151
},
{
"epoch": 1.236734693877551,
"grad_norm": 1.7549127340316772,
"learning_rate": 7.302564102564103e-05,
"loss": 0.0874,
"step": 152
},
{
"epoch": 1.2448979591836735,
"grad_norm": 1.1840424537658691,
"learning_rate": 7.295726495726496e-05,
"loss": 0.1183,
"step": 153
},
{
"epoch": 1.2530612244897958,
"grad_norm": 0.40625783801078796,
"learning_rate": 7.28888888888889e-05,
"loss": 0.0583,
"step": 154
},
{
"epoch": 1.2612244897959184,
"grad_norm": 1.0906763076782227,
"learning_rate": 7.282051282051283e-05,
"loss": 0.1058,
"step": 155
},
{
"epoch": 1.269387755102041,
"grad_norm": 1.2108123302459717,
"learning_rate": 7.275213675213675e-05,
"loss": 0.0991,
"step": 156
},
{
"epoch": 1.2775510204081633,
"grad_norm": 0.6065159440040588,
"learning_rate": 7.268376068376069e-05,
"loss": 0.0481,
"step": 157
},
{
"epoch": 1.2857142857142856,
"grad_norm": 1.5350733995437622,
"learning_rate": 7.261538461538462e-05,
"loss": 0.0569,
"step": 158
},
{
"epoch": 1.2938775510204081,
"grad_norm": 0.49242013692855835,
"learning_rate": 7.254700854700855e-05,
"loss": 0.0664,
"step": 159
},
{
"epoch": 1.3020408163265307,
"grad_norm": 0.40741172432899475,
"learning_rate": 7.247863247863249e-05,
"loss": 0.0804,
"step": 160
},
{
"epoch": 1.310204081632653,
"grad_norm": 0.1994098573923111,
"learning_rate": 7.241025641025641e-05,
"loss": 0.0315,
"step": 161
},
{
"epoch": 1.3183673469387756,
"grad_norm": 0.24300333857536316,
"learning_rate": 7.234188034188034e-05,
"loss": 0.028,
"step": 162
},
{
"epoch": 1.3265306122448979,
"grad_norm": 0.2567465007305145,
"learning_rate": 7.227350427350428e-05,
"loss": 0.046,
"step": 163
},
{
"epoch": 1.3346938775510204,
"grad_norm": 1.0523180961608887,
"learning_rate": 7.220512820512821e-05,
"loss": 0.0833,
"step": 164
},
{
"epoch": 1.342857142857143,
"grad_norm": 1.451838731765747,
"learning_rate": 7.213675213675215e-05,
"loss": 0.1063,
"step": 165
},
{
"epoch": 1.3510204081632653,
"grad_norm": 0.3077213764190674,
"learning_rate": 7.206837606837608e-05,
"loss": 0.0137,
"step": 166
},
{
"epoch": 1.3591836734693876,
"grad_norm": 1.869543194770813,
"learning_rate": 7.2e-05,
"loss": 0.1303,
"step": 167
},
{
"epoch": 1.3673469387755102,
"grad_norm": 0.4209451675415039,
"learning_rate": 7.193162393162394e-05,
"loss": 0.0765,
"step": 168
},
{
"epoch": 1.3755102040816327,
"grad_norm": 0.5088202953338623,
"learning_rate": 7.186324786324787e-05,
"loss": 0.0578,
"step": 169
},
{
"epoch": 1.383673469387755,
"grad_norm": 0.37874314188957214,
"learning_rate": 7.17948717948718e-05,
"loss": 0.0433,
"step": 170
},
{
"epoch": 1.3918367346938776,
"grad_norm": 1.1338469982147217,
"learning_rate": 7.172649572649574e-05,
"loss": 0.0552,
"step": 171
},
{
"epoch": 1.4,
"grad_norm": 0.8180486559867859,
"learning_rate": 7.165811965811967e-05,
"loss": 0.0663,
"step": 172
},
{
"epoch": 1.4081632653061225,
"grad_norm": 0.8368282914161682,
"learning_rate": 7.15897435897436e-05,
"loss": 0.0706,
"step": 173
},
{
"epoch": 1.416326530612245,
"grad_norm": 0.4246748089790344,
"learning_rate": 7.152136752136753e-05,
"loss": 0.0765,
"step": 174
},
{
"epoch": 1.4244897959183673,
"grad_norm": 1.6097643375396729,
"learning_rate": 7.145299145299146e-05,
"loss": 0.1153,
"step": 175
},
{
"epoch": 1.4326530612244899,
"grad_norm": 1.8011287450790405,
"learning_rate": 7.13846153846154e-05,
"loss": 0.1639,
"step": 176
},
{
"epoch": 1.4408163265306122,
"grad_norm": 0.49854835867881775,
"learning_rate": 7.131623931623933e-05,
"loss": 0.0945,
"step": 177
},
{
"epoch": 1.4489795918367347,
"grad_norm": 0.8556779623031616,
"learning_rate": 7.124786324786325e-05,
"loss": 0.0688,
"step": 178
},
{
"epoch": 1.457142857142857,
"grad_norm": 0.3972812294960022,
"learning_rate": 7.117948717948719e-05,
"loss": 0.0838,
"step": 179
},
{
"epoch": 1.4653061224489796,
"grad_norm": 1.9628223180770874,
"learning_rate": 7.11111111111111e-05,
"loss": 0.1525,
"step": 180
},
{
"epoch": 1.473469387755102,
"grad_norm": 1.4790903329849243,
"learning_rate": 7.104273504273504e-05,
"loss": 0.0896,
"step": 181
},
{
"epoch": 1.4816326530612245,
"grad_norm": 1.1479032039642334,
"learning_rate": 7.097435897435897e-05,
"loss": 0.0908,
"step": 182
},
{
"epoch": 1.489795918367347,
"grad_norm": 1.7905946969985962,
"learning_rate": 7.090598290598291e-05,
"loss": 0.1608,
"step": 183
},
{
"epoch": 1.4979591836734694,
"grad_norm": 0.5437545776367188,
"learning_rate": 7.083760683760684e-05,
"loss": 0.0571,
"step": 184
},
{
"epoch": 1.5061224489795917,
"grad_norm": 0.401934415102005,
"learning_rate": 7.076923076923078e-05,
"loss": 0.0304,
"step": 185
},
{
"epoch": 1.5142857142857142,
"grad_norm": 1.8619917631149292,
"learning_rate": 7.07008547008547e-05,
"loss": 0.2575,
"step": 186
},
{
"epoch": 1.5224489795918368,
"grad_norm": 1.164413332939148,
"learning_rate": 7.063247863247863e-05,
"loss": 0.1058,
"step": 187
},
{
"epoch": 1.5306122448979593,
"grad_norm": 0.6225583553314209,
"learning_rate": 7.056410256410257e-05,
"loss": 0.0939,
"step": 188
},
{
"epoch": 1.5387755102040817,
"grad_norm": 0.49891531467437744,
"learning_rate": 7.04957264957265e-05,
"loss": 0.1356,
"step": 189
},
{
"epoch": 1.546938775510204,
"grad_norm": 0.4343854784965515,
"learning_rate": 7.042735042735044e-05,
"loss": 0.0822,
"step": 190
},
{
"epoch": 1.5551020408163265,
"grad_norm": 0.6476932168006897,
"learning_rate": 7.035897435897437e-05,
"loss": 0.0851,
"step": 191
},
{
"epoch": 1.563265306122449,
"grad_norm": 0.5360276699066162,
"learning_rate": 7.029059829059829e-05,
"loss": 0.1255,
"step": 192
},
{
"epoch": 1.5714285714285714,
"grad_norm": 0.3216440975666046,
"learning_rate": 7.022222222222222e-05,
"loss": 0.0479,
"step": 193
},
{
"epoch": 1.5795918367346937,
"grad_norm": 0.4395454525947571,
"learning_rate": 7.015384615384616e-05,
"loss": 0.0359,
"step": 194
},
{
"epoch": 1.5877551020408163,
"grad_norm": 0.4362776577472687,
"learning_rate": 7.008547008547009e-05,
"loss": 0.0926,
"step": 195
},
{
"epoch": 1.5959183673469388,
"grad_norm": 0.2993446886539459,
"learning_rate": 7.001709401709403e-05,
"loss": 0.06,
"step": 196
},
{
"epoch": 1.6040816326530614,
"grad_norm": 0.4686635136604309,
"learning_rate": 6.994871794871795e-05,
"loss": 0.1206,
"step": 197
},
{
"epoch": 1.6122448979591837,
"grad_norm": 0.30804672837257385,
"learning_rate": 6.988034188034188e-05,
"loss": 0.0525,
"step": 198
},
{
"epoch": 1.620408163265306,
"grad_norm": 0.4085846245288849,
"learning_rate": 6.981196581196582e-05,
"loss": 0.0537,
"step": 199
},
{
"epoch": 1.6285714285714286,
"grad_norm": 0.33213263750076294,
"learning_rate": 6.974358974358975e-05,
"loss": 0.0392,
"step": 200
},
{
"epoch": 1.636734693877551,
"grad_norm": 0.3664593994617462,
"learning_rate": 6.967521367521368e-05,
"loss": 0.0743,
"step": 201
},
{
"epoch": 1.6448979591836734,
"grad_norm": 0.9924725294113159,
"learning_rate": 6.960683760683762e-05,
"loss": 0.0589,
"step": 202
},
{
"epoch": 1.6530612244897958,
"grad_norm": 0.4398384690284729,
"learning_rate": 6.953846153846154e-05,
"loss": 0.0669,
"step": 203
},
{
"epoch": 1.6612244897959183,
"grad_norm": 0.5044336318969727,
"learning_rate": 6.947008547008547e-05,
"loss": 0.0679,
"step": 204
},
{
"epoch": 1.6693877551020408,
"grad_norm": 0.9199581146240234,
"learning_rate": 6.940170940170941e-05,
"loss": 0.1626,
"step": 205
},
{
"epoch": 1.6775510204081634,
"grad_norm": 1.3309195041656494,
"learning_rate": 6.933333333333334e-05,
"loss": 0.1953,
"step": 206
},
{
"epoch": 1.6857142857142857,
"grad_norm": 0.22333349287509918,
"learning_rate": 6.926495726495728e-05,
"loss": 0.0414,
"step": 207
},
{
"epoch": 1.693877551020408,
"grad_norm": 0.6131277680397034,
"learning_rate": 6.919658119658121e-05,
"loss": 0.0808,
"step": 208
},
{
"epoch": 1.7020408163265306,
"grad_norm": 0.7656962871551514,
"learning_rate": 6.912820512820513e-05,
"loss": 0.0693,
"step": 209
},
{
"epoch": 1.7102040816326531,
"grad_norm": 0.8141290545463562,
"learning_rate": 6.905982905982907e-05,
"loss": 0.0801,
"step": 210
},
{
"epoch": 1.7183673469387755,
"grad_norm": 0.5475645065307617,
"learning_rate": 6.8991452991453e-05,
"loss": 0.066,
"step": 211
},
{
"epoch": 1.726530612244898,
"grad_norm": 0.33460551500320435,
"learning_rate": 6.892307692307693e-05,
"loss": 0.1342,
"step": 212
},
{
"epoch": 1.7346938775510203,
"grad_norm": 0.347888708114624,
"learning_rate": 6.885470085470087e-05,
"loss": 0.0534,
"step": 213
},
{
"epoch": 1.7428571428571429,
"grad_norm": 0.32002532482147217,
"learning_rate": 6.878632478632479e-05,
"loss": 0.0563,
"step": 214
},
{
"epoch": 1.7510204081632654,
"grad_norm": 1.1996527910232544,
"learning_rate": 6.871794871794872e-05,
"loss": 0.1047,
"step": 215
},
{
"epoch": 1.7591836734693878,
"grad_norm": 0.3641211688518524,
"learning_rate": 6.864957264957264e-05,
"loss": 0.0422,
"step": 216
},
{
"epoch": 1.76734693877551,
"grad_norm": 0.3480508029460907,
"learning_rate": 6.858119658119658e-05,
"loss": 0.0491,
"step": 217
},
{
"epoch": 1.7755102040816326,
"grad_norm": 0.4025651812553406,
"learning_rate": 6.851282051282051e-05,
"loss": 0.0523,
"step": 218
},
{
"epoch": 1.7836734693877552,
"grad_norm": 0.5428391098976135,
"learning_rate": 6.844444444444445e-05,
"loss": 0.0684,
"step": 219
},
{
"epoch": 1.7918367346938775,
"grad_norm": 0.6161150932312012,
"learning_rate": 6.837606837606838e-05,
"loss": 0.0795,
"step": 220
},
{
"epoch": 1.8,
"grad_norm": 0.4824294149875641,
"learning_rate": 6.830769230769232e-05,
"loss": 0.0926,
"step": 221
},
{
"epoch": 1.8081632653061224,
"grad_norm": 1.10171639919281,
"learning_rate": 6.823931623931624e-05,
"loss": 0.1577,
"step": 222
},
{
"epoch": 1.816326530612245,
"grad_norm": 0.4547288417816162,
"learning_rate": 6.817094017094017e-05,
"loss": 0.0496,
"step": 223
},
{
"epoch": 1.8244897959183675,
"grad_norm": 0.44235700368881226,
"learning_rate": 6.81025641025641e-05,
"loss": 0.0453,
"step": 224
},
{
"epoch": 1.8326530612244898,
"grad_norm": 0.8762555718421936,
"learning_rate": 6.803418803418804e-05,
"loss": 0.1378,
"step": 225
},
{
"epoch": 1.8408163265306121,
"grad_norm": 0.8897255063056946,
"learning_rate": 6.796581196581197e-05,
"loss": 0.0713,
"step": 226
},
{
"epoch": 1.8489795918367347,
"grad_norm": 0.7720091938972473,
"learning_rate": 6.789743589743591e-05,
"loss": 0.0849,
"step": 227
},
{
"epoch": 1.8571428571428572,
"grad_norm": 0.5211923718452454,
"learning_rate": 6.782905982905983e-05,
"loss": 0.1556,
"step": 228
},
{
"epoch": 1.8653061224489798,
"grad_norm": 0.6279407739639282,
"learning_rate": 6.776068376068376e-05,
"loss": 0.0815,
"step": 229
},
{
"epoch": 1.873469387755102,
"grad_norm": 0.28113502264022827,
"learning_rate": 6.76923076923077e-05,
"loss": 0.0581,
"step": 230
},
{
"epoch": 1.8816326530612244,
"grad_norm": 0.4654277563095093,
"learning_rate": 6.762393162393163e-05,
"loss": 0.0372,
"step": 231
},
{
"epoch": 1.889795918367347,
"grad_norm": 0.6376189589500427,
"learning_rate": 6.755555555555557e-05,
"loss": 0.066,
"step": 232
},
{
"epoch": 1.8979591836734695,
"grad_norm": 0.28285926580429077,
"learning_rate": 6.748717948717949e-05,
"loss": 0.0605,
"step": 233
},
{
"epoch": 1.9061224489795918,
"grad_norm": 0.6249369382858276,
"learning_rate": 6.741880341880342e-05,
"loss": 0.0789,
"step": 234
},
{
"epoch": 1.9142857142857141,
"grad_norm": 1.2941783666610718,
"learning_rate": 6.735042735042735e-05,
"loss": 0.1235,
"step": 235
},
{
"epoch": 1.9224489795918367,
"grad_norm": 0.27006468176841736,
"learning_rate": 6.728205128205129e-05,
"loss": 0.0458,
"step": 236
},
{
"epoch": 1.9306122448979592,
"grad_norm": 0.595116376876831,
"learning_rate": 6.721367521367522e-05,
"loss": 0.0895,
"step": 237
},
{
"epoch": 1.9387755102040818,
"grad_norm": 0.24352626502513885,
"learning_rate": 6.714529914529916e-05,
"loss": 0.0302,
"step": 238
},
{
"epoch": 1.9469387755102041,
"grad_norm": 0.8554300665855408,
"learning_rate": 6.707692307692308e-05,
"loss": 0.1388,
"step": 239
},
{
"epoch": 1.9551020408163264,
"grad_norm": 0.6483756899833679,
"learning_rate": 6.700854700854701e-05,
"loss": 0.1062,
"step": 240
},
{
"epoch": 1.963265306122449,
"grad_norm": 0.8733114004135132,
"learning_rate": 6.694017094017095e-05,
"loss": 0.0837,
"step": 241
},
{
"epoch": 1.9714285714285715,
"grad_norm": 0.35388854146003723,
"learning_rate": 6.687179487179488e-05,
"loss": 0.0546,
"step": 242
},
{
"epoch": 1.9795918367346939,
"grad_norm": 0.20294742286205292,
"learning_rate": 6.680341880341881e-05,
"loss": 0.0234,
"step": 243
},
{
"epoch": 1.9877551020408162,
"grad_norm": 0.5089455842971802,
"learning_rate": 6.673504273504275e-05,
"loss": 0.0426,
"step": 244
},
{
"epoch": 1.9959183673469387,
"grad_norm": 0.3105431795120239,
"learning_rate": 6.666666666666667e-05,
"loss": 0.0402,
"step": 245
},
{
"epoch": 2.0,
"grad_norm": 0.25790804624557495,
"learning_rate": 6.65982905982906e-05,
"loss": 0.0216,
"step": 246
},
{
"epoch": 2.0,
"eval_accuracy": 0.9410268119251093,
"eval_f1": 0.940523295891238,
"eval_loss": 0.1141602024435997,
"eval_precision": 0.9408064294905566,
"eval_recall": 0.9410268119251093,
"eval_runtime": 24.7522,
"eval_samples_per_second": 17.615,
"eval_steps_per_second": 17.615,
"step": 246
},
{
"epoch": 2.0081632653061225,
"grad_norm": 0.35438716411590576,
"learning_rate": 6.652991452991454e-05,
"loss": 0.0662,
"step": 247
},
{
"epoch": 2.016326530612245,
"grad_norm": 0.4929123818874359,
"learning_rate": 6.646153846153847e-05,
"loss": 0.0615,
"step": 248
},
{
"epoch": 2.024489795918367,
"grad_norm": 0.360612690448761,
"learning_rate": 6.639316239316241e-05,
"loss": 0.0469,
"step": 249
},
{
"epoch": 2.0326530612244897,
"grad_norm": 0.4224632978439331,
"learning_rate": 6.632478632478634e-05,
"loss": 0.0565,
"step": 250
},
{
"epoch": 2.0408163265306123,
"grad_norm": 0.32445797324180603,
"learning_rate": 6.625641025641026e-05,
"loss": 0.0288,
"step": 251
},
{
"epoch": 2.048979591836735,
"grad_norm": 0.8451316356658936,
"learning_rate": 6.618803418803418e-05,
"loss": 0.1075,
"step": 252
},
{
"epoch": 2.057142857142857,
"grad_norm": 0.6300255656242371,
"learning_rate": 6.611965811965812e-05,
"loss": 0.0955,
"step": 253
},
{
"epoch": 2.0653061224489795,
"grad_norm": 0.17683130502700806,
"learning_rate": 6.605128205128205e-05,
"loss": 0.028,
"step": 254
},
{
"epoch": 2.073469387755102,
"grad_norm": 0.2518182098865509,
"learning_rate": 6.598290598290599e-05,
"loss": 0.018,
"step": 255
},
{
"epoch": 2.0816326530612246,
"grad_norm": 0.29837292432785034,
"learning_rate": 6.591452991452992e-05,
"loss": 0.0331,
"step": 256
},
{
"epoch": 2.089795918367347,
"grad_norm": 0.44716575741767883,
"learning_rate": 6.584615384615385e-05,
"loss": 0.0701,
"step": 257
},
{
"epoch": 2.0979591836734692,
"grad_norm": 0.47956037521362305,
"learning_rate": 6.577777777777777e-05,
"loss": 0.0373,
"step": 258
},
{
"epoch": 2.1061224489795918,
"grad_norm": 0.2430446296930313,
"learning_rate": 6.570940170940171e-05,
"loss": 0.035,
"step": 259
},
{
"epoch": 2.1142857142857143,
"grad_norm": 0.9001370072364807,
"learning_rate": 6.564102564102564e-05,
"loss": 0.0927,
"step": 260
},
{
"epoch": 2.122448979591837,
"grad_norm": 0.37475600838661194,
"learning_rate": 6.557264957264958e-05,
"loss": 0.0655,
"step": 261
},
{
"epoch": 2.130612244897959,
"grad_norm": 0.33517739176750183,
"learning_rate": 6.550427350427351e-05,
"loss": 0.0472,
"step": 262
},
{
"epoch": 2.1387755102040815,
"grad_norm": 0.22833134233951569,
"learning_rate": 6.543589743589745e-05,
"loss": 0.0285,
"step": 263
},
{
"epoch": 2.146938775510204,
"grad_norm": 0.34427952766418457,
"learning_rate": 6.536752136752137e-05,
"loss": 0.0592,
"step": 264
},
{
"epoch": 2.1551020408163266,
"grad_norm": 0.6570462584495544,
"learning_rate": 6.52991452991453e-05,
"loss": 0.0594,
"step": 265
},
{
"epoch": 2.163265306122449,
"grad_norm": 0.9344841837882996,
"learning_rate": 6.523076923076923e-05,
"loss": 0.0732,
"step": 266
},
{
"epoch": 2.1714285714285713,
"grad_norm": 0.1269778609275818,
"learning_rate": 6.516239316239317e-05,
"loss": 0.0192,
"step": 267
},
{
"epoch": 2.179591836734694,
"grad_norm": 0.3492695391178131,
"learning_rate": 6.50940170940171e-05,
"loss": 0.0285,
"step": 268
},
{
"epoch": 2.1877551020408164,
"grad_norm": 0.34235745668411255,
"learning_rate": 6.502564102564104e-05,
"loss": 0.0472,
"step": 269
},
{
"epoch": 2.195918367346939,
"grad_norm": 0.7924789786338806,
"learning_rate": 6.495726495726496e-05,
"loss": 0.0603,
"step": 270
},
{
"epoch": 2.204081632653061,
"grad_norm": 0.8306127190589905,
"learning_rate": 6.488888888888889e-05,
"loss": 0.0688,
"step": 271
},
{
"epoch": 2.2122448979591836,
"grad_norm": 0.6912404894828796,
"learning_rate": 6.482051282051283e-05,
"loss": 0.0596,
"step": 272
},
{
"epoch": 2.220408163265306,
"grad_norm": 0.557754635810852,
"learning_rate": 6.475213675213676e-05,
"loss": 0.0591,
"step": 273
},
{
"epoch": 2.2285714285714286,
"grad_norm": 0.9123916029930115,
"learning_rate": 6.46837606837607e-05,
"loss": 0.0554,
"step": 274
},
{
"epoch": 2.236734693877551,
"grad_norm": 1.5349823236465454,
"learning_rate": 6.461538461538462e-05,
"loss": 0.1467,
"step": 275
},
{
"epoch": 2.2448979591836733,
"grad_norm": 0.6170281767845154,
"learning_rate": 6.454700854700855e-05,
"loss": 0.0721,
"step": 276
},
{
"epoch": 2.253061224489796,
"grad_norm": 0.49400895833969116,
"learning_rate": 6.447863247863248e-05,
"loss": 0.0576,
"step": 277
},
{
"epoch": 2.2612244897959184,
"grad_norm": 0.2935521602630615,
"learning_rate": 6.441025641025642e-05,
"loss": 0.0241,
"step": 278
},
{
"epoch": 2.269387755102041,
"grad_norm": 1.0610945224761963,
"learning_rate": 6.434188034188035e-05,
"loss": 0.1248,
"step": 279
},
{
"epoch": 2.277551020408163,
"grad_norm": 0.6663985252380371,
"learning_rate": 6.427350427350429e-05,
"loss": 0.0525,
"step": 280
},
{
"epoch": 2.2857142857142856,
"grad_norm": 0.9457942247390747,
"learning_rate": 6.420512820512821e-05,
"loss": 0.0665,
"step": 281
},
{
"epoch": 2.293877551020408,
"grad_norm": 0.17783474922180176,
"learning_rate": 6.413675213675214e-05,
"loss": 0.0203,
"step": 282
},
{
"epoch": 2.3020408163265307,
"grad_norm": 0.6842500567436218,
"learning_rate": 6.406837606837608e-05,
"loss": 0.065,
"step": 283
},
{
"epoch": 2.3102040816326532,
"grad_norm": 0.39229097962379456,
"learning_rate": 6.400000000000001e-05,
"loss": 0.0382,
"step": 284
},
{
"epoch": 2.3183673469387753,
"grad_norm": 0.5632251501083374,
"learning_rate": 6.393162393162394e-05,
"loss": 0.0468,
"step": 285
},
{
"epoch": 2.326530612244898,
"grad_norm": 1.3715561628341675,
"learning_rate": 6.386324786324788e-05,
"loss": 0.0812,
"step": 286
},
{
"epoch": 2.3346938775510204,
"grad_norm": 0.5640051960945129,
"learning_rate": 6.37948717948718e-05,
"loss": 0.0513,
"step": 287
},
{
"epoch": 2.342857142857143,
"grad_norm": 0.438909113407135,
"learning_rate": 6.372649572649573e-05,
"loss": 0.027,
"step": 288
},
{
"epoch": 2.351020408163265,
"grad_norm": 0.8141554594039917,
"learning_rate": 6.365811965811965e-05,
"loss": 0.1043,
"step": 289
},
{
"epoch": 2.3591836734693876,
"grad_norm": 0.381523072719574,
"learning_rate": 6.358974358974359e-05,
"loss": 0.0339,
"step": 290
},
{
"epoch": 2.36734693877551,
"grad_norm": 0.5834519863128662,
"learning_rate": 6.352136752136752e-05,
"loss": 0.0652,
"step": 291
},
{
"epoch": 2.3755102040816327,
"grad_norm": 0.5051060914993286,
"learning_rate": 6.345299145299146e-05,
"loss": 0.071,
"step": 292
},
{
"epoch": 2.3836734693877553,
"grad_norm": 1.0093952417373657,
"learning_rate": 6.338461538461539e-05,
"loss": 0.0988,
"step": 293
},
{
"epoch": 2.3918367346938774,
"grad_norm": 0.9834301471710205,
"learning_rate": 6.331623931623931e-05,
"loss": 0.1045,
"step": 294
},
{
"epoch": 2.4,
"grad_norm": 0.25004687905311584,
"learning_rate": 6.324786324786325e-05,
"loss": 0.026,
"step": 295
},
{
"epoch": 2.4081632653061225,
"grad_norm": 0.3586501479148865,
"learning_rate": 6.317948717948718e-05,
"loss": 0.0364,
"step": 296
},
{
"epoch": 2.416326530612245,
"grad_norm": 1.293340802192688,
"learning_rate": 6.311111111111112e-05,
"loss": 0.1104,
"step": 297
},
{
"epoch": 2.424489795918367,
"grad_norm": 0.8027708530426025,
"learning_rate": 6.304273504273505e-05,
"loss": 0.0581,
"step": 298
},
{
"epoch": 2.4326530612244897,
"grad_norm": 0.5780189633369446,
"learning_rate": 6.297435897435898e-05,
"loss": 0.0485,
"step": 299
},
{
"epoch": 2.440816326530612,
"grad_norm": 1.5172946453094482,
"learning_rate": 6.29059829059829e-05,
"loss": 0.1051,
"step": 300
},
{
"epoch": 2.4489795918367347,
"grad_norm": 0.3469559848308563,
"learning_rate": 6.283760683760684e-05,
"loss": 0.0499,
"step": 301
},
{
"epoch": 2.4571428571428573,
"grad_norm": 0.4521036744117737,
"learning_rate": 6.276923076923077e-05,
"loss": 0.0273,
"step": 302
},
{
"epoch": 2.4653061224489794,
"grad_norm": 0.43015456199645996,
"learning_rate": 6.270085470085471e-05,
"loss": 0.0515,
"step": 303
},
{
"epoch": 2.473469387755102,
"grad_norm": 0.9717444181442261,
"learning_rate": 6.263247863247864e-05,
"loss": 0.0945,
"step": 304
},
{
"epoch": 2.4816326530612245,
"grad_norm": 1.5522366762161255,
"learning_rate": 6.256410256410258e-05,
"loss": 0.122,
"step": 305
},
{
"epoch": 2.489795918367347,
"grad_norm": 0.4901497960090637,
"learning_rate": 6.24957264957265e-05,
"loss": 0.0408,
"step": 306
},
{
"epoch": 2.497959183673469,
"grad_norm": 0.5403264164924622,
"learning_rate": 6.242735042735043e-05,
"loss": 0.0413,
"step": 307
},
{
"epoch": 2.5061224489795917,
"grad_norm": 0.25704896450042725,
"learning_rate": 6.235897435897436e-05,
"loss": 0.0437,
"step": 308
},
{
"epoch": 2.5142857142857142,
"grad_norm": 0.43089812994003296,
"learning_rate": 6.22905982905983e-05,
"loss": 0.0488,
"step": 309
},
{
"epoch": 2.522448979591837,
"grad_norm": 1.8273061513900757,
"learning_rate": 6.222222222222223e-05,
"loss": 0.1857,
"step": 310
},
{
"epoch": 2.5306122448979593,
"grad_norm": 0.46482834219932556,
"learning_rate": 6.215384615384615e-05,
"loss": 0.0571,
"step": 311
},
{
"epoch": 2.538775510204082,
"grad_norm": 0.7548282742500305,
"learning_rate": 6.208547008547009e-05,
"loss": 0.0681,
"step": 312
},
{
"epoch": 2.546938775510204,
"grad_norm": 0.4708963930606842,
"learning_rate": 6.201709401709402e-05,
"loss": 0.0416,
"step": 313
},
{
"epoch": 2.5551020408163265,
"grad_norm": 0.4314163029193878,
"learning_rate": 6.194871794871796e-05,
"loss": 0.0344,
"step": 314
},
{
"epoch": 2.563265306122449,
"grad_norm": 0.23221971094608307,
"learning_rate": 6.188034188034189e-05,
"loss": 0.0315,
"step": 315
},
{
"epoch": 2.571428571428571,
"grad_norm": 0.35419610142707825,
"learning_rate": 6.181196581196583e-05,
"loss": 0.0223,
"step": 316
},
{
"epoch": 2.5795918367346937,
"grad_norm": 0.3372536897659302,
"learning_rate": 6.174358974358975e-05,
"loss": 0.0491,
"step": 317
},
{
"epoch": 2.5877551020408163,
"grad_norm": 0.262692391872406,
"learning_rate": 6.167521367521368e-05,
"loss": 0.026,
"step": 318
},
{
"epoch": 2.595918367346939,
"grad_norm": 0.30098071694374084,
"learning_rate": 6.160683760683761e-05,
"loss": 0.0295,
"step": 319
},
{
"epoch": 2.6040816326530614,
"grad_norm": 0.3119616210460663,
"learning_rate": 6.153846153846155e-05,
"loss": 0.0364,
"step": 320
},
{
"epoch": 2.612244897959184,
"grad_norm": 0.5946303606033325,
"learning_rate": 6.147008547008548e-05,
"loss": 0.0525,
"step": 321
},
{
"epoch": 2.620408163265306,
"grad_norm": 0.4923531711101532,
"learning_rate": 6.140170940170942e-05,
"loss": 0.0352,
"step": 322
},
{
"epoch": 2.6285714285714286,
"grad_norm": 1.0566976070404053,
"learning_rate": 6.133333333333334e-05,
"loss": 0.1034,
"step": 323
},
{
"epoch": 2.636734693877551,
"grad_norm": 0.44877099990844727,
"learning_rate": 6.126495726495727e-05,
"loss": 0.0447,
"step": 324
},
{
"epoch": 2.644897959183673,
"grad_norm": 0.749967098236084,
"learning_rate": 6.119658119658119e-05,
"loss": 0.103,
"step": 325
},
{
"epoch": 2.6530612244897958,
"grad_norm": 0.5334205031394958,
"learning_rate": 6.112820512820513e-05,
"loss": 0.045,
"step": 326
},
{
"epoch": 2.6612244897959183,
"grad_norm": 0.6284877061843872,
"learning_rate": 6.105982905982906e-05,
"loss": 0.0766,
"step": 327
},
{
"epoch": 2.669387755102041,
"grad_norm": 0.1609213799238205,
"learning_rate": 6.0991452991452996e-05,
"loss": 0.0149,
"step": 328
},
{
"epoch": 2.6775510204081634,
"grad_norm": 0.208161860704422,
"learning_rate": 6.092307692307692e-05,
"loss": 0.0228,
"step": 329
},
{
"epoch": 2.685714285714286,
"grad_norm": 0.5576138496398926,
"learning_rate": 6.085470085470086e-05,
"loss": 0.0649,
"step": 330
},
{
"epoch": 2.693877551020408,
"grad_norm": 0.17669138312339783,
"learning_rate": 6.078632478632479e-05,
"loss": 0.0175,
"step": 331
},
{
"epoch": 2.7020408163265306,
"grad_norm": 0.39149168133735657,
"learning_rate": 6.071794871794872e-05,
"loss": 0.0443,
"step": 332
},
{
"epoch": 2.710204081632653,
"grad_norm": 0.2674602270126343,
"learning_rate": 6.064957264957265e-05,
"loss": 0.0371,
"step": 333
},
{
"epoch": 2.7183673469387752,
"grad_norm": 0.39889994263648987,
"learning_rate": 6.058119658119658e-05,
"loss": 0.0775,
"step": 334
},
{
"epoch": 2.726530612244898,
"grad_norm": 0.2623346745967865,
"learning_rate": 6.0512820512820515e-05,
"loss": 0.0318,
"step": 335
},
{
"epoch": 2.7346938775510203,
"grad_norm": 0.34579864144325256,
"learning_rate": 6.044444444444445e-05,
"loss": 0.0379,
"step": 336
},
{
"epoch": 2.742857142857143,
"grad_norm": 0.2815621495246887,
"learning_rate": 6.037606837606838e-05,
"loss": 0.0511,
"step": 337
},
{
"epoch": 2.7510204081632654,
"grad_norm": 0.34444335103034973,
"learning_rate": 6.030769230769231e-05,
"loss": 0.056,
"step": 338
},
{
"epoch": 2.759183673469388,
"grad_norm": 0.5257909893989563,
"learning_rate": 6.0239316239316245e-05,
"loss": 0.0736,
"step": 339
},
{
"epoch": 2.76734693877551,
"grad_norm": 0.32044798135757446,
"learning_rate": 6.017094017094017e-05,
"loss": 0.0376,
"step": 340
},
{
"epoch": 2.7755102040816326,
"grad_norm": 0.37229958176612854,
"learning_rate": 6.010256410256411e-05,
"loss": 0.0414,
"step": 341
},
{
"epoch": 2.783673469387755,
"grad_norm": 0.4967615306377411,
"learning_rate": 6.003418803418804e-05,
"loss": 0.0925,
"step": 342
},
{
"epoch": 2.7918367346938773,
"grad_norm": 0.320186048746109,
"learning_rate": 5.996581196581197e-05,
"loss": 0.0444,
"step": 343
},
{
"epoch": 2.8,
"grad_norm": 0.5603439807891846,
"learning_rate": 5.98974358974359e-05,
"loss": 0.0534,
"step": 344
},
{
"epoch": 2.8081632653061224,
"grad_norm": 0.33432868123054504,
"learning_rate": 5.982905982905984e-05,
"loss": 0.0262,
"step": 345
},
{
"epoch": 2.816326530612245,
"grad_norm": 0.4188118278980255,
"learning_rate": 5.9760683760683765e-05,
"loss": 0.0823,
"step": 346
},
{
"epoch": 2.8244897959183675,
"grad_norm": 0.24907562136650085,
"learning_rate": 5.96923076923077e-05,
"loss": 0.0134,
"step": 347
},
{
"epoch": 2.83265306122449,
"grad_norm": 0.42357468605041504,
"learning_rate": 5.962393162393163e-05,
"loss": 0.0386,
"step": 348
},
{
"epoch": 2.840816326530612,
"grad_norm": 0.9156424403190613,
"learning_rate": 5.955555555555556e-05,
"loss": 0.0803,
"step": 349
},
{
"epoch": 2.8489795918367347,
"grad_norm": 0.45649656653404236,
"learning_rate": 5.9487179487179495e-05,
"loss": 0.0627,
"step": 350
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.3821771442890167,
"learning_rate": 5.941880341880343e-05,
"loss": 0.0809,
"step": 351
},
{
"epoch": 2.8653061224489798,
"grad_norm": 0.4800649881362915,
"learning_rate": 5.9350427350427357e-05,
"loss": 0.0367,
"step": 352
},
{
"epoch": 2.873469387755102,
"grad_norm": 0.4957541525363922,
"learning_rate": 5.928205128205129e-05,
"loss": 0.0577,
"step": 353
},
{
"epoch": 2.8816326530612244,
"grad_norm": 0.5091708898544312,
"learning_rate": 5.921367521367522e-05,
"loss": 0.086,
"step": 354
},
{
"epoch": 2.889795918367347,
"grad_norm": 0.6676978468894958,
"learning_rate": 5.914529914529915e-05,
"loss": 0.0234,
"step": 355
},
{
"epoch": 2.8979591836734695,
"grad_norm": 0.6917971968650818,
"learning_rate": 5.907692307692309e-05,
"loss": 0.0561,
"step": 356
},
{
"epoch": 2.906122448979592,
"grad_norm": 0.5658119916915894,
"learning_rate": 5.9008547008547014e-05,
"loss": 0.0497,
"step": 357
},
{
"epoch": 2.914285714285714,
"grad_norm": 0.6094546318054199,
"learning_rate": 5.894017094017095e-05,
"loss": 0.0952,
"step": 358
},
{
"epoch": 2.9224489795918367,
"grad_norm": 0.39741581678390503,
"learning_rate": 5.887179487179488e-05,
"loss": 0.0618,
"step": 359
},
{
"epoch": 2.9306122448979592,
"grad_norm": 0.6804453730583191,
"learning_rate": 5.880341880341881e-05,
"loss": 0.0616,
"step": 360
},
{
"epoch": 2.938775510204082,
"grad_norm": 0.9075838923454285,
"learning_rate": 5.873504273504274e-05,
"loss": 0.0499,
"step": 361
},
{
"epoch": 2.946938775510204,
"grad_norm": 0.33524271845817566,
"learning_rate": 5.8666666666666665e-05,
"loss": 0.049,
"step": 362
},
{
"epoch": 2.9551020408163264,
"grad_norm": 0.3323378264904022,
"learning_rate": 5.85982905982906e-05,
"loss": 0.0281,
"step": 363
},
{
"epoch": 2.963265306122449,
"grad_norm": 0.24611520767211914,
"learning_rate": 5.8529914529914534e-05,
"loss": 0.0708,
"step": 364
},
{
"epoch": 2.9714285714285715,
"grad_norm": 1.779318928718567,
"learning_rate": 5.846153846153846e-05,
"loss": 0.1223,
"step": 365
},
{
"epoch": 2.979591836734694,
"grad_norm": 0.5884644985198975,
"learning_rate": 5.8393162393162395e-05,
"loss": 0.0352,
"step": 366
},
{
"epoch": 2.987755102040816,
"grad_norm": 0.47888728976249695,
"learning_rate": 5.832478632478633e-05,
"loss": 0.0689,
"step": 367
},
{
"epoch": 2.9959183673469387,
"grad_norm": 0.33537718653678894,
"learning_rate": 5.825641025641026e-05,
"loss": 0.0357,
"step": 368
},
{
"epoch": 3.0,
"grad_norm": 0.3279494047164917,
"learning_rate": 5.818803418803419e-05,
"loss": 0.0286,
"step": 369
},
{
"epoch": 3.0,
"eval_accuracy": 0.9323504468654185,
"eval_f1": 0.9307983009212302,
"eval_loss": 0.15686975419521332,
"eval_precision": 0.9341369053023137,
"eval_recall": 0.9323504468654185,
"eval_runtime": 25.0411,
"eval_samples_per_second": 17.411,
"eval_steps_per_second": 17.411,
"step": 369
},
{
"epoch": 3.0081632653061225,
"grad_norm": 0.8343735933303833,
"learning_rate": 5.8119658119658126e-05,
"loss": 0.082,
"step": 370
},
{
"epoch": 3.016326530612245,
"grad_norm": 0.6073836088180542,
"learning_rate": 5.805128205128205e-05,
"loss": 0.0558,
"step": 371
},
{
"epoch": 3.024489795918367,
"grad_norm": 0.42929086089134216,
"learning_rate": 5.798290598290599e-05,
"loss": 0.029,
"step": 372
},
{
"epoch": 3.0326530612244897,
"grad_norm": 0.3041161298751831,
"learning_rate": 5.7914529914529915e-05,
"loss": 0.0346,
"step": 373
},
{
"epoch": 3.0408163265306123,
"grad_norm": 0.3552093505859375,
"learning_rate": 5.784615384615385e-05,
"loss": 0.0231,
"step": 374
},
{
"epoch": 3.048979591836735,
"grad_norm": 0.5431807637214661,
"learning_rate": 5.777777777777778e-05,
"loss": 0.0455,
"step": 375
},
{
"epoch": 3.057142857142857,
"grad_norm": 0.5495198369026184,
"learning_rate": 5.770940170940171e-05,
"loss": 0.0103,
"step": 376
},
{
"epoch": 3.0653061224489795,
"grad_norm": 1.091922402381897,
"learning_rate": 5.7641025641025645e-05,
"loss": 0.0615,
"step": 377
},
{
"epoch": 3.073469387755102,
"grad_norm": 0.39272844791412354,
"learning_rate": 5.757264957264958e-05,
"loss": 0.0652,
"step": 378
},
{
"epoch": 3.0816326530612246,
"grad_norm": 0.39859914779663086,
"learning_rate": 5.750427350427351e-05,
"loss": 0.0523,
"step": 379
},
{
"epoch": 3.089795918367347,
"grad_norm": 0.17209431529045105,
"learning_rate": 5.743589743589744e-05,
"loss": 0.0262,
"step": 380
},
{
"epoch": 3.0979591836734692,
"grad_norm": 0.6512501239776611,
"learning_rate": 5.7367521367521375e-05,
"loss": 0.0568,
"step": 381
},
{
"epoch": 3.1061224489795918,
"grad_norm": 0.4490756690502167,
"learning_rate": 5.72991452991453e-05,
"loss": 0.0206,
"step": 382
},
{
"epoch": 3.1142857142857143,
"grad_norm": 0.54147869348526,
"learning_rate": 5.723076923076924e-05,
"loss": 0.0411,
"step": 383
},
{
"epoch": 3.122448979591837,
"grad_norm": 0.6576200723648071,
"learning_rate": 5.716239316239317e-05,
"loss": 0.0537,
"step": 384
},
{
"epoch": 3.130612244897959,
"grad_norm": 0.3526168763637543,
"learning_rate": 5.70940170940171e-05,
"loss": 0.0348,
"step": 385
},
{
"epoch": 3.1387755102040815,
"grad_norm": 0.43255260586738586,
"learning_rate": 5.702564102564103e-05,
"loss": 0.0279,
"step": 386
},
{
"epoch": 3.146938775510204,
"grad_norm": 0.6807562708854675,
"learning_rate": 5.695726495726497e-05,
"loss": 0.0805,
"step": 387
},
{
"epoch": 3.1551020408163266,
"grad_norm": 0.21039244532585144,
"learning_rate": 5.6888888888888895e-05,
"loss": 0.0255,
"step": 388
},
{
"epoch": 3.163265306122449,
"grad_norm": 0.2559818625450134,
"learning_rate": 5.682051282051283e-05,
"loss": 0.0269,
"step": 389
},
{
"epoch": 3.1714285714285713,
"grad_norm": 0.14890031516551971,
"learning_rate": 5.6752136752136756e-05,
"loss": 0.0131,
"step": 390
},
{
"epoch": 3.179591836734694,
"grad_norm": 0.3709496855735779,
"learning_rate": 5.668376068376069e-05,
"loss": 0.0379,
"step": 391
},
{
"epoch": 3.1877551020408164,
"grad_norm": 0.0764717385172844,
"learning_rate": 5.6615384615384625e-05,
"loss": 0.0042,
"step": 392
},
{
"epoch": 3.195918367346939,
"grad_norm": 0.7881947755813599,
"learning_rate": 5.654700854700855e-05,
"loss": 0.0576,
"step": 393
},
{
"epoch": 3.204081632653061,
"grad_norm": 0.25396037101745605,
"learning_rate": 5.6478632478632487e-05,
"loss": 0.0259,
"step": 394
},
{
"epoch": 3.2122448979591836,
"grad_norm": 0.7596886157989502,
"learning_rate": 5.641025641025642e-05,
"loss": 0.0524,
"step": 395
},
{
"epoch": 3.220408163265306,
"grad_norm": 0.371158242225647,
"learning_rate": 5.634188034188035e-05,
"loss": 0.0512,
"step": 396
},
{
"epoch": 3.2285714285714286,
"grad_norm": 0.33201202750205994,
"learning_rate": 5.627350427350428e-05,
"loss": 0.0226,
"step": 397
},
{
"epoch": 3.236734693877551,
"grad_norm": 0.4768315553665161,
"learning_rate": 5.62051282051282e-05,
"loss": 0.0325,
"step": 398
},
{
"epoch": 3.2448979591836733,
"grad_norm": 0.755546510219574,
"learning_rate": 5.613675213675214e-05,
"loss": 0.0262,
"step": 399
},
{
"epoch": 3.253061224489796,
"grad_norm": 0.6119134426116943,
"learning_rate": 5.606837606837607e-05,
"loss": 0.042,
"step": 400
},
{
"epoch": 3.2612244897959184,
"grad_norm": 0.5160887837409973,
"learning_rate": 5.6e-05,
"loss": 0.0331,
"step": 401
},
{
"epoch": 3.269387755102041,
"grad_norm": 0.47487494349479675,
"learning_rate": 5.5931623931623933e-05,
"loss": 0.0146,
"step": 402
},
{
"epoch": 3.277551020408163,
"grad_norm": 0.1713923066854477,
"learning_rate": 5.586324786324787e-05,
"loss": 0.0126,
"step": 403
},
{
"epoch": 3.2857142857142856,
"grad_norm": 0.5118584632873535,
"learning_rate": 5.5794871794871795e-05,
"loss": 0.0501,
"step": 404
},
{
"epoch": 3.293877551020408,
"grad_norm": 0.21943572163581848,
"learning_rate": 5.572649572649573e-05,
"loss": 0.021,
"step": 405
},
{
"epoch": 3.3020408163265307,
"grad_norm": 0.447221040725708,
"learning_rate": 5.5658119658119664e-05,
"loss": 0.034,
"step": 406
},
{
"epoch": 3.3102040816326532,
"grad_norm": 0.10484689474105835,
"learning_rate": 5.558974358974359e-05,
"loss": 0.0134,
"step": 407
},
{
"epoch": 3.3183673469387753,
"grad_norm": 0.14459459483623505,
"learning_rate": 5.5521367521367525e-05,
"loss": 0.0222,
"step": 408
},
{
"epoch": 3.326530612244898,
"grad_norm": 0.1565796285867691,
"learning_rate": 5.545299145299145e-05,
"loss": 0.0185,
"step": 409
},
{
"epoch": 3.3346938775510204,
"grad_norm": 0.17262019217014313,
"learning_rate": 5.538461538461539e-05,
"loss": 0.0152,
"step": 410
},
{
"epoch": 3.342857142857143,
"grad_norm": 0.154659703373909,
"learning_rate": 5.531623931623932e-05,
"loss": 0.0211,
"step": 411
},
{
"epoch": 3.351020408163265,
"grad_norm": 0.16751931607723236,
"learning_rate": 5.524786324786325e-05,
"loss": 0.0255,
"step": 412
},
{
"epoch": 3.3591836734693876,
"grad_norm": 0.15946514904499054,
"learning_rate": 5.517948717948718e-05,
"loss": 0.0112,
"step": 413
},
{
"epoch": 3.36734693877551,
"grad_norm": 0.4122481048107147,
"learning_rate": 5.511111111111112e-05,
"loss": 0.0306,
"step": 414
},
{
"epoch": 3.3755102040816327,
"grad_norm": 0.4134220480918884,
"learning_rate": 5.5042735042735045e-05,
"loss": 0.0384,
"step": 415
},
{
"epoch": 3.3836734693877553,
"grad_norm": 0.4064358174800873,
"learning_rate": 5.497435897435898e-05,
"loss": 0.0276,
"step": 416
},
{
"epoch": 3.3918367346938774,
"grad_norm": 0.3870207369327545,
"learning_rate": 5.490598290598291e-05,
"loss": 0.0274,
"step": 417
},
{
"epoch": 3.4,
"grad_norm": 0.20213182270526886,
"learning_rate": 5.483760683760684e-05,
"loss": 0.0207,
"step": 418
},
{
"epoch": 3.4081632653061225,
"grad_norm": 0.7050586342811584,
"learning_rate": 5.4769230769230775e-05,
"loss": 0.0686,
"step": 419
},
{
"epoch": 3.416326530612245,
"grad_norm": 0.26291969418525696,
"learning_rate": 5.470085470085471e-05,
"loss": 0.0178,
"step": 420
},
{
"epoch": 3.424489795918367,
"grad_norm": 0.24425692856311798,
"learning_rate": 5.463247863247864e-05,
"loss": 0.0239,
"step": 421
},
{
"epoch": 3.4326530612244897,
"grad_norm": 0.17826378345489502,
"learning_rate": 5.456410256410257e-05,
"loss": 0.0129,
"step": 422
},
{
"epoch": 3.440816326530612,
"grad_norm": 0.88796067237854,
"learning_rate": 5.4495726495726505e-05,
"loss": 0.0784,
"step": 423
},
{
"epoch": 3.4489795918367347,
"grad_norm": 0.3603014051914215,
"learning_rate": 5.442735042735043e-05,
"loss": 0.0263,
"step": 424
},
{
"epoch": 3.4571428571428573,
"grad_norm": 0.344101220369339,
"learning_rate": 5.435897435897437e-05,
"loss": 0.0357,
"step": 425
},
{
"epoch": 3.4653061224489794,
"grad_norm": 1.5145072937011719,
"learning_rate": 5.4290598290598294e-05,
"loss": 0.0482,
"step": 426
},
{
"epoch": 3.473469387755102,
"grad_norm": 0.4510698616504669,
"learning_rate": 5.422222222222223e-05,
"loss": 0.0478,
"step": 427
},
{
"epoch": 3.4816326530612245,
"grad_norm": 1.4540811777114868,
"learning_rate": 5.415384615384616e-05,
"loss": 0.1315,
"step": 428
},
{
"epoch": 3.489795918367347,
"grad_norm": 0.3050004839897156,
"learning_rate": 5.408547008547009e-05,
"loss": 0.0409,
"step": 429
},
{
"epoch": 3.497959183673469,
"grad_norm": 0.40628471970558167,
"learning_rate": 5.4017094017094025e-05,
"loss": 0.0405,
"step": 430
},
{
"epoch": 3.5061224489795917,
"grad_norm": 0.28513649106025696,
"learning_rate": 5.394871794871796e-05,
"loss": 0.0242,
"step": 431
},
{
"epoch": 3.5142857142857142,
"grad_norm": 0.1387534886598587,
"learning_rate": 5.3880341880341886e-05,
"loss": 0.0138,
"step": 432
},
{
"epoch": 3.522448979591837,
"grad_norm": 0.21195898950099945,
"learning_rate": 5.381196581196582e-05,
"loss": 0.0116,
"step": 433
},
{
"epoch": 3.5306122448979593,
"grad_norm": 0.5661990642547607,
"learning_rate": 5.374358974358974e-05,
"loss": 0.0493,
"step": 434
},
{
"epoch": 3.538775510204082,
"grad_norm": 0.1676921248435974,
"learning_rate": 5.3675213675213675e-05,
"loss": 0.0172,
"step": 435
},
{
"epoch": 3.546938775510204,
"grad_norm": 0.5208879113197327,
"learning_rate": 5.360683760683761e-05,
"loss": 0.0243,
"step": 436
},
{
"epoch": 3.5551020408163265,
"grad_norm": 0.30240461230278015,
"learning_rate": 5.353846153846154e-05,
"loss": 0.0251,
"step": 437
},
{
"epoch": 3.563265306122449,
"grad_norm": 0.3899058699607849,
"learning_rate": 5.347008547008547e-05,
"loss": 0.0289,
"step": 438
},
{
"epoch": 3.571428571428571,
"grad_norm": 0.6027955412864685,
"learning_rate": 5.3401709401709406e-05,
"loss": 0.0465,
"step": 439
},
{
"epoch": 3.5795918367346937,
"grad_norm": 0.30090829730033875,
"learning_rate": 5.333333333333333e-05,
"loss": 0.0165,
"step": 440
},
{
"epoch": 3.5877551020408163,
"grad_norm": 0.6930409669876099,
"learning_rate": 5.326495726495727e-05,
"loss": 0.0603,
"step": 441
},
{
"epoch": 3.595918367346939,
"grad_norm": 0.375592440366745,
"learning_rate": 5.31965811965812e-05,
"loss": 0.0287,
"step": 442
},
{
"epoch": 3.6040816326530614,
"grad_norm": 0.8024219870567322,
"learning_rate": 5.312820512820513e-05,
"loss": 0.0476,
"step": 443
},
{
"epoch": 3.612244897959184,
"grad_norm": 0.4552713632583618,
"learning_rate": 5.3059829059829063e-05,
"loss": 0.0136,
"step": 444
},
{
"epoch": 3.620408163265306,
"grad_norm": 0.6890722513198853,
"learning_rate": 5.299145299145299e-05,
"loss": 0.0257,
"step": 445
},
{
"epoch": 3.6285714285714286,
"grad_norm": 0.4332926869392395,
"learning_rate": 5.2923076923076925e-05,
"loss": 0.0171,
"step": 446
},
{
"epoch": 3.636734693877551,
"grad_norm": 0.21104751527309418,
"learning_rate": 5.285470085470086e-05,
"loss": 0.0218,
"step": 447
},
{
"epoch": 3.644897959183673,
"grad_norm": 0.2659521698951721,
"learning_rate": 5.278632478632479e-05,
"loss": 0.0079,
"step": 448
},
{
"epoch": 3.6530612244897958,
"grad_norm": 1.0414408445358276,
"learning_rate": 5.271794871794872e-05,
"loss": 0.0998,
"step": 449
},
{
"epoch": 3.6612244897959183,
"grad_norm": 0.5479117035865784,
"learning_rate": 5.2649572649572655e-05,
"loss": 0.0259,
"step": 450
},
{
"epoch": 3.669387755102041,
"grad_norm": 0.5206521153450012,
"learning_rate": 5.258119658119658e-05,
"loss": 0.0455,
"step": 451
},
{
"epoch": 3.6775510204081634,
"grad_norm": 0.7257238030433655,
"learning_rate": 5.251282051282052e-05,
"loss": 0.0418,
"step": 452
},
{
"epoch": 3.685714285714286,
"grad_norm": 0.3261127471923828,
"learning_rate": 5.244444444444445e-05,
"loss": 0.0366,
"step": 453
},
{
"epoch": 3.693877551020408,
"grad_norm": 0.27776315808296204,
"learning_rate": 5.237606837606838e-05,
"loss": 0.0238,
"step": 454
},
{
"epoch": 3.7020408163265306,
"grad_norm": 0.43985986709594727,
"learning_rate": 5.230769230769231e-05,
"loss": 0.0304,
"step": 455
},
{
"epoch": 3.710204081632653,
"grad_norm": 0.7832798361778259,
"learning_rate": 5.223931623931625e-05,
"loss": 0.0394,
"step": 456
},
{
"epoch": 3.7183673469387752,
"grad_norm": 0.610369861125946,
"learning_rate": 5.2170940170940175e-05,
"loss": 0.0328,
"step": 457
},
{
"epoch": 3.726530612244898,
"grad_norm": 0.7184517979621887,
"learning_rate": 5.210256410256411e-05,
"loss": 0.0492,
"step": 458
},
{
"epoch": 3.7346938775510203,
"grad_norm": 0.23181170225143433,
"learning_rate": 5.203418803418804e-05,
"loss": 0.0165,
"step": 459
},
{
"epoch": 3.742857142857143,
"grad_norm": 0.440939337015152,
"learning_rate": 5.196581196581197e-05,
"loss": 0.0397,
"step": 460
},
{
"epoch": 3.7510204081632654,
"grad_norm": 2.7026727199554443,
"learning_rate": 5.1897435897435905e-05,
"loss": 0.0126,
"step": 461
},
{
"epoch": 3.759183673469388,
"grad_norm": 0.20904560387134552,
"learning_rate": 5.182905982905983e-05,
"loss": 0.0238,
"step": 462
},
{
"epoch": 3.76734693877551,
"grad_norm": 0.42715728282928467,
"learning_rate": 5.176068376068377e-05,
"loss": 0.032,
"step": 463
},
{
"epoch": 3.7755102040816326,
"grad_norm": 0.6544044613838196,
"learning_rate": 5.16923076923077e-05,
"loss": 0.0357,
"step": 464
},
{
"epoch": 3.783673469387755,
"grad_norm": 0.4051888883113861,
"learning_rate": 5.162393162393163e-05,
"loss": 0.038,
"step": 465
},
{
"epoch": 3.7918367346938773,
"grad_norm": 0.3358502686023712,
"learning_rate": 5.155555555555556e-05,
"loss": 0.0278,
"step": 466
},
{
"epoch": 3.8,
"grad_norm": 0.20578329265117645,
"learning_rate": 5.14871794871795e-05,
"loss": 0.0139,
"step": 467
},
{
"epoch": 3.8081632653061224,
"grad_norm": 0.5573644638061523,
"learning_rate": 5.1418803418803424e-05,
"loss": 0.0494,
"step": 468
},
{
"epoch": 3.816326530612245,
"grad_norm": 0.3017401695251465,
"learning_rate": 5.135042735042736e-05,
"loss": 0.0199,
"step": 469
},
{
"epoch": 3.8244897959183675,
"grad_norm": 0.73221755027771,
"learning_rate": 5.128205128205129e-05,
"loss": 0.0768,
"step": 470
},
{
"epoch": 3.83265306122449,
"grad_norm": 0.3336375951766968,
"learning_rate": 5.1213675213675214e-05,
"loss": 0.0263,
"step": 471
},
{
"epoch": 3.840816326530612,
"grad_norm": 0.38057103753089905,
"learning_rate": 5.114529914529915e-05,
"loss": 0.0224,
"step": 472
},
{
"epoch": 3.8489795918367347,
"grad_norm": 1.4463324546813965,
"learning_rate": 5.1076923076923075e-05,
"loss": 0.0933,
"step": 473
},
{
"epoch": 3.857142857142857,
"grad_norm": 0.20391391217708588,
"learning_rate": 5.100854700854701e-05,
"loss": 0.0358,
"step": 474
},
{
"epoch": 3.8653061224489798,
"grad_norm": 0.9996618628501892,
"learning_rate": 5.0940170940170944e-05,
"loss": 0.0687,
"step": 475
},
{
"epoch": 3.873469387755102,
"grad_norm": 0.2686821520328522,
"learning_rate": 5.087179487179487e-05,
"loss": 0.0226,
"step": 476
},
{
"epoch": 3.8816326530612244,
"grad_norm": 0.49687331914901733,
"learning_rate": 5.0803418803418805e-05,
"loss": 0.0377,
"step": 477
},
{
"epoch": 3.889795918367347,
"grad_norm": 0.6116603016853333,
"learning_rate": 5.073504273504274e-05,
"loss": 0.0668,
"step": 478
},
{
"epoch": 3.8979591836734695,
"grad_norm": 0.27489814162254333,
"learning_rate": 5.066666666666667e-05,
"loss": 0.0201,
"step": 479
},
{
"epoch": 3.906122448979592,
"grad_norm": 0.24494236707687378,
"learning_rate": 5.05982905982906e-05,
"loss": 0.0212,
"step": 480
},
{
"epoch": 3.914285714285714,
"grad_norm": 0.20727446675300598,
"learning_rate": 5.052991452991453e-05,
"loss": 0.0252,
"step": 481
},
{
"epoch": 3.9224489795918367,
"grad_norm": 0.2882131040096283,
"learning_rate": 5.046153846153846e-05,
"loss": 0.0214,
"step": 482
},
{
"epoch": 3.9306122448979592,
"grad_norm": 0.40643471479415894,
"learning_rate": 5.03931623931624e-05,
"loss": 0.0303,
"step": 483
},
{
"epoch": 3.938775510204082,
"grad_norm": 0.25172147154808044,
"learning_rate": 5.0324786324786325e-05,
"loss": 0.0173,
"step": 484
},
{
"epoch": 3.946938775510204,
"grad_norm": 0.5423907041549683,
"learning_rate": 5.025641025641026e-05,
"loss": 0.0612,
"step": 485
},
{
"epoch": 3.9551020408163264,
"grad_norm": 0.6183574795722961,
"learning_rate": 5.018803418803419e-05,
"loss": 0.0504,
"step": 486
},
{
"epoch": 3.963265306122449,
"grad_norm": 0.30423736572265625,
"learning_rate": 5.011965811965812e-05,
"loss": 0.0332,
"step": 487
},
{
"epoch": 3.9714285714285715,
"grad_norm": 0.5488356947898865,
"learning_rate": 5.0051282051282055e-05,
"loss": 0.0318,
"step": 488
},
{
"epoch": 3.979591836734694,
"grad_norm": 0.4721347987651825,
"learning_rate": 4.998290598290599e-05,
"loss": 0.0385,
"step": 489
},
{
"epoch": 3.987755102040816,
"grad_norm": 0.26058709621429443,
"learning_rate": 4.991452991452992e-05,
"loss": 0.0306,
"step": 490
},
{
"epoch": 3.9959183673469387,
"grad_norm": 0.6200685501098633,
"learning_rate": 4.984615384615385e-05,
"loss": 0.0661,
"step": 491
},
{
"epoch": 4.0,
"grad_norm": 0.33986982703208923,
"learning_rate": 4.9777777777777785e-05,
"loss": 0.0131,
"step": 492
},
{
"epoch": 4.0,
"eval_accuracy": 0.9418096418553069,
"eval_f1": 0.9408911746349587,
"eval_loss": 0.11904772371053696,
"eval_precision": 0.9424741467431446,
"eval_recall": 0.9418096418553069,
"eval_runtime": 26.5542,
"eval_samples_per_second": 16.419,
"eval_steps_per_second": 16.419,
"step": 492
},
{
"epoch": 4.0081632653061225,
"grad_norm": 0.37561219930648804,
"learning_rate": 4.970940170940171e-05,
"loss": 0.0331,
"step": 493
},
{
"epoch": 4.016326530612245,
"grad_norm": 0.3382563292980194,
"learning_rate": 4.964102564102565e-05,
"loss": 0.0247,
"step": 494
},
{
"epoch": 4.024489795918368,
"grad_norm": 0.1595546305179596,
"learning_rate": 4.957264957264958e-05,
"loss": 0.01,
"step": 495
},
{
"epoch": 4.03265306122449,
"grad_norm": 0.5388701558113098,
"learning_rate": 4.950427350427351e-05,
"loss": 0.0391,
"step": 496
},
{
"epoch": 4.040816326530612,
"grad_norm": 0.5022732615470886,
"learning_rate": 4.943589743589744e-05,
"loss": 0.0113,
"step": 497
},
{
"epoch": 4.048979591836734,
"grad_norm": 0.6687523126602173,
"learning_rate": 4.936752136752137e-05,
"loss": 0.0489,
"step": 498
},
{
"epoch": 4.057142857142857,
"grad_norm": 0.12865830957889557,
"learning_rate": 4.9299145299145305e-05,
"loss": 0.0044,
"step": 499
},
{
"epoch": 4.0653061224489795,
"grad_norm": 0.2922806739807129,
"learning_rate": 4.923076923076924e-05,
"loss": 0.0204,
"step": 500
},
{
"epoch": 4.073469387755102,
"grad_norm": 0.2284225970506668,
"learning_rate": 4.9162393162393166e-05,
"loss": 0.0385,
"step": 501
},
{
"epoch": 4.081632653061225,
"grad_norm": 0.19399873912334442,
"learning_rate": 4.90940170940171e-05,
"loss": 0.0088,
"step": 502
},
{
"epoch": 4.089795918367347,
"grad_norm": 0.22957640886306763,
"learning_rate": 4.9025641025641035e-05,
"loss": 0.0238,
"step": 503
},
{
"epoch": 4.09795918367347,
"grad_norm": 0.1837630271911621,
"learning_rate": 4.895726495726496e-05,
"loss": 0.0181,
"step": 504
},
{
"epoch": 4.106122448979592,
"grad_norm": 0.17346547544002533,
"learning_rate": 4.88888888888889e-05,
"loss": 0.0134,
"step": 505
},
{
"epoch": 4.114285714285714,
"grad_norm": 0.5421778559684753,
"learning_rate": 4.882051282051283e-05,
"loss": 0.0256,
"step": 506
},
{
"epoch": 4.122448979591836,
"grad_norm": 0.33288803696632385,
"learning_rate": 4.875213675213676e-05,
"loss": 0.0134,
"step": 507
},
{
"epoch": 4.130612244897959,
"grad_norm": 0.44486677646636963,
"learning_rate": 4.8683760683760686e-05,
"loss": 0.0105,
"step": 508
},
{
"epoch": 4.1387755102040815,
"grad_norm": 0.3703673183917999,
"learning_rate": 4.861538461538461e-05,
"loss": 0.0219,
"step": 509
},
{
"epoch": 4.146938775510204,
"grad_norm": 0.3831328749656677,
"learning_rate": 4.854700854700855e-05,
"loss": 0.0211,
"step": 510
},
{
"epoch": 4.155102040816327,
"grad_norm": 0.29360687732696533,
"learning_rate": 4.847863247863248e-05,
"loss": 0.0195,
"step": 511
},
{
"epoch": 4.163265306122449,
"grad_norm": 0.2633644640445709,
"learning_rate": 4.841025641025641e-05,
"loss": 0.0099,
"step": 512
},
{
"epoch": 4.171428571428572,
"grad_norm": 0.8873499631881714,
"learning_rate": 4.8341880341880344e-05,
"loss": 0.0423,
"step": 513
},
{
"epoch": 4.179591836734694,
"grad_norm": 0.6571605801582336,
"learning_rate": 4.827350427350428e-05,
"loss": 0.0374,
"step": 514
},
{
"epoch": 4.187755102040816,
"grad_norm": 0.22063897550106049,
"learning_rate": 4.8205128205128205e-05,
"loss": 0.0098,
"step": 515
},
{
"epoch": 4.1959183673469385,
"grad_norm": 0.5064321756362915,
"learning_rate": 4.813675213675214e-05,
"loss": 0.0316,
"step": 516
},
{
"epoch": 4.204081632653061,
"grad_norm": 0.474624365568161,
"learning_rate": 4.806837606837607e-05,
"loss": 0.0141,
"step": 517
},
{
"epoch": 4.2122448979591836,
"grad_norm": 0.2246454656124115,
"learning_rate": 4.8e-05,
"loss": 0.0101,
"step": 518
},
{
"epoch": 4.220408163265306,
"grad_norm": 0.13797767460346222,
"learning_rate": 4.7931623931623935e-05,
"loss": 0.0135,
"step": 519
},
{
"epoch": 4.228571428571429,
"grad_norm": 0.3289293646812439,
"learning_rate": 4.786324786324786e-05,
"loss": 0.0159,
"step": 520
},
{
"epoch": 4.236734693877551,
"grad_norm": 0.33728495240211487,
"learning_rate": 4.77948717948718e-05,
"loss": 0.029,
"step": 521
},
{
"epoch": 4.244897959183674,
"grad_norm": 0.3062879741191864,
"learning_rate": 4.772649572649573e-05,
"loss": 0.0179,
"step": 522
},
{
"epoch": 4.253061224489796,
"grad_norm": 1.0204277038574219,
"learning_rate": 4.765811965811966e-05,
"loss": 0.0455,
"step": 523
},
{
"epoch": 4.261224489795918,
"grad_norm": 0.4178808927536011,
"learning_rate": 4.758974358974359e-05,
"loss": 0.0213,
"step": 524
},
{
"epoch": 4.2693877551020405,
"grad_norm": 0.31695181131362915,
"learning_rate": 4.752136752136753e-05,
"loss": 0.0304,
"step": 525
},
{
"epoch": 4.277551020408163,
"grad_norm": 0.2975795269012451,
"learning_rate": 4.7452991452991455e-05,
"loss": 0.0175,
"step": 526
},
{
"epoch": 4.285714285714286,
"grad_norm": 0.32276976108551025,
"learning_rate": 4.738461538461539e-05,
"loss": 0.0092,
"step": 527
},
{
"epoch": 4.293877551020408,
"grad_norm": 0.9083264470100403,
"learning_rate": 4.731623931623932e-05,
"loss": 0.0141,
"step": 528
},
{
"epoch": 4.302040816326531,
"grad_norm": 0.6064096689224243,
"learning_rate": 4.724786324786325e-05,
"loss": 0.0345,
"step": 529
},
{
"epoch": 4.310204081632653,
"grad_norm": 1.0197865962982178,
"learning_rate": 4.7179487179487185e-05,
"loss": 0.0766,
"step": 530
},
{
"epoch": 4.318367346938776,
"grad_norm": 0.5525026321411133,
"learning_rate": 4.711111111111112e-05,
"loss": 0.0309,
"step": 531
},
{
"epoch": 4.326530612244898,
"grad_norm": 0.25368353724479675,
"learning_rate": 4.704273504273505e-05,
"loss": 0.0111,
"step": 532
},
{
"epoch": 4.33469387755102,
"grad_norm": 0.8449558019638062,
"learning_rate": 4.697435897435898e-05,
"loss": 0.0422,
"step": 533
},
{
"epoch": 4.3428571428571425,
"grad_norm": 0.33069613575935364,
"learning_rate": 4.6905982905982915e-05,
"loss": 0.012,
"step": 534
},
{
"epoch": 4.351020408163265,
"grad_norm": 0.20474620163440704,
"learning_rate": 4.683760683760684e-05,
"loss": 0.0078,
"step": 535
},
{
"epoch": 4.359183673469388,
"grad_norm": 0.7975245118141174,
"learning_rate": 4.676923076923078e-05,
"loss": 0.0343,
"step": 536
},
{
"epoch": 4.36734693877551,
"grad_norm": 0.1958104521036148,
"learning_rate": 4.6700854700854704e-05,
"loss": 0.0089,
"step": 537
},
{
"epoch": 4.375510204081633,
"grad_norm": 0.1404639035463333,
"learning_rate": 4.663247863247864e-05,
"loss": 0.007,
"step": 538
},
{
"epoch": 4.383673469387755,
"grad_norm": 0.4322687089443207,
"learning_rate": 4.656410256410257e-05,
"loss": 0.0127,
"step": 539
},
{
"epoch": 4.391836734693878,
"grad_norm": 0.3357923924922943,
"learning_rate": 4.64957264957265e-05,
"loss": 0.0283,
"step": 540
},
{
"epoch": 4.4,
"grad_norm": 0.23733913898468018,
"learning_rate": 4.6427350427350435e-05,
"loss": 0.0128,
"step": 541
},
{
"epoch": 4.408163265306122,
"grad_norm": 0.24629896879196167,
"learning_rate": 4.635897435897437e-05,
"loss": 0.0124,
"step": 542
},
{
"epoch": 4.416326530612245,
"grad_norm": 0.38652303814888,
"learning_rate": 4.6290598290598296e-05,
"loss": 0.0167,
"step": 543
},
{
"epoch": 4.424489795918367,
"grad_norm": 0.268097847700119,
"learning_rate": 4.6222222222222224e-05,
"loss": 0.0168,
"step": 544
},
{
"epoch": 4.43265306122449,
"grad_norm": 0.6078333854675293,
"learning_rate": 4.615384615384615e-05,
"loss": 0.0147,
"step": 545
},
{
"epoch": 4.440816326530612,
"grad_norm": 0.12846365571022034,
"learning_rate": 4.6085470085470086e-05,
"loss": 0.0047,
"step": 546
},
{
"epoch": 4.448979591836735,
"grad_norm": 0.23714855313301086,
"learning_rate": 4.601709401709402e-05,
"loss": 0.0142,
"step": 547
},
{
"epoch": 4.457142857142857,
"grad_norm": 0.3480728268623352,
"learning_rate": 4.594871794871795e-05,
"loss": 0.0144,
"step": 548
},
{
"epoch": 4.46530612244898,
"grad_norm": 0.24332262575626373,
"learning_rate": 4.588034188034188e-05,
"loss": 0.0155,
"step": 549
},
{
"epoch": 4.473469387755102,
"grad_norm": 0.13778908550739288,
"learning_rate": 4.5811965811965816e-05,
"loss": 0.0051,
"step": 550
},
{
"epoch": 4.481632653061224,
"grad_norm": 5.389389991760254,
"learning_rate": 4.574358974358974e-05,
"loss": 0.042,
"step": 551
},
{
"epoch": 4.489795918367347,
"grad_norm": 0.2608646750450134,
"learning_rate": 4.567521367521368e-05,
"loss": 0.0233,
"step": 552
},
{
"epoch": 4.497959183673469,
"grad_norm": 0.42670515179634094,
"learning_rate": 4.560683760683761e-05,
"loss": 0.0253,
"step": 553
},
{
"epoch": 4.506122448979592,
"grad_norm": 1.0257692337036133,
"learning_rate": 4.553846153846154e-05,
"loss": 0.0571,
"step": 554
},
{
"epoch": 4.514285714285714,
"grad_norm": 0.7005570530891418,
"learning_rate": 4.5470085470085474e-05,
"loss": 0.0314,
"step": 555
},
{
"epoch": 4.522448979591837,
"grad_norm": 0.39354655146598816,
"learning_rate": 4.54017094017094e-05,
"loss": 0.0286,
"step": 556
},
{
"epoch": 4.530612244897959,
"grad_norm": 0.2712419629096985,
"learning_rate": 4.5333333333333335e-05,
"loss": 0.0186,
"step": 557
},
{
"epoch": 4.538775510204082,
"grad_norm": 0.19760717451572418,
"learning_rate": 4.526495726495727e-05,
"loss": 0.0165,
"step": 558
},
{
"epoch": 4.546938775510204,
"grad_norm": 0.4761720895767212,
"learning_rate": 4.51965811965812e-05,
"loss": 0.0201,
"step": 559
},
{
"epoch": 4.555102040816326,
"grad_norm": 0.29089584946632385,
"learning_rate": 4.512820512820513e-05,
"loss": 0.0094,
"step": 560
},
{
"epoch": 4.563265306122449,
"grad_norm": 0.2839196026325226,
"learning_rate": 4.5059829059829065e-05,
"loss": 0.0109,
"step": 561
},
{
"epoch": 4.571428571428571,
"grad_norm": 0.786836564540863,
"learning_rate": 4.499145299145299e-05,
"loss": 0.0452,
"step": 562
},
{
"epoch": 4.579591836734694,
"grad_norm": 0.719528317451477,
"learning_rate": 4.492307692307693e-05,
"loss": 0.0379,
"step": 563
},
{
"epoch": 4.587755102040816,
"grad_norm": 0.138275608420372,
"learning_rate": 4.485470085470086e-05,
"loss": 0.0128,
"step": 564
},
{
"epoch": 4.595918367346939,
"grad_norm": 0.5043324828147888,
"learning_rate": 4.478632478632479e-05,
"loss": 0.045,
"step": 565
},
{
"epoch": 4.604081632653061,
"grad_norm": 0.2945300042629242,
"learning_rate": 4.471794871794872e-05,
"loss": 0.0258,
"step": 566
},
{
"epoch": 4.612244897959184,
"grad_norm": 0.14936916530132294,
"learning_rate": 4.464957264957266e-05,
"loss": 0.0069,
"step": 567
},
{
"epoch": 4.6204081632653065,
"grad_norm": 0.23340395092964172,
"learning_rate": 4.4581196581196585e-05,
"loss": 0.0179,
"step": 568
},
{
"epoch": 4.628571428571428,
"grad_norm": 0.49968650937080383,
"learning_rate": 4.451282051282052e-05,
"loss": 0.0225,
"step": 569
},
{
"epoch": 4.636734693877551,
"grad_norm": 0.25828638672828674,
"learning_rate": 4.444444444444445e-05,
"loss": 0.0163,
"step": 570
},
{
"epoch": 4.644897959183673,
"grad_norm": 0.853486955165863,
"learning_rate": 4.437606837606838e-05,
"loss": 0.0136,
"step": 571
},
{
"epoch": 4.653061224489796,
"grad_norm": 0.9049561619758606,
"learning_rate": 4.4307692307692315e-05,
"loss": 0.0393,
"step": 572
},
{
"epoch": 4.661224489795918,
"grad_norm": 0.2781153619289398,
"learning_rate": 4.423931623931624e-05,
"loss": 0.0171,
"step": 573
},
{
"epoch": 4.669387755102041,
"grad_norm": 0.29508426785469055,
"learning_rate": 4.417094017094018e-05,
"loss": 0.0184,
"step": 574
},
{
"epoch": 4.677551020408163,
"grad_norm": 0.35638830065727234,
"learning_rate": 4.410256410256411e-05,
"loss": 0.0219,
"step": 575
},
{
"epoch": 4.685714285714286,
"grad_norm": 0.4791906476020813,
"learning_rate": 4.403418803418804e-05,
"loss": 0.0155,
"step": 576
},
{
"epoch": 4.6938775510204085,
"grad_norm": 0.48481646180152893,
"learning_rate": 4.396581196581197e-05,
"loss": 0.0333,
"step": 577
},
{
"epoch": 4.70204081632653,
"grad_norm": 0.6990826725959778,
"learning_rate": 4.389743589743591e-05,
"loss": 0.0199,
"step": 578
},
{
"epoch": 4.710204081632653,
"grad_norm": 0.18636147677898407,
"learning_rate": 4.3829059829059834e-05,
"loss": 0.0071,
"step": 579
},
{
"epoch": 4.718367346938775,
"grad_norm": 0.3480212986469269,
"learning_rate": 4.376068376068377e-05,
"loss": 0.026,
"step": 580
},
{
"epoch": 4.726530612244898,
"grad_norm": 0.43473413586616516,
"learning_rate": 4.369230769230769e-05,
"loss": 0.0078,
"step": 581
},
{
"epoch": 4.73469387755102,
"grad_norm": 0.4350218176841736,
"learning_rate": 4.3623931623931624e-05,
"loss": 0.0193,
"step": 582
},
{
"epoch": 4.742857142857143,
"grad_norm": 0.11488892883062363,
"learning_rate": 4.355555555555556e-05,
"loss": 0.0079,
"step": 583
},
{
"epoch": 4.751020408163265,
"grad_norm": 0.3166949152946472,
"learning_rate": 4.3487179487179485e-05,
"loss": 0.017,
"step": 584
},
{
"epoch": 4.759183673469388,
"grad_norm": 0.6318601369857788,
"learning_rate": 4.341880341880342e-05,
"loss": 0.0156,
"step": 585
},
{
"epoch": 4.7673469387755105,
"grad_norm": 0.47680795192718506,
"learning_rate": 4.3350427350427354e-05,
"loss": 0.0325,
"step": 586
},
{
"epoch": 4.775510204081632,
"grad_norm": 0.1389036327600479,
"learning_rate": 4.328205128205128e-05,
"loss": 0.0151,
"step": 587
},
{
"epoch": 4.783673469387755,
"grad_norm": 0.4114654064178467,
"learning_rate": 4.3213675213675216e-05,
"loss": 0.0261,
"step": 588
},
{
"epoch": 4.791836734693877,
"grad_norm": 0.3760494291782379,
"learning_rate": 4.314529914529915e-05,
"loss": 0.0149,
"step": 589
},
{
"epoch": 4.8,
"grad_norm": 0.3259177803993225,
"learning_rate": 4.307692307692308e-05,
"loss": 0.0105,
"step": 590
},
{
"epoch": 4.808163265306122,
"grad_norm": 0.19598586857318878,
"learning_rate": 4.300854700854701e-05,
"loss": 0.0192,
"step": 591
},
{
"epoch": 4.816326530612245,
"grad_norm": 0.24014200270175934,
"learning_rate": 4.294017094017094e-05,
"loss": 0.0238,
"step": 592
},
{
"epoch": 4.8244897959183675,
"grad_norm": 0.25813162326812744,
"learning_rate": 4.287179487179487e-05,
"loss": 0.0169,
"step": 593
},
{
"epoch": 4.83265306122449,
"grad_norm": 0.15530163049697876,
"learning_rate": 4.280341880341881e-05,
"loss": 0.0113,
"step": 594
},
{
"epoch": 4.840816326530613,
"grad_norm": 0.9454264044761658,
"learning_rate": 4.2735042735042735e-05,
"loss": 0.071,
"step": 595
},
{
"epoch": 4.848979591836734,
"grad_norm": 0.4564417004585266,
"learning_rate": 4.266666666666667e-05,
"loss": 0.028,
"step": 596
},
{
"epoch": 4.857142857142857,
"grad_norm": 0.41204723715782166,
"learning_rate": 4.2598290598290604e-05,
"loss": 0.0337,
"step": 597
},
{
"epoch": 4.865306122448979,
"grad_norm": 0.2819851338863373,
"learning_rate": 4.252991452991453e-05,
"loss": 0.0286,
"step": 598
},
{
"epoch": 4.873469387755102,
"grad_norm": 1.5066794157028198,
"learning_rate": 4.2461538461538465e-05,
"loss": 0.0428,
"step": 599
},
{
"epoch": 4.881632653061224,
"grad_norm": 0.5286157131195068,
"learning_rate": 4.23931623931624e-05,
"loss": 0.0189,
"step": 600
},
{
"epoch": 4.889795918367347,
"grad_norm": 0.94499272108078,
"learning_rate": 4.232478632478633e-05,
"loss": 0.0531,
"step": 601
},
{
"epoch": 4.8979591836734695,
"grad_norm": 0.10032657533884048,
"learning_rate": 4.225641025641026e-05,
"loss": 0.0081,
"step": 602
},
{
"epoch": 4.906122448979592,
"grad_norm": 0.23001764714717865,
"learning_rate": 4.2188034188034195e-05,
"loss": 0.0209,
"step": 603
},
{
"epoch": 4.914285714285715,
"grad_norm": 0.34312617778778076,
"learning_rate": 4.211965811965812e-05,
"loss": 0.0286,
"step": 604
},
{
"epoch": 4.922448979591836,
"grad_norm": 0.3228893280029297,
"learning_rate": 4.205128205128206e-05,
"loss": 0.0197,
"step": 605
},
{
"epoch": 4.930612244897959,
"grad_norm": 0.48081985116004944,
"learning_rate": 4.198290598290599e-05,
"loss": 0.022,
"step": 606
},
{
"epoch": 4.938775510204081,
"grad_norm": 0.18656276166439056,
"learning_rate": 4.191452991452992e-05,
"loss": 0.0157,
"step": 607
},
{
"epoch": 4.946938775510204,
"grad_norm": 0.29330140352249146,
"learning_rate": 4.184615384615385e-05,
"loss": 0.0266,
"step": 608
},
{
"epoch": 4.955102040816326,
"grad_norm": 0.22990168631076813,
"learning_rate": 4.177777777777778e-05,
"loss": 0.0094,
"step": 609
},
{
"epoch": 4.963265306122449,
"grad_norm": 0.31795695424079895,
"learning_rate": 4.1709401709401715e-05,
"loss": 0.0248,
"step": 610
},
{
"epoch": 4.9714285714285715,
"grad_norm": 0.5349370241165161,
"learning_rate": 4.164102564102565e-05,
"loss": 0.0126,
"step": 611
},
{
"epoch": 4.979591836734694,
"grad_norm": 0.08685897290706635,
"learning_rate": 4.1572649572649577e-05,
"loss": 0.0089,
"step": 612
},
{
"epoch": 4.987755102040817,
"grad_norm": 0.17170557379722595,
"learning_rate": 4.150427350427351e-05,
"loss": 0.024,
"step": 613
},
{
"epoch": 4.995918367346938,
"grad_norm": 0.19321708381175995,
"learning_rate": 4.1435897435897445e-05,
"loss": 0.0134,
"step": 614
},
{
"epoch": 5.0,
"grad_norm": 0.31885647773742676,
"learning_rate": 4.136752136752137e-05,
"loss": 0.0177,
"step": 615
},
{
"epoch": 5.0,
"eval_accuracy": 0.9421358209928893,
"eval_f1": 0.94177929748661,
"eval_loss": 0.11898548156023026,
"eval_precision": 0.9418441520307643,
"eval_recall": 0.9421358209928893,
"eval_runtime": 24.9117,
"eval_samples_per_second": 17.502,
"eval_steps_per_second": 17.502,
"step": 615
},
{
"epoch": 5.0081632653061225,
"grad_norm": 0.152436301112175,
"learning_rate": 4.129914529914531e-05,
"loss": 0.0126,
"step": 616
},
{
"epoch": 5.016326530612245,
"grad_norm": 0.29216599464416504,
"learning_rate": 4.123076923076923e-05,
"loss": 0.0227,
"step": 617
},
{
"epoch": 5.024489795918368,
"grad_norm": 0.28651708364486694,
"learning_rate": 4.116239316239316e-05,
"loss": 0.0212,
"step": 618
},
{
"epoch": 5.03265306122449,
"grad_norm": 0.4106895327568054,
"learning_rate": 4.1094017094017096e-05,
"loss": 0.0133,
"step": 619
},
{
"epoch": 5.040816326530612,
"grad_norm": 0.41109445691108704,
"learning_rate": 4.1025641025641023e-05,
"loss": 0.0327,
"step": 620
},
{
"epoch": 5.048979591836734,
"grad_norm": 0.16403941810131073,
"learning_rate": 4.095726495726496e-05,
"loss": 0.0147,
"step": 621
},
{
"epoch": 5.057142857142857,
"grad_norm": 0.06824915111064911,
"learning_rate": 4.088888888888889e-05,
"loss": 0.0034,
"step": 622
},
{
"epoch": 5.0653061224489795,
"grad_norm": 0.27841898798942566,
"learning_rate": 4.082051282051282e-05,
"loss": 0.0089,
"step": 623
},
{
"epoch": 5.073469387755102,
"grad_norm": 0.4965389370918274,
"learning_rate": 4.0752136752136754e-05,
"loss": 0.0341,
"step": 624
},
{
"epoch": 5.081632653061225,
"grad_norm": 0.13556616008281708,
"learning_rate": 4.068376068376069e-05,
"loss": 0.0066,
"step": 625
},
{
"epoch": 5.089795918367347,
"grad_norm": 0.17582395672798157,
"learning_rate": 4.0615384615384615e-05,
"loss": 0.0121,
"step": 626
},
{
"epoch": 5.09795918367347,
"grad_norm": 0.1906500905752182,
"learning_rate": 4.054700854700855e-05,
"loss": 0.0121,
"step": 627
},
{
"epoch": 5.106122448979592,
"grad_norm": 0.10774006694555283,
"learning_rate": 4.047863247863248e-05,
"loss": 0.003,
"step": 628
},
{
"epoch": 5.114285714285714,
"grad_norm": 0.2364247441291809,
"learning_rate": 4.041025641025641e-05,
"loss": 0.0204,
"step": 629
},
{
"epoch": 5.122448979591836,
"grad_norm": 0.296677827835083,
"learning_rate": 4.0341880341880346e-05,
"loss": 0.0257,
"step": 630
},
{
"epoch": 5.130612244897959,
"grad_norm": 1.0964839458465576,
"learning_rate": 4.027350427350427e-05,
"loss": 0.0143,
"step": 631
},
{
"epoch": 5.1387755102040815,
"grad_norm": 0.14557228982448578,
"learning_rate": 4.020512820512821e-05,
"loss": 0.0083,
"step": 632
},
{
"epoch": 5.146938775510204,
"grad_norm": 0.10422372817993164,
"learning_rate": 4.013675213675214e-05,
"loss": 0.007,
"step": 633
},
{
"epoch": 5.155102040816327,
"grad_norm": 0.13321611285209656,
"learning_rate": 4.006837606837607e-05,
"loss": 0.0025,
"step": 634
},
{
"epoch": 5.163265306122449,
"grad_norm": 0.16438782215118408,
"learning_rate": 4e-05,
"loss": 0.0069,
"step": 635
},
{
"epoch": 5.171428571428572,
"grad_norm": 0.14023995399475098,
"learning_rate": 3.993162393162394e-05,
"loss": 0.009,
"step": 636
},
{
"epoch": 5.179591836734694,
"grad_norm": 0.1476777046918869,
"learning_rate": 3.9863247863247865e-05,
"loss": 0.0071,
"step": 637
},
{
"epoch": 5.187755102040816,
"grad_norm": 0.10355421155691147,
"learning_rate": 3.97948717948718e-05,
"loss": 0.0035,
"step": 638
},
{
"epoch": 5.1959183673469385,
"grad_norm": 0.20340070128440857,
"learning_rate": 3.9726495726495733e-05,
"loss": 0.0122,
"step": 639
},
{
"epoch": 5.204081632653061,
"grad_norm": 0.20554479956626892,
"learning_rate": 3.965811965811966e-05,
"loss": 0.0049,
"step": 640
},
{
"epoch": 5.2122448979591836,
"grad_norm": 0.8425185680389404,
"learning_rate": 3.9589743589743595e-05,
"loss": 0.051,
"step": 641
},
{
"epoch": 5.220408163265306,
"grad_norm": 0.21183599531650543,
"learning_rate": 3.952136752136753e-05,
"loss": 0.0112,
"step": 642
},
{
"epoch": 5.228571428571429,
"grad_norm": 0.31147268414497375,
"learning_rate": 3.945299145299146e-05,
"loss": 0.0253,
"step": 643
},
{
"epoch": 5.236734693877551,
"grad_norm": 0.14557214081287384,
"learning_rate": 3.938461538461539e-05,
"loss": 0.0122,
"step": 644
},
{
"epoch": 5.244897959183674,
"grad_norm": 0.3843270540237427,
"learning_rate": 3.931623931623932e-05,
"loss": 0.0137,
"step": 645
},
{
"epoch": 5.253061224489796,
"grad_norm": 0.29683414101600647,
"learning_rate": 3.9247863247863246e-05,
"loss": 0.0152,
"step": 646
},
{
"epoch": 5.261224489795918,
"grad_norm": 0.16846010088920593,
"learning_rate": 3.917948717948718e-05,
"loss": 0.0062,
"step": 647
},
{
"epoch": 5.2693877551020405,
"grad_norm": 0.6791422963142395,
"learning_rate": 3.9111111111111115e-05,
"loss": 0.0321,
"step": 648
},
{
"epoch": 5.277551020408163,
"grad_norm": 0.09047195315361023,
"learning_rate": 3.904273504273504e-05,
"loss": 0.0015,
"step": 649
},
{
"epoch": 5.285714285714286,
"grad_norm": 0.1297522634267807,
"learning_rate": 3.8974358974358976e-05,
"loss": 0.004,
"step": 650
},
{
"epoch": 5.293877551020408,
"grad_norm": 0.11985556781291962,
"learning_rate": 3.890598290598291e-05,
"loss": 0.0037,
"step": 651
},
{
"epoch": 5.302040816326531,
"grad_norm": 0.22316373884677887,
"learning_rate": 3.883760683760684e-05,
"loss": 0.011,
"step": 652
},
{
"epoch": 5.310204081632653,
"grad_norm": 0.4095396399497986,
"learning_rate": 3.876923076923077e-05,
"loss": 0.0297,
"step": 653
},
{
"epoch": 5.318367346938776,
"grad_norm": 0.49460023641586304,
"learning_rate": 3.8700854700854707e-05,
"loss": 0.026,
"step": 654
},
{
"epoch": 5.326530612244898,
"grad_norm": 0.01936427690088749,
"learning_rate": 3.8632478632478634e-05,
"loss": 0.0005,
"step": 655
},
{
"epoch": 5.33469387755102,
"grad_norm": 0.22673501074314117,
"learning_rate": 3.856410256410257e-05,
"loss": 0.0096,
"step": 656
},
{
"epoch": 5.3428571428571425,
"grad_norm": 0.24311856925487518,
"learning_rate": 3.84957264957265e-05,
"loss": 0.0092,
"step": 657
},
{
"epoch": 5.351020408163265,
"grad_norm": 0.15321022272109985,
"learning_rate": 3.842735042735043e-05,
"loss": 0.0057,
"step": 658
},
{
"epoch": 5.359183673469388,
"grad_norm": 0.9374060034751892,
"learning_rate": 3.8358974358974364e-05,
"loss": 0.0191,
"step": 659
},
{
"epoch": 5.36734693877551,
"grad_norm": 0.10835573077201843,
"learning_rate": 3.82905982905983e-05,
"loss": 0.0034,
"step": 660
},
{
"epoch": 5.375510204081633,
"grad_norm": 0.18718655407428741,
"learning_rate": 3.8222222222222226e-05,
"loss": 0.009,
"step": 661
},
{
"epoch": 5.383673469387755,
"grad_norm": 0.2158612310886383,
"learning_rate": 3.815384615384616e-05,
"loss": 0.0121,
"step": 662
},
{
"epoch": 5.391836734693878,
"grad_norm": 0.26239001750946045,
"learning_rate": 3.808547008547009e-05,
"loss": 0.0173,
"step": 663
},
{
"epoch": 5.4,
"grad_norm": 0.05576225370168686,
"learning_rate": 3.8017094017094015e-05,
"loss": 0.0013,
"step": 664
},
{
"epoch": 5.408163265306122,
"grad_norm": 0.5364289879798889,
"learning_rate": 3.794871794871795e-05,
"loss": 0.028,
"step": 665
},
{
"epoch": 5.416326530612245,
"grad_norm": 0.2207004874944687,
"learning_rate": 3.7880341880341884e-05,
"loss": 0.0096,
"step": 666
},
{
"epoch": 5.424489795918367,
"grad_norm": 0.22549040615558624,
"learning_rate": 3.781196581196581e-05,
"loss": 0.0067,
"step": 667
},
{
"epoch": 5.43265306122449,
"grad_norm": 0.1842697411775589,
"learning_rate": 3.7743589743589745e-05,
"loss": 0.0081,
"step": 668
},
{
"epoch": 5.440816326530612,
"grad_norm": 0.34317877888679504,
"learning_rate": 3.767521367521368e-05,
"loss": 0.008,
"step": 669
},
{
"epoch": 5.448979591836735,
"grad_norm": 0.0797891840338707,
"learning_rate": 3.760683760683761e-05,
"loss": 0.0027,
"step": 670
},
{
"epoch": 5.457142857142857,
"grad_norm": 0.09596482664346695,
"learning_rate": 3.753846153846154e-05,
"loss": 0.004,
"step": 671
},
{
"epoch": 5.46530612244898,
"grad_norm": 0.13847698271274567,
"learning_rate": 3.7470085470085476e-05,
"loss": 0.008,
"step": 672
},
{
"epoch": 5.473469387755102,
"grad_norm": 0.4299347400665283,
"learning_rate": 3.74017094017094e-05,
"loss": 0.0129,
"step": 673
},
{
"epoch": 5.481632653061224,
"grad_norm": 0.37290269136428833,
"learning_rate": 3.733333333333334e-05,
"loss": 0.009,
"step": 674
},
{
"epoch": 5.489795918367347,
"grad_norm": 0.1274859458208084,
"learning_rate": 3.726495726495727e-05,
"loss": 0.0026,
"step": 675
},
{
"epoch": 5.497959183673469,
"grad_norm": 0.055473342537879944,
"learning_rate": 3.71965811965812e-05,
"loss": 0.0011,
"step": 676
},
{
"epoch": 5.506122448979592,
"grad_norm": 0.41247642040252686,
"learning_rate": 3.712820512820513e-05,
"loss": 0.0205,
"step": 677
},
{
"epoch": 5.514285714285714,
"grad_norm": 0.37154918909072876,
"learning_rate": 3.705982905982907e-05,
"loss": 0.0163,
"step": 678
},
{
"epoch": 5.522448979591837,
"grad_norm": 0.1918550282716751,
"learning_rate": 3.6991452991452995e-05,
"loss": 0.009,
"step": 679
},
{
"epoch": 5.530612244897959,
"grad_norm": 0.15373460948467255,
"learning_rate": 3.692307692307693e-05,
"loss": 0.0031,
"step": 680
},
{
"epoch": 5.538775510204082,
"grad_norm": 0.20702648162841797,
"learning_rate": 3.685470085470086e-05,
"loss": 0.0096,
"step": 681
},
{
"epoch": 5.546938775510204,
"grad_norm": 0.1343519240617752,
"learning_rate": 3.6786324786324784e-05,
"loss": 0.0082,
"step": 682
},
{
"epoch": 5.555102040816326,
"grad_norm": 0.12029368430376053,
"learning_rate": 3.671794871794872e-05,
"loss": 0.0018,
"step": 683
},
{
"epoch": 5.563265306122449,
"grad_norm": 0.28310906887054443,
"learning_rate": 3.664957264957265e-05,
"loss": 0.0103,
"step": 684
},
{
"epoch": 5.571428571428571,
"grad_norm": 0.2390466034412384,
"learning_rate": 3.658119658119658e-05,
"loss": 0.0053,
"step": 685
},
{
"epoch": 5.579591836734694,
"grad_norm": 0.24921439588069916,
"learning_rate": 3.6512820512820514e-05,
"loss": 0.0089,
"step": 686
},
{
"epoch": 5.587755102040816,
"grad_norm": 0.3118392527103424,
"learning_rate": 3.644444444444445e-05,
"loss": 0.0121,
"step": 687
},
{
"epoch": 5.595918367346939,
"grad_norm": 0.18694745004177094,
"learning_rate": 3.6376068376068376e-05,
"loss": 0.0103,
"step": 688
},
{
"epoch": 5.604081632653061,
"grad_norm": 0.19770139455795288,
"learning_rate": 3.630769230769231e-05,
"loss": 0.0067,
"step": 689
},
{
"epoch": 5.612244897959184,
"grad_norm": 0.7333056926727295,
"learning_rate": 3.6239316239316245e-05,
"loss": 0.013,
"step": 690
},
{
"epoch": 5.6204081632653065,
"grad_norm": 0.17407962679862976,
"learning_rate": 3.617094017094017e-05,
"loss": 0.0082,
"step": 691
},
{
"epoch": 5.628571428571428,
"grad_norm": 0.20941323041915894,
"learning_rate": 3.6102564102564106e-05,
"loss": 0.0085,
"step": 692
},
{
"epoch": 5.636734693877551,
"grad_norm": 0.08693760633468628,
"learning_rate": 3.603418803418804e-05,
"loss": 0.0058,
"step": 693
},
{
"epoch": 5.644897959183673,
"grad_norm": 0.5758926272392273,
"learning_rate": 3.596581196581197e-05,
"loss": 0.0086,
"step": 694
},
{
"epoch": 5.653061224489796,
"grad_norm": 0.34267696738243103,
"learning_rate": 3.58974358974359e-05,
"loss": 0.003,
"step": 695
},
{
"epoch": 5.661224489795918,
"grad_norm": 0.43159055709838867,
"learning_rate": 3.5829059829059837e-05,
"loss": 0.0182,
"step": 696
},
{
"epoch": 5.669387755102041,
"grad_norm": 0.23170888423919678,
"learning_rate": 3.5760683760683764e-05,
"loss": 0.0015,
"step": 697
},
{
"epoch": 5.677551020408163,
"grad_norm": 0.23891058564186096,
"learning_rate": 3.56923076923077e-05,
"loss": 0.009,
"step": 698
},
{
"epoch": 5.685714285714286,
"grad_norm": 0.28715649247169495,
"learning_rate": 3.5623931623931626e-05,
"loss": 0.0053,
"step": 699
},
{
"epoch": 5.6938775510204085,
"grad_norm": 0.6357800364494324,
"learning_rate": 3.555555555555555e-05,
"loss": 0.0117,
"step": 700
},
{
"epoch": 5.70204081632653,
"grad_norm": 0.8419680595397949,
"learning_rate": 3.548717948717949e-05,
"loss": 0.0252,
"step": 701
},
{
"epoch": 5.710204081632653,
"grad_norm": 0.5171332359313965,
"learning_rate": 3.541880341880342e-05,
"loss": 0.0181,
"step": 702
},
{
"epoch": 5.718367346938775,
"grad_norm": 0.10230698436498642,
"learning_rate": 3.535042735042735e-05,
"loss": 0.008,
"step": 703
},
{
"epoch": 5.726530612244898,
"grad_norm": 0.2651132643222809,
"learning_rate": 3.5282051282051283e-05,
"loss": 0.0113,
"step": 704
},
{
"epoch": 5.73469387755102,
"grad_norm": 0.17068640887737274,
"learning_rate": 3.521367521367522e-05,
"loss": 0.007,
"step": 705
},
{
"epoch": 5.742857142857143,
"grad_norm": 0.3176792562007904,
"learning_rate": 3.5145299145299145e-05,
"loss": 0.012,
"step": 706
},
{
"epoch": 5.751020408163265,
"grad_norm": 0.5791796445846558,
"learning_rate": 3.507692307692308e-05,
"loss": 0.0185,
"step": 707
},
{
"epoch": 5.759183673469388,
"grad_norm": 0.193172425031662,
"learning_rate": 3.5008547008547014e-05,
"loss": 0.0184,
"step": 708
},
{
"epoch": 5.7673469387755105,
"grad_norm": 0.38892728090286255,
"learning_rate": 3.494017094017094e-05,
"loss": 0.01,
"step": 709
},
{
"epoch": 5.775510204081632,
"grad_norm": 0.17030949890613556,
"learning_rate": 3.4871794871794875e-05,
"loss": 0.0036,
"step": 710
},
{
"epoch": 5.783673469387755,
"grad_norm": 0.27598991990089417,
"learning_rate": 3.480341880341881e-05,
"loss": 0.0221,
"step": 711
},
{
"epoch": 5.791836734693877,
"grad_norm": 0.16970708966255188,
"learning_rate": 3.473504273504274e-05,
"loss": 0.0054,
"step": 712
},
{
"epoch": 5.8,
"grad_norm": 0.3334875702857971,
"learning_rate": 3.466666666666667e-05,
"loss": 0.0121,
"step": 713
},
{
"epoch": 5.808163265306122,
"grad_norm": 0.044451236724853516,
"learning_rate": 3.4598290598290606e-05,
"loss": 0.0019,
"step": 714
},
{
"epoch": 5.816326530612245,
"grad_norm": 0.1450454741716385,
"learning_rate": 3.452991452991453e-05,
"loss": 0.009,
"step": 715
},
{
"epoch": 5.8244897959183675,
"grad_norm": 0.19222688674926758,
"learning_rate": 3.446153846153847e-05,
"loss": 0.0081,
"step": 716
},
{
"epoch": 5.83265306122449,
"grad_norm": 0.09491467475891113,
"learning_rate": 3.4393162393162395e-05,
"loss": 0.001,
"step": 717
},
{
"epoch": 5.840816326530613,
"grad_norm": 0.1450124979019165,
"learning_rate": 3.432478632478632e-05,
"loss": 0.0109,
"step": 718
},
{
"epoch": 5.848979591836734,
"grad_norm": 0.2537761330604553,
"learning_rate": 3.4256410256410256e-05,
"loss": 0.0188,
"step": 719
},
{
"epoch": 5.857142857142857,
"grad_norm": 0.19120068848133087,
"learning_rate": 3.418803418803419e-05,
"loss": 0.0096,
"step": 720
},
{
"epoch": 5.865306122448979,
"grad_norm": 0.06364692002534866,
"learning_rate": 3.411965811965812e-05,
"loss": 0.0019,
"step": 721
},
{
"epoch": 5.873469387755102,
"grad_norm": 0.22512775659561157,
"learning_rate": 3.405128205128205e-05,
"loss": 0.0057,
"step": 722
},
{
"epoch": 5.881632653061224,
"grad_norm": 0.21793381869792938,
"learning_rate": 3.398290598290599e-05,
"loss": 0.0114,
"step": 723
},
{
"epoch": 5.889795918367347,
"grad_norm": 1.5978583097457886,
"learning_rate": 3.3914529914529914e-05,
"loss": 0.0324,
"step": 724
},
{
"epoch": 5.8979591836734695,
"grad_norm": 0.22985956072807312,
"learning_rate": 3.384615384615385e-05,
"loss": 0.0112,
"step": 725
},
{
"epoch": 5.906122448979592,
"grad_norm": 0.24227608740329742,
"learning_rate": 3.377777777777778e-05,
"loss": 0.0128,
"step": 726
},
{
"epoch": 5.914285714285715,
"grad_norm": 0.1724122315645218,
"learning_rate": 3.370940170940171e-05,
"loss": 0.0086,
"step": 727
},
{
"epoch": 5.922448979591836,
"grad_norm": 0.15982292592525482,
"learning_rate": 3.3641025641025644e-05,
"loss": 0.0078,
"step": 728
},
{
"epoch": 5.930612244897959,
"grad_norm": 0.12565600872039795,
"learning_rate": 3.357264957264958e-05,
"loss": 0.0027,
"step": 729
},
{
"epoch": 5.938775510204081,
"grad_norm": 0.2536553144454956,
"learning_rate": 3.3504273504273506e-05,
"loss": 0.0114,
"step": 730
},
{
"epoch": 5.946938775510204,
"grad_norm": 0.15599173307418823,
"learning_rate": 3.343589743589744e-05,
"loss": 0.0077,
"step": 731
},
{
"epoch": 5.955102040816326,
"grad_norm": 0.185344398021698,
"learning_rate": 3.3367521367521375e-05,
"loss": 0.0017,
"step": 732
},
{
"epoch": 5.963265306122449,
"grad_norm": 0.08160512149333954,
"learning_rate": 3.32991452991453e-05,
"loss": 0.0026,
"step": 733
},
{
"epoch": 5.9714285714285715,
"grad_norm": 0.18903043866157532,
"learning_rate": 3.3230769230769236e-05,
"loss": 0.0063,
"step": 734
},
{
"epoch": 5.979591836734694,
"grad_norm": 0.1677568107843399,
"learning_rate": 3.316239316239317e-05,
"loss": 0.0059,
"step": 735
},
{
"epoch": 5.987755102040817,
"grad_norm": 0.1872265338897705,
"learning_rate": 3.309401709401709e-05,
"loss": 0.0067,
"step": 736
},
{
"epoch": 5.995918367346938,
"grad_norm": 0.1347961127758026,
"learning_rate": 3.3025641025641025e-05,
"loss": 0.0046,
"step": 737
},
{
"epoch": 6.0,
"grad_norm": 0.05940549820661545,
"learning_rate": 3.295726495726496e-05,
"loss": 0.0019,
"step": 738
},
{
"epoch": 6.0,
"eval_accuracy": 0.9420053493378564,
"eval_f1": 0.9416409769383864,
"eval_loss": 0.15586227178573608,
"eval_precision": 0.9417140689962101,
"eval_recall": 0.9420053493378564,
"eval_runtime": 24.6319,
"eval_samples_per_second": 17.701,
"eval_steps_per_second": 17.701,
"step": 738
},
{
"epoch": 6.0081632653061225,
"grad_norm": 0.07184652984142303,
"learning_rate": 3.288888888888889e-05,
"loss": 0.0024,
"step": 739
},
{
"epoch": 6.016326530612245,
"grad_norm": 0.3214910328388214,
"learning_rate": 3.282051282051282e-05,
"loss": 0.0068,
"step": 740
},
{
"epoch": 6.024489795918368,
"grad_norm": 0.22907869517803192,
"learning_rate": 3.2752136752136756e-05,
"loss": 0.0084,
"step": 741
},
{
"epoch": 6.03265306122449,
"grad_norm": 0.22705793380737305,
"learning_rate": 3.268376068376068e-05,
"loss": 0.0049,
"step": 742
},
{
"epoch": 6.040816326530612,
"grad_norm": 0.14635081589221954,
"learning_rate": 3.261538461538462e-05,
"loss": 0.0102,
"step": 743
},
{
"epoch": 6.048979591836734,
"grad_norm": 0.19782468676567078,
"learning_rate": 3.254700854700855e-05,
"loss": 0.0044,
"step": 744
},
{
"epoch": 6.057142857142857,
"grad_norm": 0.08663511276245117,
"learning_rate": 3.247863247863248e-05,
"loss": 0.0052,
"step": 745
},
{
"epoch": 6.0653061224489795,
"grad_norm": 0.056835856288671494,
"learning_rate": 3.2410256410256413e-05,
"loss": 0.0015,
"step": 746
},
{
"epoch": 6.073469387755102,
"grad_norm": 0.08768238127231598,
"learning_rate": 3.234188034188035e-05,
"loss": 0.0023,
"step": 747
},
{
"epoch": 6.081632653061225,
"grad_norm": 0.23458008468151093,
"learning_rate": 3.2273504273504275e-05,
"loss": 0.0131,
"step": 748
},
{
"epoch": 6.089795918367347,
"grad_norm": 0.10216531157493591,
"learning_rate": 3.220512820512821e-05,
"loss": 0.0032,
"step": 749
},
{
"epoch": 6.09795918367347,
"grad_norm": 0.27003157138824463,
"learning_rate": 3.2136752136752144e-05,
"loss": 0.0167,
"step": 750
},
{
"epoch": 6.106122448979592,
"grad_norm": 0.1322830319404602,
"learning_rate": 3.206837606837607e-05,
"loss": 0.0074,
"step": 751
},
{
"epoch": 6.114285714285714,
"grad_norm": 0.06595811247825623,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.0029,
"step": 752
},
{
"epoch": 6.122448979591836,
"grad_norm": 0.11620360612869263,
"learning_rate": 3.193162393162394e-05,
"loss": 0.0077,
"step": 753
},
{
"epoch": 6.130612244897959,
"grad_norm": 0.04071362689137459,
"learning_rate": 3.186324786324787e-05,
"loss": 0.0013,
"step": 754
},
{
"epoch": 6.1387755102040815,
"grad_norm": 0.037312667816877365,
"learning_rate": 3.1794871794871795e-05,
"loss": 0.0007,
"step": 755
},
{
"epoch": 6.146938775510204,
"grad_norm": 0.11435768008232117,
"learning_rate": 3.172649572649573e-05,
"loss": 0.0016,
"step": 756
},
{
"epoch": 6.155102040816327,
"grad_norm": 0.1955532729625702,
"learning_rate": 3.1658119658119656e-05,
"loss": 0.0134,
"step": 757
},
{
"epoch": 6.163265306122449,
"grad_norm": 0.06615955382585526,
"learning_rate": 3.158974358974359e-05,
"loss": 0.0015,
"step": 758
},
{
"epoch": 6.171428571428572,
"grad_norm": 0.1115867868065834,
"learning_rate": 3.1521367521367525e-05,
"loss": 0.0023,
"step": 759
},
{
"epoch": 6.179591836734694,
"grad_norm": 0.13121837377548218,
"learning_rate": 3.145299145299145e-05,
"loss": 0.0035,
"step": 760
},
{
"epoch": 6.187755102040816,
"grad_norm": 0.16537025570869446,
"learning_rate": 3.1384615384615386e-05,
"loss": 0.0022,
"step": 761
},
{
"epoch": 6.1959183673469385,
"grad_norm": 0.16818967461585999,
"learning_rate": 3.131623931623932e-05,
"loss": 0.0075,
"step": 762
},
{
"epoch": 6.204081632653061,
"grad_norm": 0.16630838811397552,
"learning_rate": 3.124786324786325e-05,
"loss": 0.0085,
"step": 763
},
{
"epoch": 6.2122448979591836,
"grad_norm": 0.2355005294084549,
"learning_rate": 3.117948717948718e-05,
"loss": 0.0053,
"step": 764
},
{
"epoch": 6.220408163265306,
"grad_norm": 0.04024514928460121,
"learning_rate": 3.111111111111112e-05,
"loss": 0.0009,
"step": 765
},
{
"epoch": 6.228571428571429,
"grad_norm": 0.23014380037784576,
"learning_rate": 3.1042735042735044e-05,
"loss": 0.0146,
"step": 766
},
{
"epoch": 6.236734693877551,
"grad_norm": 0.006015291437506676,
"learning_rate": 3.097435897435898e-05,
"loss": 0.0001,
"step": 767
},
{
"epoch": 6.244897959183674,
"grad_norm": 0.08510640263557434,
"learning_rate": 3.090598290598291e-05,
"loss": 0.002,
"step": 768
},
{
"epoch": 6.253061224489796,
"grad_norm": 0.14379987120628357,
"learning_rate": 3.083760683760684e-05,
"loss": 0.0091,
"step": 769
},
{
"epoch": 6.261224489795918,
"grad_norm": 0.2117013931274414,
"learning_rate": 3.0769230769230774e-05,
"loss": 0.0025,
"step": 770
},
{
"epoch": 6.2693877551020405,
"grad_norm": 0.040728114545345306,
"learning_rate": 3.070085470085471e-05,
"loss": 0.0007,
"step": 771
},
{
"epoch": 6.277551020408163,
"grad_norm": 0.03688928857445717,
"learning_rate": 3.0632478632478636e-05,
"loss": 0.0006,
"step": 772
},
{
"epoch": 6.285714285714286,
"grad_norm": 0.13431765139102936,
"learning_rate": 3.0564102564102564e-05,
"loss": 0.0019,
"step": 773
},
{
"epoch": 6.293877551020408,
"grad_norm": 0.05924392119050026,
"learning_rate": 3.0495726495726498e-05,
"loss": 0.0022,
"step": 774
},
{
"epoch": 6.302040816326531,
"grad_norm": 0.11792515218257904,
"learning_rate": 3.042735042735043e-05,
"loss": 0.0021,
"step": 775
},
{
"epoch": 6.310204081632653,
"grad_norm": 0.2088608741760254,
"learning_rate": 3.035897435897436e-05,
"loss": 0.0073,
"step": 776
},
{
"epoch": 6.318367346938776,
"grad_norm": 0.8228505253791809,
"learning_rate": 3.029059829059829e-05,
"loss": 0.007,
"step": 777
},
{
"epoch": 6.326530612244898,
"grad_norm": 0.19457820057868958,
"learning_rate": 3.0222222222222225e-05,
"loss": 0.0055,
"step": 778
},
{
"epoch": 6.33469387755102,
"grad_norm": 0.07778234779834747,
"learning_rate": 3.0153846153846155e-05,
"loss": 0.0009,
"step": 779
},
{
"epoch": 6.3428571428571425,
"grad_norm": 0.43910640478134155,
"learning_rate": 3.0085470085470086e-05,
"loss": 0.0351,
"step": 780
},
{
"epoch": 6.351020408163265,
"grad_norm": 0.10128612816333771,
"learning_rate": 3.001709401709402e-05,
"loss": 0.003,
"step": 781
},
{
"epoch": 6.359183673469388,
"grad_norm": 0.09770739078521729,
"learning_rate": 2.994871794871795e-05,
"loss": 0.0033,
"step": 782
},
{
"epoch": 6.36734693877551,
"grad_norm": 0.08708677440881729,
"learning_rate": 2.9880341880341882e-05,
"loss": 0.0077,
"step": 783
},
{
"epoch": 6.375510204081633,
"grad_norm": 0.5605522990226746,
"learning_rate": 2.9811965811965817e-05,
"loss": 0.0139,
"step": 784
},
{
"epoch": 6.383673469387755,
"grad_norm": 0.11796006560325623,
"learning_rate": 2.9743589743589747e-05,
"loss": 0.0049,
"step": 785
},
{
"epoch": 6.391836734693878,
"grad_norm": 0.08884254842996597,
"learning_rate": 2.9675213675213678e-05,
"loss": 0.0039,
"step": 786
},
{
"epoch": 6.4,
"grad_norm": 0.09800074994564056,
"learning_rate": 2.960683760683761e-05,
"loss": 0.002,
"step": 787
},
{
"epoch": 6.408163265306122,
"grad_norm": 0.07932932674884796,
"learning_rate": 2.9538461538461543e-05,
"loss": 0.0038,
"step": 788
},
{
"epoch": 6.416326530612245,
"grad_norm": 0.49927836656570435,
"learning_rate": 2.9470085470085474e-05,
"loss": 0.0156,
"step": 789
},
{
"epoch": 6.424489795918367,
"grad_norm": 0.14197352528572083,
"learning_rate": 2.9401709401709405e-05,
"loss": 0.0129,
"step": 790
},
{
"epoch": 6.43265306122449,
"grad_norm": 0.47378090023994446,
"learning_rate": 2.9333333333333333e-05,
"loss": 0.0206,
"step": 791
},
{
"epoch": 6.440816326530612,
"grad_norm": 0.12690985202789307,
"learning_rate": 2.9264957264957267e-05,
"loss": 0.0035,
"step": 792
},
{
"epoch": 6.448979591836735,
"grad_norm": 0.16043904423713684,
"learning_rate": 2.9196581196581198e-05,
"loss": 0.0069,
"step": 793
},
{
"epoch": 6.457142857142857,
"grad_norm": 0.37268689274787903,
"learning_rate": 2.912820512820513e-05,
"loss": 0.0081,
"step": 794
},
{
"epoch": 6.46530612244898,
"grad_norm": 0.08618529886007309,
"learning_rate": 2.9059829059829063e-05,
"loss": 0.0019,
"step": 795
},
{
"epoch": 6.473469387755102,
"grad_norm": 0.46465665102005005,
"learning_rate": 2.8991452991452994e-05,
"loss": 0.0041,
"step": 796
},
{
"epoch": 6.481632653061224,
"grad_norm": 0.21116529405117035,
"learning_rate": 2.8923076923076925e-05,
"loss": 0.0014,
"step": 797
},
{
"epoch": 6.489795918367347,
"grad_norm": 0.03455302491784096,
"learning_rate": 2.8854700854700855e-05,
"loss": 0.001,
"step": 798
},
{
"epoch": 6.497959183673469,
"grad_norm": 0.07090940326452255,
"learning_rate": 2.878632478632479e-05,
"loss": 0.0021,
"step": 799
},
{
"epoch": 6.506122448979592,
"grad_norm": 0.04578676074743271,
"learning_rate": 2.871794871794872e-05,
"loss": 0.0008,
"step": 800
},
{
"epoch": 6.514285714285714,
"grad_norm": 0.14446327090263367,
"learning_rate": 2.864957264957265e-05,
"loss": 0.0053,
"step": 801
},
{
"epoch": 6.522448979591837,
"grad_norm": 0.1537717580795288,
"learning_rate": 2.8581196581196586e-05,
"loss": 0.0023,
"step": 802
},
{
"epoch": 6.530612244897959,
"grad_norm": 0.31299567222595215,
"learning_rate": 2.8512820512820516e-05,
"loss": 0.0068,
"step": 803
},
{
"epoch": 6.538775510204082,
"grad_norm": 0.1606074422597885,
"learning_rate": 2.8444444444444447e-05,
"loss": 0.004,
"step": 804
},
{
"epoch": 6.546938775510204,
"grad_norm": 0.254300594329834,
"learning_rate": 2.8376068376068378e-05,
"loss": 0.0063,
"step": 805
},
{
"epoch": 6.555102040816326,
"grad_norm": 0.1450517326593399,
"learning_rate": 2.8307692307692312e-05,
"loss": 0.0036,
"step": 806
},
{
"epoch": 6.563265306122449,
"grad_norm": 0.11473794281482697,
"learning_rate": 2.8239316239316243e-05,
"loss": 0.0074,
"step": 807
},
{
"epoch": 6.571428571428571,
"grad_norm": 0.04597209766507149,
"learning_rate": 2.8170940170940174e-05,
"loss": 0.0008,
"step": 808
},
{
"epoch": 6.579591836734694,
"grad_norm": 0.20627528429031372,
"learning_rate": 2.81025641025641e-05,
"loss": 0.0099,
"step": 809
},
{
"epoch": 6.587755102040816,
"grad_norm": 0.6563801169395447,
"learning_rate": 2.8034188034188036e-05,
"loss": 0.0113,
"step": 810
},
{
"epoch": 6.595918367346939,
"grad_norm": 0.12874148786067963,
"learning_rate": 2.7965811965811967e-05,
"loss": 0.0015,
"step": 811
},
{
"epoch": 6.604081632653061,
"grad_norm": 0.21111907064914703,
"learning_rate": 2.7897435897435898e-05,
"loss": 0.0095,
"step": 812
},
{
"epoch": 6.612244897959184,
"grad_norm": 0.13607758283615112,
"learning_rate": 2.7829059829059832e-05,
"loss": 0.0022,
"step": 813
},
{
"epoch": 6.6204081632653065,
"grad_norm": 0.008681375533342361,
"learning_rate": 2.7760683760683763e-05,
"loss": 0.0002,
"step": 814
},
{
"epoch": 6.628571428571428,
"grad_norm": 0.19657264649868011,
"learning_rate": 2.7692307692307694e-05,
"loss": 0.0059,
"step": 815
},
{
"epoch": 6.636734693877551,
"grad_norm": 0.22438636422157288,
"learning_rate": 2.7623931623931624e-05,
"loss": 0.0101,
"step": 816
},
{
"epoch": 6.644897959183673,
"grad_norm": 0.022456951439380646,
"learning_rate": 2.755555555555556e-05,
"loss": 0.0005,
"step": 817
},
{
"epoch": 6.653061224489796,
"grad_norm": 0.40233445167541504,
"learning_rate": 2.748717948717949e-05,
"loss": 0.011,
"step": 818
},
{
"epoch": 6.661224489795918,
"grad_norm": 0.4001105725765228,
"learning_rate": 2.741880341880342e-05,
"loss": 0.0084,
"step": 819
},
{
"epoch": 6.669387755102041,
"grad_norm": 0.12446096539497375,
"learning_rate": 2.7350427350427355e-05,
"loss": 0.0021,
"step": 820
},
{
"epoch": 6.677551020408163,
"grad_norm": 0.09965896606445312,
"learning_rate": 2.7282051282051285e-05,
"loss": 0.0052,
"step": 821
},
{
"epoch": 6.685714285714286,
"grad_norm": 0.11254263669252396,
"learning_rate": 2.7213675213675216e-05,
"loss": 0.0035,
"step": 822
},
{
"epoch": 6.6938775510204085,
"grad_norm": 0.12855035066604614,
"learning_rate": 2.7145299145299147e-05,
"loss": 0.0079,
"step": 823
},
{
"epoch": 6.70204081632653,
"grad_norm": 0.13291221857070923,
"learning_rate": 2.707692307692308e-05,
"loss": 0.0038,
"step": 824
},
{
"epoch": 6.710204081632653,
"grad_norm": 0.08022642135620117,
"learning_rate": 2.7008547008547012e-05,
"loss": 0.0032,
"step": 825
},
{
"epoch": 6.718367346938775,
"grad_norm": 0.14532768726348877,
"learning_rate": 2.6940170940170943e-05,
"loss": 0.0019,
"step": 826
},
{
"epoch": 6.726530612244898,
"grad_norm": 0.01848861761391163,
"learning_rate": 2.687179487179487e-05,
"loss": 0.0003,
"step": 827
},
{
"epoch": 6.73469387755102,
"grad_norm": 0.18730799853801727,
"learning_rate": 2.6803418803418805e-05,
"loss": 0.0101,
"step": 828
},
{
"epoch": 6.742857142857143,
"grad_norm": 0.2433444857597351,
"learning_rate": 2.6735042735042736e-05,
"loss": 0.0111,
"step": 829
},
{
"epoch": 6.751020408163265,
"grad_norm": 0.10054635256528854,
"learning_rate": 2.6666666666666667e-05,
"loss": 0.0048,
"step": 830
},
{
"epoch": 6.759183673469388,
"grad_norm": 0.1453963816165924,
"learning_rate": 2.65982905982906e-05,
"loss": 0.0034,
"step": 831
},
{
"epoch": 6.7673469387755105,
"grad_norm": 0.12410593032836914,
"learning_rate": 2.6529914529914532e-05,
"loss": 0.0055,
"step": 832
},
{
"epoch": 6.775510204081632,
"grad_norm": 0.02357162907719612,
"learning_rate": 2.6461538461538463e-05,
"loss": 0.0003,
"step": 833
},
{
"epoch": 6.783673469387755,
"grad_norm": 0.5636110305786133,
"learning_rate": 2.6393162393162393e-05,
"loss": 0.0138,
"step": 834
},
{
"epoch": 6.791836734693877,
"grad_norm": 0.021261123940348625,
"learning_rate": 2.6324786324786328e-05,
"loss": 0.0003,
"step": 835
},
{
"epoch": 6.8,
"grad_norm": 0.34472304582595825,
"learning_rate": 2.625641025641026e-05,
"loss": 0.0105,
"step": 836
},
{
"epoch": 6.808163265306122,
"grad_norm": 0.10296373069286346,
"learning_rate": 2.618803418803419e-05,
"loss": 0.001,
"step": 837
},
{
"epoch": 6.816326530612245,
"grad_norm": 0.11989521980285645,
"learning_rate": 2.6119658119658124e-05,
"loss": 0.003,
"step": 838
},
{
"epoch": 6.8244897959183675,
"grad_norm": 0.2446180284023285,
"learning_rate": 2.6051282051282054e-05,
"loss": 0.0143,
"step": 839
},
{
"epoch": 6.83265306122449,
"grad_norm": 0.14607751369476318,
"learning_rate": 2.5982905982905985e-05,
"loss": 0.0028,
"step": 840
},
{
"epoch": 6.840816326530613,
"grad_norm": 0.2830953598022461,
"learning_rate": 2.5914529914529916e-05,
"loss": 0.0136,
"step": 841
},
{
"epoch": 6.848979591836734,
"grad_norm": 0.5133418440818787,
"learning_rate": 2.584615384615385e-05,
"loss": 0.0147,
"step": 842
},
{
"epoch": 6.857142857142857,
"grad_norm": 0.17149963974952698,
"learning_rate": 2.577777777777778e-05,
"loss": 0.002,
"step": 843
},
{
"epoch": 6.865306122448979,
"grad_norm": 0.15987303853034973,
"learning_rate": 2.5709401709401712e-05,
"loss": 0.0011,
"step": 844
},
{
"epoch": 6.873469387755102,
"grad_norm": 0.4356565773487091,
"learning_rate": 2.5641025641025646e-05,
"loss": 0.0204,
"step": 845
},
{
"epoch": 6.881632653061224,
"grad_norm": 0.04300348833203316,
"learning_rate": 2.5572649572649574e-05,
"loss": 0.0006,
"step": 846
},
{
"epoch": 6.889795918367347,
"grad_norm": 0.26859360933303833,
"learning_rate": 2.5504273504273505e-05,
"loss": 0.0105,
"step": 847
},
{
"epoch": 6.8979591836734695,
"grad_norm": 0.12719742953777313,
"learning_rate": 2.5435897435897436e-05,
"loss": 0.0019,
"step": 848
},
{
"epoch": 6.906122448979592,
"grad_norm": 0.10546525567770004,
"learning_rate": 2.536752136752137e-05,
"loss": 0.003,
"step": 849
},
{
"epoch": 6.914285714285715,
"grad_norm": 0.15803073346614838,
"learning_rate": 2.52991452991453e-05,
"loss": 0.0034,
"step": 850
},
{
"epoch": 6.922448979591836,
"grad_norm": 0.08309769630432129,
"learning_rate": 2.523076923076923e-05,
"loss": 0.0019,
"step": 851
},
{
"epoch": 6.930612244897959,
"grad_norm": 0.011221293359994888,
"learning_rate": 2.5162393162393162e-05,
"loss": 0.0002,
"step": 852
},
{
"epoch": 6.938775510204081,
"grad_norm": 0.06381987035274506,
"learning_rate": 2.5094017094017097e-05,
"loss": 0.0025,
"step": 853
},
{
"epoch": 6.946938775510204,
"grad_norm": 0.18709778785705566,
"learning_rate": 2.5025641025641028e-05,
"loss": 0.003,
"step": 854
},
{
"epoch": 6.955102040816326,
"grad_norm": 0.12326924502849579,
"learning_rate": 2.495726495726496e-05,
"loss": 0.0031,
"step": 855
},
{
"epoch": 6.963265306122449,
"grad_norm": 0.12623760104179382,
"learning_rate": 2.4888888888888893e-05,
"loss": 0.0033,
"step": 856
},
{
"epoch": 6.9714285714285715,
"grad_norm": 0.15498894453048706,
"learning_rate": 2.4820512820512824e-05,
"loss": 0.0028,
"step": 857
},
{
"epoch": 6.979591836734694,
"grad_norm": 0.031409814953804016,
"learning_rate": 2.4752136752136754e-05,
"loss": 0.0004,
"step": 858
},
{
"epoch": 6.987755102040817,
"grad_norm": 0.18738146126270294,
"learning_rate": 2.4683760683760685e-05,
"loss": 0.0075,
"step": 859
},
{
"epoch": 6.995918367346938,
"grad_norm": 0.14360497891902924,
"learning_rate": 2.461538461538462e-05,
"loss": 0.0047,
"step": 860
},
{
"epoch": 7.0,
"grad_norm": 0.3702276945114136,
"learning_rate": 2.454700854700855e-05,
"loss": 0.0163,
"step": 861
},
{
"epoch": 7.0,
"eval_accuracy": 0.9416139343727575,
"eval_f1": 0.9411935049117857,
"eval_loss": 0.17100121080875397,
"eval_precision": 0.9413414416659728,
"eval_recall": 0.9416139343727575,
"eval_runtime": 25.0743,
"eval_samples_per_second": 17.388,
"eval_steps_per_second": 17.388,
"step": 861
},
{
"epoch": 7.0081632653061225,
"grad_norm": 0.005380494520068169,
"learning_rate": 2.447863247863248e-05,
"loss": 0.0001,
"step": 862
},
{
"epoch": 7.016326530612245,
"grad_norm": 0.07181048393249512,
"learning_rate": 2.4410256410256415e-05,
"loss": 0.0021,
"step": 863
},
{
"epoch": 7.024489795918368,
"grad_norm": 0.17317695915699005,
"learning_rate": 2.4341880341880343e-05,
"loss": 0.0014,
"step": 864
},
{
"epoch": 7.03265306122449,
"grad_norm": 0.11304262280464172,
"learning_rate": 2.4273504273504274e-05,
"loss": 0.007,
"step": 865
},
{
"epoch": 7.040816326530612,
"grad_norm": 0.11228794604539871,
"learning_rate": 2.4205128205128205e-05,
"loss": 0.0014,
"step": 866
},
{
"epoch": 7.048979591836734,
"grad_norm": 0.0365217849612236,
"learning_rate": 2.413675213675214e-05,
"loss": 0.0009,
"step": 867
},
{
"epoch": 7.057142857142857,
"grad_norm": 0.2410961538553238,
"learning_rate": 2.406837606837607e-05,
"loss": 0.0008,
"step": 868
},
{
"epoch": 7.0653061224489795,
"grad_norm": 0.4587385058403015,
"learning_rate": 2.4e-05,
"loss": 0.0062,
"step": 869
},
{
"epoch": 7.073469387755102,
"grad_norm": 0.1474837362766266,
"learning_rate": 2.393162393162393e-05,
"loss": 0.0046,
"step": 870
},
{
"epoch": 7.081632653061225,
"grad_norm": 0.12067204713821411,
"learning_rate": 2.3863247863247866e-05,
"loss": 0.0118,
"step": 871
},
{
"epoch": 7.089795918367347,
"grad_norm": 0.07675416022539139,
"learning_rate": 2.3794871794871797e-05,
"loss": 0.0017,
"step": 872
},
{
"epoch": 7.09795918367347,
"grad_norm": 0.010980883613228798,
"learning_rate": 2.3726495726495727e-05,
"loss": 0.0003,
"step": 873
},
{
"epoch": 7.106122448979592,
"grad_norm": 0.10777924954891205,
"learning_rate": 2.365811965811966e-05,
"loss": 0.0029,
"step": 874
},
{
"epoch": 7.114285714285714,
"grad_norm": 0.04947957023978233,
"learning_rate": 2.3589743589743593e-05,
"loss": 0.0016,
"step": 875
},
{
"epoch": 7.122448979591836,
"grad_norm": 0.2570093870162964,
"learning_rate": 2.3521367521367523e-05,
"loss": 0.0298,
"step": 876
},
{
"epoch": 7.130612244897959,
"grad_norm": 0.09827487170696259,
"learning_rate": 2.3452991452991458e-05,
"loss": 0.0034,
"step": 877
},
{
"epoch": 7.1387755102040815,
"grad_norm": 0.013062435202300549,
"learning_rate": 2.338461538461539e-05,
"loss": 0.0002,
"step": 878
},
{
"epoch": 7.146938775510204,
"grad_norm": 0.042147841304540634,
"learning_rate": 2.331623931623932e-05,
"loss": 0.0007,
"step": 879
},
{
"epoch": 7.155102040816327,
"grad_norm": 0.052288565784692764,
"learning_rate": 2.324786324786325e-05,
"loss": 0.0023,
"step": 880
},
{
"epoch": 7.163265306122449,
"grad_norm": 0.019575530663132668,
"learning_rate": 2.3179487179487184e-05,
"loss": 0.0004,
"step": 881
},
{
"epoch": 7.171428571428572,
"grad_norm": 0.02954856865108013,
"learning_rate": 2.3111111111111112e-05,
"loss": 0.0005,
"step": 882
},
{
"epoch": 7.179591836734694,
"grad_norm": 0.09335067123174667,
"learning_rate": 2.3042735042735043e-05,
"loss": 0.0015,
"step": 883
},
{
"epoch": 7.187755102040816,
"grad_norm": 0.2734461724758148,
"learning_rate": 2.2974358974358974e-05,
"loss": 0.0014,
"step": 884
},
{
"epoch": 7.1959183673469385,
"grad_norm": 0.49978339672088623,
"learning_rate": 2.2905982905982908e-05,
"loss": 0.02,
"step": 885
},
{
"epoch": 7.204081632653061,
"grad_norm": 0.04176400974392891,
"learning_rate": 2.283760683760684e-05,
"loss": 0.0009,
"step": 886
},
{
"epoch": 7.2122448979591836,
"grad_norm": 0.09729107469320297,
"learning_rate": 2.276923076923077e-05,
"loss": 0.0016,
"step": 887
},
{
"epoch": 7.220408163265306,
"grad_norm": 0.04460914805531502,
"learning_rate": 2.27008547008547e-05,
"loss": 0.0012,
"step": 888
},
{
"epoch": 7.228571428571429,
"grad_norm": 0.038177452981472015,
"learning_rate": 2.2632478632478635e-05,
"loss": 0.0003,
"step": 889
},
{
"epoch": 7.236734693877551,
"grad_norm": 0.10953059792518616,
"learning_rate": 2.2564102564102566e-05,
"loss": 0.0035,
"step": 890
},
{
"epoch": 7.244897959183674,
"grad_norm": 0.311824768781662,
"learning_rate": 2.2495726495726496e-05,
"loss": 0.0073,
"step": 891
},
{
"epoch": 7.253061224489796,
"grad_norm": 0.046279143542051315,
"learning_rate": 2.242735042735043e-05,
"loss": 0.0012,
"step": 892
},
{
"epoch": 7.261224489795918,
"grad_norm": 0.016753727570176125,
"learning_rate": 2.235897435897436e-05,
"loss": 0.0003,
"step": 893
},
{
"epoch": 7.2693877551020405,
"grad_norm": 0.4180339574813843,
"learning_rate": 2.2290598290598292e-05,
"loss": 0.0082,
"step": 894
},
{
"epoch": 7.277551020408163,
"grad_norm": 0.7917170524597168,
"learning_rate": 2.2222222222222227e-05,
"loss": 0.0118,
"step": 895
},
{
"epoch": 7.285714285714286,
"grad_norm": 0.023676620796322823,
"learning_rate": 2.2153846153846158e-05,
"loss": 0.0005,
"step": 896
},
{
"epoch": 7.293877551020408,
"grad_norm": 0.20448362827301025,
"learning_rate": 2.208547008547009e-05,
"loss": 0.0042,
"step": 897
},
{
"epoch": 7.302040816326531,
"grad_norm": 0.08442284911870956,
"learning_rate": 2.201709401709402e-05,
"loss": 0.0015,
"step": 898
},
{
"epoch": 7.310204081632653,
"grad_norm": 0.12260103970766068,
"learning_rate": 2.1948717948717954e-05,
"loss": 0.0031,
"step": 899
},
{
"epoch": 7.318367346938776,
"grad_norm": 0.19080136716365814,
"learning_rate": 2.1880341880341884e-05,
"loss": 0.0023,
"step": 900
},
{
"epoch": 7.326530612244898,
"grad_norm": 0.15384361147880554,
"learning_rate": 2.1811965811965812e-05,
"loss": 0.0012,
"step": 901
},
{
"epoch": 7.33469387755102,
"grad_norm": 0.05359187722206116,
"learning_rate": 2.1743589743589743e-05,
"loss": 0.0006,
"step": 902
},
{
"epoch": 7.3428571428571425,
"grad_norm": 0.2594751715660095,
"learning_rate": 2.1675213675213677e-05,
"loss": 0.0051,
"step": 903
},
{
"epoch": 7.351020408163265,
"grad_norm": 0.04371648281812668,
"learning_rate": 2.1606837606837608e-05,
"loss": 0.0006,
"step": 904
},
{
"epoch": 7.359183673469388,
"grad_norm": 0.5175739526748657,
"learning_rate": 2.153846153846154e-05,
"loss": 0.0058,
"step": 905
},
{
"epoch": 7.36734693877551,
"grad_norm": 0.3708977699279785,
"learning_rate": 2.147008547008547e-05,
"loss": 0.0035,
"step": 906
},
{
"epoch": 7.375510204081633,
"grad_norm": 0.2661634385585785,
"learning_rate": 2.1401709401709404e-05,
"loss": 0.0054,
"step": 907
},
{
"epoch": 7.383673469387755,
"grad_norm": 0.11005009710788727,
"learning_rate": 2.1333333333333335e-05,
"loss": 0.0026,
"step": 908
},
{
"epoch": 7.391836734693878,
"grad_norm": 0.09081326425075531,
"learning_rate": 2.1264957264957265e-05,
"loss": 0.0009,
"step": 909
},
{
"epoch": 7.4,
"grad_norm": 0.07192150503396988,
"learning_rate": 2.11965811965812e-05,
"loss": 0.0051,
"step": 910
},
{
"epoch": 7.408163265306122,
"grad_norm": 0.026940980926156044,
"learning_rate": 2.112820512820513e-05,
"loss": 0.0005,
"step": 911
},
{
"epoch": 7.416326530612245,
"grad_norm": 0.08359820395708084,
"learning_rate": 2.105982905982906e-05,
"loss": 0.0045,
"step": 912
},
{
"epoch": 7.424489795918367,
"grad_norm": 0.12868310511112213,
"learning_rate": 2.0991452991452996e-05,
"loss": 0.0056,
"step": 913
},
{
"epoch": 7.43265306122449,
"grad_norm": 0.16965226829051971,
"learning_rate": 2.0923076923076927e-05,
"loss": 0.0008,
"step": 914
},
{
"epoch": 7.440816326530612,
"grad_norm": 0.4554808437824249,
"learning_rate": 2.0854700854700857e-05,
"loss": 0.0092,
"step": 915
},
{
"epoch": 7.448979591836735,
"grad_norm": 0.008080328814685345,
"learning_rate": 2.0786324786324788e-05,
"loss": 0.0002,
"step": 916
},
{
"epoch": 7.457142857142857,
"grad_norm": 0.03749796375632286,
"learning_rate": 2.0717948717948723e-05,
"loss": 0.0036,
"step": 917
},
{
"epoch": 7.46530612244898,
"grad_norm": 0.01586100459098816,
"learning_rate": 2.0649572649572653e-05,
"loss": 0.0002,
"step": 918
},
{
"epoch": 7.473469387755102,
"grad_norm": 0.13012056052684784,
"learning_rate": 2.058119658119658e-05,
"loss": 0.0018,
"step": 919
},
{
"epoch": 7.481632653061224,
"grad_norm": 0.04649100825190544,
"learning_rate": 2.0512820512820512e-05,
"loss": 0.0006,
"step": 920
},
{
"epoch": 7.489795918367347,
"grad_norm": 0.03851509839296341,
"learning_rate": 2.0444444444444446e-05,
"loss": 0.0036,
"step": 921
},
{
"epoch": 7.497959183673469,
"grad_norm": 0.1530081182718277,
"learning_rate": 2.0376068376068377e-05,
"loss": 0.01,
"step": 922
},
{
"epoch": 7.506122448979592,
"grad_norm": 0.028013063594698906,
"learning_rate": 2.0307692307692308e-05,
"loss": 0.0005,
"step": 923
},
{
"epoch": 7.514285714285714,
"grad_norm": 0.017429566010832787,
"learning_rate": 2.023931623931624e-05,
"loss": 0.0003,
"step": 924
},
{
"epoch": 7.522448979591837,
"grad_norm": 0.08652772009372711,
"learning_rate": 2.0170940170940173e-05,
"loss": 0.0041,
"step": 925
},
{
"epoch": 7.530612244897959,
"grad_norm": 0.015552469529211521,
"learning_rate": 2.0102564102564104e-05,
"loss": 0.0004,
"step": 926
},
{
"epoch": 7.538775510204082,
"grad_norm": 0.1635313630104065,
"learning_rate": 2.0034188034188035e-05,
"loss": 0.0024,
"step": 927
},
{
"epoch": 7.546938775510204,
"grad_norm": 0.09557072073221207,
"learning_rate": 1.996581196581197e-05,
"loss": 0.0031,
"step": 928
},
{
"epoch": 7.555102040816326,
"grad_norm": 0.056514523923397064,
"learning_rate": 1.98974358974359e-05,
"loss": 0.001,
"step": 929
},
{
"epoch": 7.563265306122449,
"grad_norm": 0.11032027006149292,
"learning_rate": 1.982905982905983e-05,
"loss": 0.0027,
"step": 930
},
{
"epoch": 7.571428571428571,
"grad_norm": 0.1199721097946167,
"learning_rate": 1.9760683760683765e-05,
"loss": 0.0045,
"step": 931
},
{
"epoch": 7.579591836734694,
"grad_norm": 0.06572246551513672,
"learning_rate": 1.9692307692307696e-05,
"loss": 0.0005,
"step": 932
},
{
"epoch": 7.587755102040816,
"grad_norm": 0.02812982350587845,
"learning_rate": 1.9623931623931623e-05,
"loss": 0.0016,
"step": 933
},
{
"epoch": 7.595918367346939,
"grad_norm": 0.36669132113456726,
"learning_rate": 1.9555555555555557e-05,
"loss": 0.0075,
"step": 934
},
{
"epoch": 7.604081632653061,
"grad_norm": 0.007166026625782251,
"learning_rate": 1.9487179487179488e-05,
"loss": 0.0002,
"step": 935
},
{
"epoch": 7.612244897959184,
"grad_norm": 0.0843917652964592,
"learning_rate": 1.941880341880342e-05,
"loss": 0.0014,
"step": 936
},
{
"epoch": 7.6204081632653065,
"grad_norm": 0.03270947188138962,
"learning_rate": 1.9350427350427353e-05,
"loss": 0.0004,
"step": 937
},
{
"epoch": 7.628571428571428,
"grad_norm": 0.11428512632846832,
"learning_rate": 1.9282051282051284e-05,
"loss": 0.0037,
"step": 938
},
{
"epoch": 7.636734693877551,
"grad_norm": 0.14075659215450287,
"learning_rate": 1.9213675213675215e-05,
"loss": 0.0038,
"step": 939
},
{
"epoch": 7.644897959183673,
"grad_norm": 0.039455536752939224,
"learning_rate": 1.914529914529915e-05,
"loss": 0.0003,
"step": 940
},
{
"epoch": 7.653061224489796,
"grad_norm": 0.08807907998561859,
"learning_rate": 1.907692307692308e-05,
"loss": 0.0032,
"step": 941
},
{
"epoch": 7.661224489795918,
"grad_norm": 0.016785893589258194,
"learning_rate": 1.9008547008547008e-05,
"loss": 0.0003,
"step": 942
},
{
"epoch": 7.669387755102041,
"grad_norm": 0.050439443439245224,
"learning_rate": 1.8940170940170942e-05,
"loss": 0.0006,
"step": 943
},
{
"epoch": 7.677551020408163,
"grad_norm": 0.05136784538626671,
"learning_rate": 1.8871794871794873e-05,
"loss": 0.0008,
"step": 944
},
{
"epoch": 7.685714285714286,
"grad_norm": 0.032696232199668884,
"learning_rate": 1.8803418803418804e-05,
"loss": 0.0004,
"step": 945
},
{
"epoch": 7.6938775510204085,
"grad_norm": 0.06387408822774887,
"learning_rate": 1.8735042735042738e-05,
"loss": 0.0017,
"step": 946
},
{
"epoch": 7.70204081632653,
"grad_norm": 0.3237035870552063,
"learning_rate": 1.866666666666667e-05,
"loss": 0.0077,
"step": 947
},
{
"epoch": 7.710204081632653,
"grad_norm": 0.14317689836025238,
"learning_rate": 1.85982905982906e-05,
"loss": 0.0036,
"step": 948
},
{
"epoch": 7.718367346938775,
"grad_norm": 0.03586750105023384,
"learning_rate": 1.8529914529914534e-05,
"loss": 0.0018,
"step": 949
},
{
"epoch": 7.726530612244898,
"grad_norm": 0.005396117921918631,
"learning_rate": 1.8461538461538465e-05,
"loss": 0.0001,
"step": 950
},
{
"epoch": 7.73469387755102,
"grad_norm": 0.010027500800788403,
"learning_rate": 1.8393162393162392e-05,
"loss": 0.0002,
"step": 951
},
{
"epoch": 7.742857142857143,
"grad_norm": 0.047518227249383926,
"learning_rate": 1.8324786324786326e-05,
"loss": 0.0016,
"step": 952
},
{
"epoch": 7.751020408163265,
"grad_norm": 0.005562972743064165,
"learning_rate": 1.8256410256410257e-05,
"loss": 0.0001,
"step": 953
},
{
"epoch": 7.759183673469388,
"grad_norm": 0.007851188071072102,
"learning_rate": 1.8188034188034188e-05,
"loss": 0.0002,
"step": 954
},
{
"epoch": 7.7673469387755105,
"grad_norm": 0.005186399444937706,
"learning_rate": 1.8119658119658122e-05,
"loss": 0.0001,
"step": 955
},
{
"epoch": 7.775510204081632,
"grad_norm": 0.020631812512874603,
"learning_rate": 1.8051282051282053e-05,
"loss": 0.0003,
"step": 956
},
{
"epoch": 7.783673469387755,
"grad_norm": 0.0623784177005291,
"learning_rate": 1.7982905982905984e-05,
"loss": 0.0007,
"step": 957
},
{
"epoch": 7.791836734693877,
"grad_norm": 0.10035212337970734,
"learning_rate": 1.7914529914529918e-05,
"loss": 0.0041,
"step": 958
},
{
"epoch": 7.8,
"grad_norm": 0.10691452026367188,
"learning_rate": 1.784615384615385e-05,
"loss": 0.004,
"step": 959
},
{
"epoch": 7.808163265306122,
"grad_norm": 0.2187003642320633,
"learning_rate": 1.7777777777777777e-05,
"loss": 0.003,
"step": 960
},
{
"epoch": 7.816326530612245,
"grad_norm": 0.12766751646995544,
"learning_rate": 1.770940170940171e-05,
"loss": 0.003,
"step": 961
},
{
"epoch": 7.8244897959183675,
"grad_norm": 0.10042405128479004,
"learning_rate": 1.7641025641025642e-05,
"loss": 0.0025,
"step": 962
},
{
"epoch": 7.83265306122449,
"grad_norm": 0.024409618228673935,
"learning_rate": 1.7572649572649573e-05,
"loss": 0.0003,
"step": 963
},
{
"epoch": 7.840816326530613,
"grad_norm": 0.08938995003700256,
"learning_rate": 1.7504273504273507e-05,
"loss": 0.0017,
"step": 964
},
{
"epoch": 7.848979591836734,
"grad_norm": 0.006908862851560116,
"learning_rate": 1.7435897435897438e-05,
"loss": 0.0001,
"step": 965
},
{
"epoch": 7.857142857142857,
"grad_norm": 0.33812665939331055,
"learning_rate": 1.736752136752137e-05,
"loss": 0.0101,
"step": 966
},
{
"epoch": 7.865306122448979,
"grad_norm": 0.059313975274562836,
"learning_rate": 1.7299145299145303e-05,
"loss": 0.0023,
"step": 967
},
{
"epoch": 7.873469387755102,
"grad_norm": 0.2146165370941162,
"learning_rate": 1.7230769230769234e-05,
"loss": 0.0082,
"step": 968
},
{
"epoch": 7.881632653061224,
"grad_norm": 0.07495953142642975,
"learning_rate": 1.716239316239316e-05,
"loss": 0.0052,
"step": 969
},
{
"epoch": 7.889795918367347,
"grad_norm": 0.17084024846553802,
"learning_rate": 1.7094017094017095e-05,
"loss": 0.0009,
"step": 970
},
{
"epoch": 7.8979591836734695,
"grad_norm": 0.16996727883815765,
"learning_rate": 1.7025641025641026e-05,
"loss": 0.0038,
"step": 971
},
{
"epoch": 7.906122448979592,
"grad_norm": 0.06174658238887787,
"learning_rate": 1.6957264957264957e-05,
"loss": 0.0009,
"step": 972
},
{
"epoch": 7.914285714285715,
"grad_norm": 0.035608597099781036,
"learning_rate": 1.688888888888889e-05,
"loss": 0.0006,
"step": 973
},
{
"epoch": 7.922448979591836,
"grad_norm": 0.2112169861793518,
"learning_rate": 1.6820512820512822e-05,
"loss": 0.0035,
"step": 974
},
{
"epoch": 7.930612244897959,
"grad_norm": 0.15173368155956268,
"learning_rate": 1.6752136752136753e-05,
"loss": 0.0024,
"step": 975
},
{
"epoch": 7.938775510204081,
"grad_norm": 0.00365807325579226,
"learning_rate": 1.6683760683760687e-05,
"loss": 0.0001,
"step": 976
},
{
"epoch": 7.946938775510204,
"grad_norm": 0.04172469303011894,
"learning_rate": 1.6615384615384618e-05,
"loss": 0.0003,
"step": 977
},
{
"epoch": 7.955102040816326,
"grad_norm": 0.02554394118487835,
"learning_rate": 1.6547008547008546e-05,
"loss": 0.0005,
"step": 978
},
{
"epoch": 7.963265306122449,
"grad_norm": 0.14168842136859894,
"learning_rate": 1.647863247863248e-05,
"loss": 0.0022,
"step": 979
},
{
"epoch": 7.9714285714285715,
"grad_norm": 0.06103862076997757,
"learning_rate": 1.641025641025641e-05,
"loss": 0.0009,
"step": 980
},
{
"epoch": 7.979591836734694,
"grad_norm": 0.09359610825777054,
"learning_rate": 1.634188034188034e-05,
"loss": 0.0004,
"step": 981
},
{
"epoch": 7.987755102040817,
"grad_norm": 0.09833401441574097,
"learning_rate": 1.6273504273504276e-05,
"loss": 0.0015,
"step": 982
},
{
"epoch": 7.995918367346938,
"grad_norm": 0.14906033873558044,
"learning_rate": 1.6205128205128207e-05,
"loss": 0.0052,
"step": 983
},
{
"epoch": 8.0,
"grad_norm": 0.03419484943151474,
"learning_rate": 1.6136752136752138e-05,
"loss": 0.0006,
"step": 984
},
{
"epoch": 8.0,
"eval_accuracy": 0.9416139343727575,
"eval_f1": 0.9410915355911077,
"eval_loss": 0.20951753854751587,
"eval_precision": 0.9414299271894795,
"eval_recall": 0.9416139343727575,
"eval_runtime": 24.82,
"eval_samples_per_second": 17.567,
"eval_steps_per_second": 17.567,
"step": 984
},
{
"epoch": 8.008163265306122,
"grad_norm": 0.027842367067933083,
"learning_rate": 1.6068376068376072e-05,
"loss": 0.0005,
"step": 985
},
{
"epoch": 8.016326530612245,
"grad_norm": 0.015295000746846199,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.0003,
"step": 986
},
{
"epoch": 8.024489795918367,
"grad_norm": 0.04106583446264267,
"learning_rate": 1.5931623931623934e-05,
"loss": 0.0029,
"step": 987
},
{
"epoch": 8.03265306122449,
"grad_norm": 0.19340075552463531,
"learning_rate": 1.5863247863247864e-05,
"loss": 0.0128,
"step": 988
},
{
"epoch": 8.040816326530612,
"grad_norm": 0.018350228667259216,
"learning_rate": 1.5794871794871795e-05,
"loss": 0.0002,
"step": 989
},
{
"epoch": 8.048979591836735,
"grad_norm": 0.010258130729198456,
"learning_rate": 1.5726495726495726e-05,
"loss": 0.0002,
"step": 990
},
{
"epoch": 8.057142857142857,
"grad_norm": 0.12881983816623688,
"learning_rate": 1.565811965811966e-05,
"loss": 0.0094,
"step": 991
},
{
"epoch": 8.06530612244898,
"grad_norm": 0.04051700606942177,
"learning_rate": 1.558974358974359e-05,
"loss": 0.0005,
"step": 992
},
{
"epoch": 8.073469387755102,
"grad_norm": 0.08483976870775223,
"learning_rate": 1.5521367521367522e-05,
"loss": 0.0015,
"step": 993
},
{
"epoch": 8.081632653061224,
"grad_norm": 0.0454951710999012,
"learning_rate": 1.5452991452991456e-05,
"loss": 0.0013,
"step": 994
},
{
"epoch": 8.089795918367347,
"grad_norm": 0.01969115249812603,
"learning_rate": 1.5384615384615387e-05,
"loss": 0.0003,
"step": 995
},
{
"epoch": 8.097959183673469,
"grad_norm": 0.033365558832883835,
"learning_rate": 1.5316239316239318e-05,
"loss": 0.0004,
"step": 996
},
{
"epoch": 8.106122448979592,
"grad_norm": 0.0014622848248109221,
"learning_rate": 1.5247863247863249e-05,
"loss": 0.0,
"step": 997
},
{
"epoch": 8.114285714285714,
"grad_norm": 0.007859136909246445,
"learning_rate": 1.517948717948718e-05,
"loss": 0.0001,
"step": 998
},
{
"epoch": 8.122448979591837,
"grad_norm": 0.03159104660153389,
"learning_rate": 1.5111111111111112e-05,
"loss": 0.0003,
"step": 999
},
{
"epoch": 8.130612244897959,
"grad_norm": 0.008046046830713749,
"learning_rate": 1.5042735042735043e-05,
"loss": 0.0002,
"step": 1000
},
{
"epoch": 8.138775510204082,
"grad_norm": 0.0062632174231112,
"learning_rate": 1.4974358974358976e-05,
"loss": 0.0001,
"step": 1001
},
{
"epoch": 8.146938775510204,
"grad_norm": 0.018382323905825615,
"learning_rate": 1.4905982905982908e-05,
"loss": 0.0003,
"step": 1002
},
{
"epoch": 8.155102040816326,
"grad_norm": 0.0031079400796443224,
"learning_rate": 1.4837606837606839e-05,
"loss": 0.0001,
"step": 1003
},
{
"epoch": 8.16326530612245,
"grad_norm": 0.037184253334999084,
"learning_rate": 1.4769230769230772e-05,
"loss": 0.0009,
"step": 1004
},
{
"epoch": 8.17142857142857,
"grad_norm": 0.04274457320570946,
"learning_rate": 1.4700854700854703e-05,
"loss": 0.0007,
"step": 1005
},
{
"epoch": 8.179591836734694,
"grad_norm": 0.03666644170880318,
"learning_rate": 1.4632478632478633e-05,
"loss": 0.0006,
"step": 1006
},
{
"epoch": 8.187755102040816,
"grad_norm": 0.09907951951026917,
"learning_rate": 1.4564102564102564e-05,
"loss": 0.0022,
"step": 1007
},
{
"epoch": 8.19591836734694,
"grad_norm": 0.057159584015607834,
"learning_rate": 1.4495726495726497e-05,
"loss": 0.0005,
"step": 1008
},
{
"epoch": 8.204081632653061,
"grad_norm": 0.03271704539656639,
"learning_rate": 1.4427350427350428e-05,
"loss": 0.0006,
"step": 1009
},
{
"epoch": 8.212244897959184,
"grad_norm": 0.0030933571979403496,
"learning_rate": 1.435897435897436e-05,
"loss": 0.0,
"step": 1010
},
{
"epoch": 8.220408163265306,
"grad_norm": 0.02095775492489338,
"learning_rate": 1.4290598290598293e-05,
"loss": 0.0005,
"step": 1011
},
{
"epoch": 8.228571428571428,
"grad_norm": 0.049353066831827164,
"learning_rate": 1.4222222222222224e-05,
"loss": 0.0007,
"step": 1012
},
{
"epoch": 8.236734693877551,
"grad_norm": 0.023141806945204735,
"learning_rate": 1.4153846153846156e-05,
"loss": 0.0003,
"step": 1013
},
{
"epoch": 8.244897959183673,
"grad_norm": 0.015122964978218079,
"learning_rate": 1.4085470085470087e-05,
"loss": 0.0007,
"step": 1014
},
{
"epoch": 8.253061224489796,
"grad_norm": 0.003815308678895235,
"learning_rate": 1.4017094017094018e-05,
"loss": 0.0001,
"step": 1015
},
{
"epoch": 8.261224489795918,
"grad_norm": 0.006639714352786541,
"learning_rate": 1.3948717948717949e-05,
"loss": 0.0001,
"step": 1016
},
{
"epoch": 8.269387755102041,
"grad_norm": 0.20341694355010986,
"learning_rate": 1.3880341880341881e-05,
"loss": 0.0075,
"step": 1017
},
{
"epoch": 8.277551020408163,
"grad_norm": 0.006596466526389122,
"learning_rate": 1.3811965811965812e-05,
"loss": 0.0,
"step": 1018
},
{
"epoch": 8.285714285714286,
"grad_norm": 0.059214457869529724,
"learning_rate": 1.3743589743589745e-05,
"loss": 0.0022,
"step": 1019
},
{
"epoch": 8.293877551020408,
"grad_norm": 0.01356984581798315,
"learning_rate": 1.3675213675213677e-05,
"loss": 0.0002,
"step": 1020
},
{
"epoch": 8.30204081632653,
"grad_norm": 0.052929263561964035,
"learning_rate": 1.3606837606837608e-05,
"loss": 0.0013,
"step": 1021
},
{
"epoch": 8.310204081632653,
"grad_norm": 0.0020449981093406677,
"learning_rate": 1.353846153846154e-05,
"loss": 0.0,
"step": 1022
},
{
"epoch": 8.318367346938775,
"grad_norm": 0.3605046272277832,
"learning_rate": 1.3470085470085472e-05,
"loss": 0.0136,
"step": 1023
},
{
"epoch": 8.326530612244898,
"grad_norm": 0.05114666745066643,
"learning_rate": 1.3401709401709402e-05,
"loss": 0.0009,
"step": 1024
},
{
"epoch": 8.33469387755102,
"grad_norm": 0.025500185787677765,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.0004,
"step": 1025
},
{
"epoch": 8.342857142857143,
"grad_norm": 0.006557499058544636,
"learning_rate": 1.3264957264957266e-05,
"loss": 0.0001,
"step": 1026
},
{
"epoch": 8.351020408163265,
"grad_norm": 0.02797406166791916,
"learning_rate": 1.3196581196581197e-05,
"loss": 0.0002,
"step": 1027
},
{
"epoch": 8.359183673469389,
"grad_norm": 0.003020975971594453,
"learning_rate": 1.312820512820513e-05,
"loss": 0.0001,
"step": 1028
},
{
"epoch": 8.36734693877551,
"grad_norm": 0.0024696961045265198,
"learning_rate": 1.3059829059829062e-05,
"loss": 0.0,
"step": 1029
},
{
"epoch": 8.375510204081632,
"grad_norm": 0.05511481314897537,
"learning_rate": 1.2991452991452993e-05,
"loss": 0.001,
"step": 1030
},
{
"epoch": 8.383673469387755,
"grad_norm": 0.03543705493211746,
"learning_rate": 1.2923076923076925e-05,
"loss": 0.0004,
"step": 1031
},
{
"epoch": 8.391836734693877,
"grad_norm": 1.5244866609573364,
"learning_rate": 1.2854700854700856e-05,
"loss": 0.0021,
"step": 1032
},
{
"epoch": 8.4,
"grad_norm": 0.030157363042235374,
"learning_rate": 1.2786324786324787e-05,
"loss": 0.0006,
"step": 1033
},
{
"epoch": 8.408163265306122,
"grad_norm": 0.06905968487262726,
"learning_rate": 1.2717948717948718e-05,
"loss": 0.0011,
"step": 1034
},
{
"epoch": 8.416326530612245,
"grad_norm": 0.002873434219509363,
"learning_rate": 1.264957264957265e-05,
"loss": 0.0,
"step": 1035
},
{
"epoch": 8.424489795918367,
"grad_norm": 0.012141639366745949,
"learning_rate": 1.2581196581196581e-05,
"loss": 0.0002,
"step": 1036
},
{
"epoch": 8.43265306122449,
"grad_norm": 0.016885140910744667,
"learning_rate": 1.2512820512820514e-05,
"loss": 0.0002,
"step": 1037
},
{
"epoch": 8.440816326530612,
"grad_norm": 0.0075583746656775475,
"learning_rate": 1.2444444444444446e-05,
"loss": 0.0001,
"step": 1038
},
{
"epoch": 8.448979591836734,
"grad_norm": 0.01263425312936306,
"learning_rate": 1.2376068376068377e-05,
"loss": 0.0003,
"step": 1039
},
{
"epoch": 8.457142857142857,
"grad_norm": 0.004327788483351469,
"learning_rate": 1.230769230769231e-05,
"loss": 0.0001,
"step": 1040
},
{
"epoch": 8.465306122448979,
"grad_norm": 0.005272835027426481,
"learning_rate": 1.223931623931624e-05,
"loss": 0.0001,
"step": 1041
},
{
"epoch": 8.473469387755102,
"grad_norm": 0.00520187197253108,
"learning_rate": 1.2170940170940171e-05,
"loss": 0.0001,
"step": 1042
},
{
"epoch": 8.481632653061224,
"grad_norm": 0.03641340509057045,
"learning_rate": 1.2102564102564102e-05,
"loss": 0.0003,
"step": 1043
},
{
"epoch": 8.489795918367347,
"grad_norm": 0.05241888388991356,
"learning_rate": 1.2034188034188035e-05,
"loss": 0.0017,
"step": 1044
},
{
"epoch": 8.49795918367347,
"grad_norm": 0.009457703679800034,
"learning_rate": 1.1965811965811966e-05,
"loss": 0.0002,
"step": 1045
},
{
"epoch": 8.506122448979593,
"grad_norm": 0.003092976287007332,
"learning_rate": 1.1897435897435898e-05,
"loss": 0.0,
"step": 1046
},
{
"epoch": 8.514285714285714,
"grad_norm": 0.001040496164932847,
"learning_rate": 1.182905982905983e-05,
"loss": 0.0,
"step": 1047
},
{
"epoch": 8.522448979591836,
"grad_norm": 0.01029434148222208,
"learning_rate": 1.1760683760683762e-05,
"loss": 0.0001,
"step": 1048
},
{
"epoch": 8.53061224489796,
"grad_norm": 0.07402694225311279,
"learning_rate": 1.1692307692307694e-05,
"loss": 0.003,
"step": 1049
},
{
"epoch": 8.538775510204081,
"grad_norm": 0.0047174179926514626,
"learning_rate": 1.1623931623931625e-05,
"loss": 0.0001,
"step": 1050
},
{
"epoch": 8.546938775510204,
"grad_norm": 0.029351942241191864,
"learning_rate": 1.1555555555555556e-05,
"loss": 0.0005,
"step": 1051
},
{
"epoch": 8.555102040816326,
"grad_norm": 0.002197829307988286,
"learning_rate": 1.1487179487179487e-05,
"loss": 0.0,
"step": 1052
},
{
"epoch": 8.56326530612245,
"grad_norm": 0.0036702328361570835,
"learning_rate": 1.141880341880342e-05,
"loss": 0.0001,
"step": 1053
},
{
"epoch": 8.571428571428571,
"grad_norm": 0.03191132843494415,
"learning_rate": 1.135042735042735e-05,
"loss": 0.0024,
"step": 1054
},
{
"epoch": 8.579591836734695,
"grad_norm": 0.03412720188498497,
"learning_rate": 1.1282051282051283e-05,
"loss": 0.0006,
"step": 1055
},
{
"epoch": 8.587755102040816,
"grad_norm": 0.005484213586896658,
"learning_rate": 1.1213675213675215e-05,
"loss": 0.0001,
"step": 1056
},
{
"epoch": 8.59591836734694,
"grad_norm": 0.0703863799571991,
"learning_rate": 1.1145299145299146e-05,
"loss": 0.0018,
"step": 1057
},
{
"epoch": 8.604081632653061,
"grad_norm": 0.025943145155906677,
"learning_rate": 1.1076923076923079e-05,
"loss": 0.0002,
"step": 1058
},
{
"epoch": 8.612244897959183,
"grad_norm": 0.1388336420059204,
"learning_rate": 1.100854700854701e-05,
"loss": 0.0015,
"step": 1059
},
{
"epoch": 8.620408163265306,
"grad_norm": 0.020861342549324036,
"learning_rate": 1.0940170940170942e-05,
"loss": 0.0004,
"step": 1060
},
{
"epoch": 8.628571428571428,
"grad_norm": 0.08928504586219788,
"learning_rate": 1.0871794871794871e-05,
"loss": 0.002,
"step": 1061
},
{
"epoch": 8.636734693877552,
"grad_norm": 0.012267638929188251,
"learning_rate": 1.0803418803418804e-05,
"loss": 0.0001,
"step": 1062
},
{
"epoch": 8.644897959183673,
"grad_norm": 0.0072326031513512135,
"learning_rate": 1.0735042735042735e-05,
"loss": 0.0001,
"step": 1063
},
{
"epoch": 8.653061224489797,
"grad_norm": 0.004826648626476526,
"learning_rate": 1.0666666666666667e-05,
"loss": 0.0001,
"step": 1064
},
{
"epoch": 8.661224489795918,
"grad_norm": 0.10817044228315353,
"learning_rate": 1.05982905982906e-05,
"loss": 0.0053,
"step": 1065
},
{
"epoch": 8.66938775510204,
"grad_norm": 0.0037550870329141617,
"learning_rate": 1.052991452991453e-05,
"loss": 0.0,
"step": 1066
},
{
"epoch": 8.677551020408163,
"grad_norm": 0.002859473694115877,
"learning_rate": 1.0461538461538463e-05,
"loss": 0.0001,
"step": 1067
},
{
"epoch": 8.685714285714285,
"grad_norm": 0.0039060532581061125,
"learning_rate": 1.0393162393162394e-05,
"loss": 0.0001,
"step": 1068
},
{
"epoch": 8.693877551020408,
"grad_norm": 0.0033676582388579845,
"learning_rate": 1.0324786324786327e-05,
"loss": 0.0,
"step": 1069
},
{
"epoch": 8.70204081632653,
"grad_norm": 0.0012714867480099201,
"learning_rate": 1.0256410256410256e-05,
"loss": 0.0,
"step": 1070
},
{
"epoch": 8.710204081632654,
"grad_norm": 0.02062312327325344,
"learning_rate": 1.0188034188034188e-05,
"loss": 0.0002,
"step": 1071
},
{
"epoch": 8.718367346938775,
"grad_norm": 0.05126015469431877,
"learning_rate": 1.011965811965812e-05,
"loss": 0.0017,
"step": 1072
},
{
"epoch": 8.726530612244899,
"grad_norm": 0.009600671008229256,
"learning_rate": 1.0051282051282052e-05,
"loss": 0.0001,
"step": 1073
},
{
"epoch": 8.73469387755102,
"grad_norm": 0.02762376330792904,
"learning_rate": 9.982905982905984e-06,
"loss": 0.0006,
"step": 1074
},
{
"epoch": 8.742857142857144,
"grad_norm": 0.06737780570983887,
"learning_rate": 9.914529914529915e-06,
"loss": 0.0005,
"step": 1075
},
{
"epoch": 8.751020408163265,
"grad_norm": 0.07408367842435837,
"learning_rate": 9.846153846153848e-06,
"loss": 0.0032,
"step": 1076
},
{
"epoch": 8.759183673469387,
"grad_norm": 0.0068168556317687035,
"learning_rate": 9.777777777777779e-06,
"loss": 0.0001,
"step": 1077
},
{
"epoch": 8.76734693877551,
"grad_norm": 0.02478768117725849,
"learning_rate": 9.70940170940171e-06,
"loss": 0.0004,
"step": 1078
},
{
"epoch": 8.775510204081632,
"grad_norm": 0.03732183575630188,
"learning_rate": 9.641025641025642e-06,
"loss": 0.0014,
"step": 1079
},
{
"epoch": 8.783673469387756,
"grad_norm": 0.7320724725723267,
"learning_rate": 9.572649572649575e-06,
"loss": 0.0094,
"step": 1080
},
{
"epoch": 8.791836734693877,
"grad_norm": 0.02578953094780445,
"learning_rate": 9.504273504273504e-06,
"loss": 0.0003,
"step": 1081
},
{
"epoch": 8.8,
"grad_norm": 0.026644522324204445,
"learning_rate": 9.435897435897436e-06,
"loss": 0.0005,
"step": 1082
},
{
"epoch": 8.808163265306122,
"grad_norm": 0.04598251357674599,
"learning_rate": 9.367521367521369e-06,
"loss": 0.0014,
"step": 1083
},
{
"epoch": 8.816326530612244,
"grad_norm": 0.2066701352596283,
"learning_rate": 9.2991452991453e-06,
"loss": 0.003,
"step": 1084
},
{
"epoch": 8.824489795918367,
"grad_norm": 0.20979352295398712,
"learning_rate": 9.230769230769232e-06,
"loss": 0.0021,
"step": 1085
},
{
"epoch": 8.83265306122449,
"grad_norm": 0.11119628697633743,
"learning_rate": 9.162393162393163e-06,
"loss": 0.0034,
"step": 1086
},
{
"epoch": 8.840816326530613,
"grad_norm": 0.04252481833100319,
"learning_rate": 9.094017094017094e-06,
"loss": 0.0013,
"step": 1087
},
{
"epoch": 8.848979591836734,
"grad_norm": 0.004191332496702671,
"learning_rate": 9.025641025641027e-06,
"loss": 0.0001,
"step": 1088
},
{
"epoch": 8.857142857142858,
"grad_norm": 0.19183596968650818,
"learning_rate": 8.957264957264959e-06,
"loss": 0.0055,
"step": 1089
},
{
"epoch": 8.86530612244898,
"grad_norm": 0.012185700237751007,
"learning_rate": 8.888888888888888e-06,
"loss": 0.0002,
"step": 1090
},
{
"epoch": 8.873469387755103,
"grad_norm": 0.05381055921316147,
"learning_rate": 8.820512820512821e-06,
"loss": 0.0028,
"step": 1091
},
{
"epoch": 8.881632653061224,
"grad_norm": 0.008519163355231285,
"learning_rate": 8.752136752136753e-06,
"loss": 0.0001,
"step": 1092
},
{
"epoch": 8.889795918367348,
"grad_norm": 0.03137361258268356,
"learning_rate": 8.683760683760684e-06,
"loss": 0.0005,
"step": 1093
},
{
"epoch": 8.89795918367347,
"grad_norm": 0.0023849045392125845,
"learning_rate": 8.615384615384617e-06,
"loss": 0.0,
"step": 1094
},
{
"epoch": 8.906122448979591,
"grad_norm": 0.0032758014276623726,
"learning_rate": 8.547008547008548e-06,
"loss": 0.0,
"step": 1095
},
{
"epoch": 8.914285714285715,
"grad_norm": 0.004983650054782629,
"learning_rate": 8.478632478632479e-06,
"loss": 0.0001,
"step": 1096
},
{
"epoch": 8.922448979591836,
"grad_norm": 0.06227012723684311,
"learning_rate": 8.410256410256411e-06,
"loss": 0.0029,
"step": 1097
},
{
"epoch": 8.93061224489796,
"grad_norm": 0.008136185817420483,
"learning_rate": 8.341880341880344e-06,
"loss": 0.0001,
"step": 1098
},
{
"epoch": 8.938775510204081,
"grad_norm": 0.01348600722849369,
"learning_rate": 8.273504273504273e-06,
"loss": 0.0001,
"step": 1099
},
{
"epoch": 8.946938775510205,
"grad_norm": 0.017930278554558754,
"learning_rate": 8.205128205128205e-06,
"loss": 0.0002,
"step": 1100
},
{
"epoch": 8.955102040816326,
"grad_norm": 0.18920879065990448,
"learning_rate": 8.136752136752138e-06,
"loss": 0.0022,
"step": 1101
},
{
"epoch": 8.963265306122448,
"grad_norm": 0.024649549275636673,
"learning_rate": 8.068376068376069e-06,
"loss": 0.001,
"step": 1102
},
{
"epoch": 8.971428571428572,
"grad_norm": 0.040687914937734604,
"learning_rate": 8.000000000000001e-06,
"loss": 0.0007,
"step": 1103
},
{
"epoch": 8.979591836734693,
"grad_norm": 0.024966664612293243,
"learning_rate": 7.931623931623932e-06,
"loss": 0.0003,
"step": 1104
},
{
"epoch": 8.987755102040817,
"grad_norm": 0.016600683331489563,
"learning_rate": 7.863247863247863e-06,
"loss": 0.0002,
"step": 1105
},
{
"epoch": 8.995918367346938,
"grad_norm": 0.10007146000862122,
"learning_rate": 7.794871794871796e-06,
"loss": 0.0015,
"step": 1106
},
{
"epoch": 9.0,
"grad_norm": 0.007699214853346348,
"learning_rate": 7.726495726495728e-06,
"loss": 0.0001,
"step": 1107
},
{
"epoch": 9.0,
"eval_accuracy": 0.942788179268054,
"eval_f1": 0.9422835056672126,
"eval_loss": 0.22396019101142883,
"eval_precision": 0.9426181079752368,
"eval_recall": 0.942788179268054,
"eval_runtime": 25.2075,
"eval_samples_per_second": 17.296,
"eval_steps_per_second": 17.296,
"step": 1107
},
{
"epoch": 9.008163265306122,
"grad_norm": 0.004843783099204302,
"learning_rate": 7.658119658119659e-06,
"loss": 0.0001,
"step": 1108
},
{
"epoch": 9.016326530612245,
"grad_norm": 0.003891325555741787,
"learning_rate": 7.58974358974359e-06,
"loss": 0.0,
"step": 1109
},
{
"epoch": 9.024489795918367,
"grad_norm": 0.08905791491270065,
"learning_rate": 7.521367521367522e-06,
"loss": 0.01,
"step": 1110
},
{
"epoch": 9.03265306122449,
"grad_norm": 0.016216980293393135,
"learning_rate": 7.452991452991454e-06,
"loss": 0.0003,
"step": 1111
},
{
"epoch": 9.040816326530612,
"grad_norm": 0.020533408969640732,
"learning_rate": 7.384615384615386e-06,
"loss": 0.0004,
"step": 1112
},
{
"epoch": 9.048979591836735,
"grad_norm": 0.002070846501737833,
"learning_rate": 7.316239316239317e-06,
"loss": 0.0,
"step": 1113
},
{
"epoch": 9.057142857142857,
"grad_norm": 0.0092104347422719,
"learning_rate": 7.247863247863248e-06,
"loss": 0.0001,
"step": 1114
},
{
"epoch": 9.06530612244898,
"grad_norm": 0.007390407379716635,
"learning_rate": 7.17948717948718e-06,
"loss": 0.0002,
"step": 1115
},
{
"epoch": 9.073469387755102,
"grad_norm": 0.17624011635780334,
"learning_rate": 7.111111111111112e-06,
"loss": 0.0009,
"step": 1116
},
{
"epoch": 9.081632653061224,
"grad_norm": 0.01938670314848423,
"learning_rate": 7.0427350427350435e-06,
"loss": 0.0004,
"step": 1117
},
{
"epoch": 9.089795918367347,
"grad_norm": 0.00450798450037837,
"learning_rate": 6.974358974358974e-06,
"loss": 0.0001,
"step": 1118
},
{
"epoch": 9.097959183673469,
"grad_norm": 0.0029924893751740456,
"learning_rate": 6.905982905982906e-06,
"loss": 0.0,
"step": 1119
},
{
"epoch": 9.106122448979592,
"grad_norm": 0.0016975200269371271,
"learning_rate": 6.837606837606839e-06,
"loss": 0.0,
"step": 1120
},
{
"epoch": 9.114285714285714,
"grad_norm": 0.02085467241704464,
"learning_rate": 6.76923076923077e-06,
"loss": 0.0004,
"step": 1121
},
{
"epoch": 9.122448979591837,
"grad_norm": 0.008936136029660702,
"learning_rate": 6.700854700854701e-06,
"loss": 0.0001,
"step": 1122
},
{
"epoch": 9.130612244897959,
"grad_norm": 0.12007738649845123,
"learning_rate": 6.632478632478633e-06,
"loss": 0.0013,
"step": 1123
},
{
"epoch": 9.138775510204082,
"grad_norm": 0.017182469367980957,
"learning_rate": 6.564102564102565e-06,
"loss": 0.0003,
"step": 1124
},
{
"epoch": 9.146938775510204,
"grad_norm": 0.042313314974308014,
"learning_rate": 6.495726495726496e-06,
"loss": 0.0009,
"step": 1125
},
{
"epoch": 9.155102040816326,
"grad_norm": 0.0015840729465708137,
"learning_rate": 6.427350427350428e-06,
"loss": 0.0,
"step": 1126
},
{
"epoch": 9.16326530612245,
"grad_norm": 0.11032771319150925,
"learning_rate": 6.358974358974359e-06,
"loss": 0.0052,
"step": 1127
},
{
"epoch": 9.17142857142857,
"grad_norm": 0.030041849240660667,
"learning_rate": 6.290598290598291e-06,
"loss": 0.0013,
"step": 1128
},
{
"epoch": 9.179591836734694,
"grad_norm": 0.005967669188976288,
"learning_rate": 6.222222222222223e-06,
"loss": 0.0001,
"step": 1129
},
{
"epoch": 9.187755102040816,
"grad_norm": 0.00804552435874939,
"learning_rate": 6.153846153846155e-06,
"loss": 0.0001,
"step": 1130
},
{
"epoch": 9.19591836734694,
"grad_norm": 0.004231815226376057,
"learning_rate": 6.085470085470086e-06,
"loss": 0.0001,
"step": 1131
},
{
"epoch": 9.204081632653061,
"grad_norm": 0.0016901058843359351,
"learning_rate": 6.0170940170940174e-06,
"loss": 0.0,
"step": 1132
},
{
"epoch": 9.212244897959184,
"grad_norm": 0.004359536338597536,
"learning_rate": 5.948717948717949e-06,
"loss": 0.0,
"step": 1133
},
{
"epoch": 9.220408163265306,
"grad_norm": 0.006538494490087032,
"learning_rate": 5.880341880341881e-06,
"loss": 0.0001,
"step": 1134
},
{
"epoch": 9.228571428571428,
"grad_norm": 0.009976035915315151,
"learning_rate": 5.8119658119658126e-06,
"loss": 0.0002,
"step": 1135
},
{
"epoch": 9.236734693877551,
"grad_norm": 0.005809263791888952,
"learning_rate": 5.743589743589743e-06,
"loss": 0.0001,
"step": 1136
},
{
"epoch": 9.244897959183673,
"grad_norm": 0.01854880154132843,
"learning_rate": 5.675213675213675e-06,
"loss": 0.0003,
"step": 1137
},
{
"epoch": 9.253061224489796,
"grad_norm": 0.00991115067154169,
"learning_rate": 5.606837606837608e-06,
"loss": 0.0002,
"step": 1138
},
{
"epoch": 9.261224489795918,
"grad_norm": 0.02814176119863987,
"learning_rate": 5.538461538461539e-06,
"loss": 0.0006,
"step": 1139
},
{
"epoch": 9.269387755102041,
"grad_norm": 0.003233405062928796,
"learning_rate": 5.470085470085471e-06,
"loss": 0.0,
"step": 1140
},
{
"epoch": 9.277551020408163,
"grad_norm": 0.005762243643403053,
"learning_rate": 5.401709401709402e-06,
"loss": 0.0001,
"step": 1141
},
{
"epoch": 9.285714285714286,
"grad_norm": 0.03154560178518295,
"learning_rate": 5.333333333333334e-06,
"loss": 0.0008,
"step": 1142
},
{
"epoch": 9.293877551020408,
"grad_norm": 0.025290396064519882,
"learning_rate": 5.264957264957265e-06,
"loss": 0.0004,
"step": 1143
},
{
"epoch": 9.30204081632653,
"grad_norm": 0.004073767457157373,
"learning_rate": 5.196581196581197e-06,
"loss": 0.0,
"step": 1144
},
{
"epoch": 9.310204081632653,
"grad_norm": 0.006276815664023161,
"learning_rate": 5.128205128205128e-06,
"loss": 0.0001,
"step": 1145
},
{
"epoch": 9.318367346938775,
"grad_norm": 0.013420458883047104,
"learning_rate": 5.05982905982906e-06,
"loss": 0.0001,
"step": 1146
},
{
"epoch": 9.326530612244898,
"grad_norm": 0.003337176749482751,
"learning_rate": 4.991452991452992e-06,
"loss": 0.0,
"step": 1147
},
{
"epoch": 9.33469387755102,
"grad_norm": 0.0010663908906280994,
"learning_rate": 4.923076923076924e-06,
"loss": 0.0,
"step": 1148
},
{
"epoch": 9.342857142857143,
"grad_norm": 0.00427238317206502,
"learning_rate": 4.854700854700855e-06,
"loss": 0.0001,
"step": 1149
},
{
"epoch": 9.351020408163265,
"grad_norm": 0.0492619089782238,
"learning_rate": 4.786324786324787e-06,
"loss": 0.0023,
"step": 1150
},
{
"epoch": 9.359183673469389,
"grad_norm": 0.014486223459243774,
"learning_rate": 4.717948717948718e-06,
"loss": 0.0002,
"step": 1151
},
{
"epoch": 9.36734693877551,
"grad_norm": 0.006705184932798147,
"learning_rate": 4.64957264957265e-06,
"loss": 0.0001,
"step": 1152
},
{
"epoch": 9.375510204081632,
"grad_norm": 0.12139922380447388,
"learning_rate": 4.581196581196582e-06,
"loss": 0.0029,
"step": 1153
},
{
"epoch": 9.383673469387755,
"grad_norm": 0.01572628878057003,
"learning_rate": 4.512820512820513e-06,
"loss": 0.0003,
"step": 1154
},
{
"epoch": 9.391836734693877,
"grad_norm": 0.015249603427946568,
"learning_rate": 4.444444444444444e-06,
"loss": 0.0003,
"step": 1155
},
{
"epoch": 9.4,
"grad_norm": 0.013273622840642929,
"learning_rate": 4.376068376068377e-06,
"loss": 0.0002,
"step": 1156
},
{
"epoch": 9.408163265306122,
"grad_norm": 0.002208263147622347,
"learning_rate": 4.307692307692308e-06,
"loss": 0.0,
"step": 1157
},
{
"epoch": 9.416326530612245,
"grad_norm": 0.0044462066143751144,
"learning_rate": 4.239316239316239e-06,
"loss": 0.0,
"step": 1158
},
{
"epoch": 9.424489795918367,
"grad_norm": 0.04531145468354225,
"learning_rate": 4.170940170940172e-06,
"loss": 0.0003,
"step": 1159
},
{
"epoch": 9.43265306122449,
"grad_norm": 0.005794985685497522,
"learning_rate": 4.102564102564103e-06,
"loss": 0.0001,
"step": 1160
},
{
"epoch": 9.440816326530612,
"grad_norm": 0.007728431839495897,
"learning_rate": 4.034188034188034e-06,
"loss": 0.0001,
"step": 1161
},
{
"epoch": 9.448979591836734,
"grad_norm": 0.00340407807379961,
"learning_rate": 3.965811965811966e-06,
"loss": 0.0001,
"step": 1162
},
{
"epoch": 9.457142857142857,
"grad_norm": 0.01496533490717411,
"learning_rate": 3.897435897435898e-06,
"loss": 0.0003,
"step": 1163
},
{
"epoch": 9.465306122448979,
"grad_norm": 0.002183685777708888,
"learning_rate": 3.8290598290598295e-06,
"loss": 0.0,
"step": 1164
},
{
"epoch": 9.473469387755102,
"grad_norm": 0.07050516456365585,
"learning_rate": 3.760683760683761e-06,
"loss": 0.0001,
"step": 1165
},
{
"epoch": 9.481632653061224,
"grad_norm": 0.002324522938579321,
"learning_rate": 3.692307692307693e-06,
"loss": 0.0,
"step": 1166
},
{
"epoch": 9.489795918367347,
"grad_norm": 0.006657972000539303,
"learning_rate": 3.623931623931624e-06,
"loss": 0.0001,
"step": 1167
},
{
"epoch": 9.49795918367347,
"grad_norm": 0.04592962563037872,
"learning_rate": 3.555555555555556e-06,
"loss": 0.004,
"step": 1168
},
{
"epoch": 9.506122448979593,
"grad_norm": 0.0031562778167426586,
"learning_rate": 3.487179487179487e-06,
"loss": 0.0,
"step": 1169
},
{
"epoch": 9.514285714285714,
"grad_norm": 0.006828220561146736,
"learning_rate": 3.4188034188034193e-06,
"loss": 0.0001,
"step": 1170
},
{
"epoch": 9.522448979591836,
"grad_norm": 0.003534778719767928,
"learning_rate": 3.3504273504273506e-06,
"loss": 0.0001,
"step": 1171
},
{
"epoch": 9.53061224489796,
"grad_norm": 0.00211413879878819,
"learning_rate": 3.2820512820512823e-06,
"loss": 0.0,
"step": 1172
},
{
"epoch": 9.538775510204081,
"grad_norm": 0.006114844232797623,
"learning_rate": 3.213675213675214e-06,
"loss": 0.0001,
"step": 1173
},
{
"epoch": 9.546938775510204,
"grad_norm": 0.0042116702534258366,
"learning_rate": 3.1452991452991453e-06,
"loss": 0.0,
"step": 1174
},
{
"epoch": 9.555102040816326,
"grad_norm": 0.005698191002011299,
"learning_rate": 3.0769230769230774e-06,
"loss": 0.0001,
"step": 1175
},
{
"epoch": 9.56326530612245,
"grad_norm": 0.02607082575559616,
"learning_rate": 3.0085470085470087e-06,
"loss": 0.0003,
"step": 1176
},
{
"epoch": 9.571428571428571,
"grad_norm": 0.024089762941002846,
"learning_rate": 2.9401709401709404e-06,
"loss": 0.0005,
"step": 1177
},
{
"epoch": 9.579591836734695,
"grad_norm": 0.00590532599017024,
"learning_rate": 2.8717948717948717e-06,
"loss": 0.0001,
"step": 1178
},
{
"epoch": 9.587755102040816,
"grad_norm": 0.0025553421583026648,
"learning_rate": 2.803418803418804e-06,
"loss": 0.0,
"step": 1179
},
{
"epoch": 9.59591836734694,
"grad_norm": 0.005867726169526577,
"learning_rate": 2.7350427350427355e-06,
"loss": 0.0001,
"step": 1180
},
{
"epoch": 9.604081632653061,
"grad_norm": 0.0035446197725832462,
"learning_rate": 2.666666666666667e-06,
"loss": 0.0001,
"step": 1181
},
{
"epoch": 9.612244897959183,
"grad_norm": 0.0048804013058543205,
"learning_rate": 2.5982905982905985e-06,
"loss": 0.0,
"step": 1182
},
{
"epoch": 9.620408163265306,
"grad_norm": 0.0234014093875885,
"learning_rate": 2.52991452991453e-06,
"loss": 0.0,
"step": 1183
},
{
"epoch": 9.628571428571428,
"grad_norm": 0.014328445307910442,
"learning_rate": 2.461538461538462e-06,
"loss": 0.0002,
"step": 1184
},
{
"epoch": 9.636734693877552,
"grad_norm": 0.03956277295947075,
"learning_rate": 2.3931623931623937e-06,
"loss": 0.0018,
"step": 1185
},
{
"epoch": 9.644897959183673,
"grad_norm": 0.1734299510717392,
"learning_rate": 2.324786324786325e-06,
"loss": 0.0005,
"step": 1186
},
{
"epoch": 9.653061224489797,
"grad_norm": 0.0017454794142395258,
"learning_rate": 2.2564102564102566e-06,
"loss": 0.0,
"step": 1187
},
{
"epoch": 9.661224489795918,
"grad_norm": 0.009274955838918686,
"learning_rate": 2.1880341880341884e-06,
"loss": 0.0001,
"step": 1188
},
{
"epoch": 9.66938775510204,
"grad_norm": 0.045709025114774704,
"learning_rate": 2.1196581196581196e-06,
"loss": 0.0006,
"step": 1189
},
{
"epoch": 9.677551020408163,
"grad_norm": 0.044529132544994354,
"learning_rate": 2.0512820512820513e-06,
"loss": 0.0019,
"step": 1190
},
{
"epoch": 9.685714285714285,
"grad_norm": 0.0007452021236531436,
"learning_rate": 1.982905982905983e-06,
"loss": 0.0,
"step": 1191
},
{
"epoch": 9.693877551020408,
"grad_norm": 0.004460840951651335,
"learning_rate": 1.9145299145299148e-06,
"loss": 0.0001,
"step": 1192
},
{
"epoch": 9.70204081632653,
"grad_norm": 0.004089562688022852,
"learning_rate": 1.8461538461538465e-06,
"loss": 0.0001,
"step": 1193
},
{
"epoch": 9.710204081632654,
"grad_norm": 0.01671615056693554,
"learning_rate": 1.777777777777778e-06,
"loss": 0.0001,
"step": 1194
},
{
"epoch": 9.718367346938775,
"grad_norm": 0.0166789498180151,
"learning_rate": 1.7094017094017097e-06,
"loss": 0.0003,
"step": 1195
},
{
"epoch": 9.726530612244899,
"grad_norm": 0.0030747223645448685,
"learning_rate": 1.6410256410256412e-06,
"loss": 0.0,
"step": 1196
},
{
"epoch": 9.73469387755102,
"grad_norm": 0.027723681181669235,
"learning_rate": 1.5726495726495727e-06,
"loss": 0.0005,
"step": 1197
},
{
"epoch": 9.742857142857144,
"grad_norm": 0.006767381448298693,
"learning_rate": 1.5042735042735044e-06,
"loss": 0.0001,
"step": 1198
},
{
"epoch": 9.751020408163265,
"grad_norm": 0.014646001160144806,
"learning_rate": 1.4358974358974359e-06,
"loss": 0.0003,
"step": 1199
},
{
"epoch": 9.759183673469387,
"grad_norm": 0.005182509310543537,
"learning_rate": 1.3675213675213678e-06,
"loss": 0.0001,
"step": 1200
},
{
"epoch": 9.76734693877551,
"grad_norm": 0.002610682277008891,
"learning_rate": 1.2991452991452993e-06,
"loss": 0.0,
"step": 1201
},
{
"epoch": 9.775510204081632,
"grad_norm": 0.026194339618086815,
"learning_rate": 1.230769230769231e-06,
"loss": 0.002,
"step": 1202
},
{
"epoch": 9.783673469387756,
"grad_norm": 0.005605866201221943,
"learning_rate": 1.1623931623931625e-06,
"loss": 0.0,
"step": 1203
},
{
"epoch": 9.791836734693877,
"grad_norm": 0.06640844792127609,
"learning_rate": 1.0940170940170942e-06,
"loss": 0.0033,
"step": 1204
},
{
"epoch": 9.8,
"grad_norm": 0.006540970876812935,
"learning_rate": 1.0256410256410257e-06,
"loss": 0.0001,
"step": 1205
},
{
"epoch": 9.808163265306122,
"grad_norm": 0.005154821090400219,
"learning_rate": 9.572649572649574e-07,
"loss": 0.0001,
"step": 1206
},
{
"epoch": 9.816326530612244,
"grad_norm": 0.0638870820403099,
"learning_rate": 8.88888888888889e-07,
"loss": 0.0008,
"step": 1207
},
{
"epoch": 9.824489795918367,
"grad_norm": 0.022943247109651566,
"learning_rate": 8.205128205128206e-07,
"loss": 0.001,
"step": 1208
},
{
"epoch": 9.83265306122449,
"grad_norm": 0.020996147766709328,
"learning_rate": 7.521367521367522e-07,
"loss": 0.0001,
"step": 1209
},
{
"epoch": 9.840816326530613,
"grad_norm": 0.05984543636441231,
"learning_rate": 6.837606837606839e-07,
"loss": 0.0045,
"step": 1210
},
{
"epoch": 9.848979591836734,
"grad_norm": 0.008096975274384022,
"learning_rate": 6.153846153846155e-07,
"loss": 0.0,
"step": 1211
},
{
"epoch": 9.857142857142858,
"grad_norm": 0.0041329097002744675,
"learning_rate": 5.470085470085471e-07,
"loss": 0.0,
"step": 1212
},
{
"epoch": 9.86530612244898,
"grad_norm": 0.032030075788497925,
"learning_rate": 4.786324786324787e-07,
"loss": 0.0004,
"step": 1213
},
{
"epoch": 9.873469387755103,
"grad_norm": 0.029202815145254135,
"learning_rate": 4.102564102564103e-07,
"loss": 0.0012,
"step": 1214
},
{
"epoch": 9.881632653061224,
"grad_norm": 0.2898118495941162,
"learning_rate": 3.4188034188034194e-07,
"loss": 0.0014,
"step": 1215
},
{
"epoch": 9.889795918367348,
"grad_norm": 0.016399463638663292,
"learning_rate": 2.7350427350427354e-07,
"loss": 0.0002,
"step": 1216
},
{
"epoch": 9.89795918367347,
"grad_norm": 0.06763066351413727,
"learning_rate": 2.0512820512820514e-07,
"loss": 0.0015,
"step": 1217
},
{
"epoch": 9.906122448979591,
"grad_norm": 0.008314032107591629,
"learning_rate": 1.3675213675213677e-07,
"loss": 0.0001,
"step": 1218
},
{
"epoch": 9.914285714285715,
"grad_norm": 0.0004530400619842112,
"learning_rate": 6.837606837606839e-08,
"loss": 0.0,
"step": 1219
},
{
"epoch": 9.922448979591836,
"grad_norm": 0.017806239426136017,
"learning_rate": 0.0,
"loss": 0.0002,
"step": 1220
},
{
"epoch": 9.922448979591836,
"eval_accuracy": 0.9429838867506034,
"eval_f1": 0.9425042335887034,
"eval_loss": 0.2353067696094513,
"eval_precision": 0.9427915319091013,
"eval_recall": 0.9429838867506034,
"eval_runtime": 24.893,
"eval_samples_per_second": 17.515,
"eval_steps_per_second": 17.515,
"step": 1220
}
],
"logging_steps": 1,
"max_steps": 1220,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.581551776666747e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}